2016-09-10 21:09:16 +08:00
|
|
|
//===-- BUFInstructions.td - Buffer Instruction Definitions ---------------===//
|
|
|
|
//
|
2019-01-19 16:50:56 +08:00
|
|
|
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
|
|
|
// See https://llvm.org/LICENSE.txt for license information.
|
|
|
|
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
2016-09-10 21:09:16 +08:00
|
|
|
//
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
|
|
|
|
// Complex addressing patterns used when selecting buffer instructions.
// Each def names the C++ selector routine (third argument) and passes an
// integer second argument that differs per def.
// NOTE(review): the integer is presumably the number of operands the
// selector produces -- confirm against the ComplexPattern class definition.

// Patterns with their own selector routines.
def MUBUFAddr32       : ComplexPattern<i64, 9, "SelectMUBUFAddr32">;
def MUBUFAddr64       : ComplexPattern<i64, 8, "SelectMUBUFAddr64">;
def MUBUFAddr64Atomic : ComplexPattern<i64, 5, "SelectMUBUFAddr64">;

// Scratch patterns; both additionally request the parent node via
// SDNPWantParent, and the Offset form passes a trailing value of 20.
def MUBUFScratchOffen  : ComplexPattern<i64, 4, "SelectMUBUFScratchOffen", [], [SDNPWantParent]>;
def MUBUFScratchOffset : ComplexPattern<i64, 3, "SelectMUBUFScratchOffset", [], [SDNPWantParent], 20>;

// Offset-only patterns; all three share the "SelectMUBUFOffset" selector
// name and differ only in the integer argument.
def MUBUFOffset       : ComplexPattern<i64, 7, "SelectMUBUFOffset">;
def MUBUFOffsetNoGLC  : ComplexPattern<i64, 3, "SelectMUBUFOffset">;
def MUBUFOffsetAtomic : ComplexPattern<i64, 4, "SelectMUBUFOffset">;
|
|
|
|
|
|
|
|
// Integer tags for the buffer addressing modes. Other classes in this file
// compare an `addrKind` template argument against these fields with !eq to
// pick operand lists, assembly strings and encoding bits.
def BUFAddrKind {
  int Offset = 0;
  int OffEn  = 1;
  int IdxEn  = 2;
  int BothEn = 3;
  int Addr64 = 4;
}
|
|
|
|
|
|
|
|
// Maps a BUFAddrKind value to its lower-case name.
//   addrKind - one of the BUFAddrKind integer fields.
//   ret      - "offset", "offen", "idxen", "bothen" or "addr64"; the empty
//              string when addrKind matches none of the known kinds.
class getAddrName<int addrKind> {
  string ret =
    !if(!eq(addrKind, BUFAddrKind.Offset), "offset",
    !if(!eq(addrKind, BUFAddrKind.OffEn),  "offen",
    !if(!eq(addrKind, BUFAddrKind.IdxEn),  "idxen",
    !if(!eq(addrKind, BUFAddrKind.BothEn), "bothen",
    !if(!eq(addrKind, BUFAddrKind.Addr64), "addr64",
    "")))));
}
|
|
|
|
|
TableGen: Streamline the semantics of NAME
Summary:
The new rules are straightforward. The main rules to keep in mind
are:
1. NAME is an implicit template argument of class and multiclass,
and will be substituted by the name of the instantiating def/defm.
2. The name of a def/defm in a multiclass must contain a reference
to NAME. If such a reference is not present, it is automatically
prepended.
And for some additional subtleties, consider these:
3. defm with no name generates a unique name but has no special
behavior otherwise.
4. def with no name generates an anonymous record, whose name is
unique but undefined. In particular, the name won't contain a
reference to NAME.
Keeping rules 1&2 in mind should allow a predictable behavior of
name resolution that is simple to follow.
The old "rules" were rather surprising: sometimes (but not always),
NAME would correspond to the name of the toplevel defm. They were
also plain bonkers when you pushed them to their limits, as the old
version of the TableGen test case shows.
Having NAME correspond to the name of the toplevel defm introduces
"spooky action at a distance" and breaks composability:
refactoring the upper layers of a hierarchy of nested multiclass
instantiations can cause unexpected breakage by changing the value
of NAME at a lower level of the hierarchy. The new rules don't
suffer from this problem.
Some existing .td files have to be adjusted because they ended up
depending on the details of the old implementation.
Change-Id: I694095231565b30f563e6fd0417b41ee01a12589
Reviewers: tra, simon_tatham, craig.topper, MartinO, arsenm, javed.absar
Subscribers: wdng, llvm-commits
Differential Revision: https://reviews.llvm.org/D47430
llvm-svn: 333900
2018-06-04 22:26:05 +08:00
|
|
|
// Mix-in that records, for a MUBUF instruction, whether it is the addr64
// variant and which operation name it belongs to.
//   is_addr64 - stored in IsAddr64.
//   Name      - stored in OpName.
// NOTE(review): presumably consumed as key columns of an instruction
// mapping table -- confirm against the users of this class.
class MUBUFAddr64Table <bit is_addr64, string Name> {
  bit    IsAddr64 = is_addr64;
  string OpName   = Name;
}
|
|
|
|
|
TableGen: Streamline the semantics of NAME
Summary:
The new rules are straightforward. The main rules to keep in mind
are:
1. NAME is an implicit template argument of class and multiclass,
and will be substituted by the name of the instantiating def/defm.
2. The name of a def/defm in a multiclass must contain a reference
to NAME. If such a reference is not present, it is automatically
prepended.
And for some additional subtleties, consider these:
3. defm with no name generates a unique name but has no special
behavior otherwise.
4. def with no name generates an anonymous record, whose name is
unique but undefined. In particular, the name won't contain a
reference to NAME.
Keeping rules 1&2 in mind should allow a predictable behavior of
name resolution that is simple to follow.
The old "rules" were rather surprising: sometimes (but not always),
NAME would correspond to the name of the toplevel defm. They were
also plain bonkers when you pushed them to their limits, as the old
version of the TableGen test case shows.
Having NAME correspond to the name of the toplevel defm introduces
"spooky action at a distance" and breaks composability:
refactoring the upper layers of a hierarchy of nested multiclass
instantiations can cause unexpected breakage by changing the value
of NAME at a lower level of the hierarchy. The new rules don't
suffer from this problem.
Some existing .td files have to be adjusted because they ended up
depending on the details of the old implementation.
Change-Id: I694095231565b30f563e6fd0417b41ee01a12589
Reviewers: tra, simon_tatham, craig.topper, MartinO, arsenm, javed.absar
Subscribers: wdng, llvm-commits
Differential Revision: https://reviews.llvm.org/D47430
llvm-svn: 333900
2018-06-04 22:26:05 +08:00
|
|
|
// Mix-in that records, for a MUBUF instruction, whether it is the LDS
// variant and which operation name it belongs to.
//   is_lds - stored in IsLds.
//   Name   - stored in OpName.
// NOTE(review): presumably consumed as key columns of an instruction
// mapping table -- confirm against the users of this class.
class MUBUFLdsTable <bit is_lds, string Name> {
  bit    IsLds  = is_lds;
  string OpName = Name;
}
|
|
|
|
|
TableGen: Streamline the semantics of NAME
Summary:
The new rules are straightforward. The main rules to keep in mind
are:
1. NAME is an implicit template argument of class and multiclass,
and will be substituted by the name of the instantiating def/defm.
2. The name of a def/defm in a multiclass must contain a reference
to NAME. If such a reference is not present, it is automatically
prepended.
And for some additional subtleties, consider these:
3. defm with no name generates a unique name but has no special
behavior otherwise.
4. def with no name generates an anonymous record, whose name is
unique but undefined. In particular, the name won't contain a
reference to NAME.
Keeping rules 1&2 in mind should allow a predictable behavior of
name resolution that is simple to follow.
The old "rules" were rather surprising: sometimes (but not always),
NAME would correspond to the name of the toplevel defm. They were
also plain bonkers when you pushed them to their limits, as the old
version of the TableGen test case shows.
Having NAME correspond to the name of the toplevel defm introduces
"spooky action at a distance" and breaks composability:
refactoring the upper layers of a hierarchy of nested multiclass
instantiations can cause unexpected breakage by changing the value
of NAME at a lower level of the hierarchy. The new rules don't
suffer from this problem.
Some existing .td files have to be adjusted because they ended up
depending on the details of the old implementation.
Change-Id: I694095231565b30f563e6fd0417b41ee01a12589
Reviewers: tra, simon_tatham, craig.topper, MartinO, arsenm, javed.absar
Subscribers: wdng, llvm-commits
Differential Revision: https://reviews.llvm.org/D47430
llvm-svn: 333900
2018-06-04 22:26:05 +08:00
|
|
|
// MTBUF counterpart of the addr64 table mix-in: records whether the
// instruction is the addr64 variant and which operation name it belongs to.
//   is_addr64 - stored in IsAddr64.
//   Name      - stored in OpName.
// NOTE(review): presumably consumed as key columns of an instruction
// mapping table -- confirm against the users of this class.
class MTBUFAddr64Table <bit is_addr64, string Name> {
  bit    IsAddr64 = is_addr64;
  string OpName   = Name;
}
|
|
|
|
|
2016-09-10 21:09:16 +08:00
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
// MTBUF classes
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
|
|
|
|
// Base class for MTBUF pseudo instructions.
//   opName  - mnemonic; also forwarded to SIMCInstr with the NONE encoding
//             family.
//   outs    - output operand dag.
//   ins     - input operand dag.
//   asmOps  - assembly operand string, kept in AsmOperands so that the real
//             instruction can build its asm string from it.
//   pattern - optional selection patterns (empty by default).
// The asm string handed to InstSI is empty; Mnemonic and AsmOperands carry
// the pieces instead.
class MTBUF_Pseudo <string opName, dag outs, dag ins,
                    string asmOps, list<dag> pattern=[]> :
  InstSI<outs, ins, "", pattern>,
  SIMCInstr<opName, SIEncodingFamily.NONE> {

  // Marked as a code-gen-only pseudo.
  let isPseudo = 1;
  let isCodeGenOnly = 1;
  let Size = 8;
  let UseNamedOperandTable = 1;

  // Pieces of the assembly string, kept separately on the pseudo.
  string Mnemonic = opName;
  string AsmOperands = asmOps;

  // Instruction flags, implicit uses and scheduling information.
  let VM_CNT = 1;
  let EXP_CNT = 1;
  let MTBUF = 1;
  let Uses = [EXEC];
  let hasSideEffects = 0;
  let SchedRW = [WriteVMEM];

  let AsmMatchConverter = "cvtMtbuf";

  // Addressing-mode bits; all default to 0 here.
  bits<1> offen       = 0;
  bits<1> idxen       = 0;
  bits<1> addr64      = 0;

  // Operand-presence flags; every operand is assumed present by default.
  bits<1> has_vdata   = 1;
  bits<1> has_vaddr   = 1;
  bits<1> has_glc     = 1;
  bits<1> has_dlc     = 1;
  bits<1> glc_value   = 0; // the value for glc if no such operand
  bits<1> dlc_value   = 0; // the value for dlc if no such operand
  bits<1> has_srsrc   = 1;
  bits<1> has_soffset = 1;
  bits<1> has_offset  = 1;
  bits<1> has_slc     = 1;
  bits<1> has_tfe     = 1;
}
|
|
|
|
|
2016-09-24 05:21:21 +08:00
|
|
|
// Base class for real (encodable) MTBUF instructions built from a pseudo.
//   ps - the MTBUF_Pseudo whose operand lists, mnemonic and asm operands
//        are reused; the asm string is ps.Mnemonic # ps.AsmOperands.
class MTBUF_Real <MTBUF_Pseudo ps> :
  InstSI <ps.OutOperandList, ps.InOperandList, ps.Mnemonic # ps.AsmOperands, []> {

  // Unlike the pseudo, this is neither a pseudo nor code-gen-only.
  let isPseudo = 0;
  let isCodeGenOnly = 0;

  // copy relevant pseudo op flags
  let SubtargetPredicate = ps.SubtargetPredicate;
  let AsmMatchConverter  = ps.AsmMatchConverter;
  let Constraints        = ps.Constraints;
  let DisableEncoding    = ps.DisableEncoding;
  let TSFlags            = ps.TSFlags;

  // Operand fields declared for the real instruction.
  bits<12> offset;
  bits<1>  glc;
  bits<1>  dlc;
  bits<7>  format;
  bits<8>  vaddr;
  bits<8>  vdata;
  bits<7>  srsrc;
  bits<1>  slc;
  bits<1>  tfe;
  bits<8>  soffset;

  // The combined 7-bit format operand split into its two parts:
  // dfmt is the low four bits, nfmt the three bits above them.
  bits<4> dfmt = format{3-0};
  bits<3> nfmt = format{6-4};
}
|
|
|
|
|
|
|
|
// Builds the input operand dag for an MTBUF instruction.
//   vdataList - empty, or a list whose head is the $vdata register class.
//   vaddrList - empty (default), or a list whose head is the $vaddr
//               register class.
//   ret       - (ins [vdata,] [vaddr,] srsrc, soffset, offset, format,
//               glc, slc, tfe, dlc); $vdata / $vaddr appear only when the
//               corresponding list is non-empty.
class getMTBUFInsDA<list<RegisterClass> vdataList,
                    list<RegisterClass> vaddrList=[]> {
  // Head of each list, or unset (?) when the list is empty.
  RegisterClass vdataClass = !if(!empty(vdataList), ?, !head(vdataList));
  RegisterClass vaddrClass = !if(!empty(vaddrList), ?, !head(vaddrList));

  // Operand list without a $vdata input.
  dag InsNoData = !if(!empty(vaddrList),
    (ins                    SReg_128:$srsrc, SCSrc_b32:$soffset,
         offset:$offset, FORMAT:$format, GLC:$glc, SLC:$slc, TFE:$tfe, DLC:$dlc),
    (ins vaddrClass:$vaddr, SReg_128:$srsrc, SCSrc_b32:$soffset,
         offset:$offset, FORMAT:$format, GLC:$glc, SLC:$slc, TFE:$tfe, DLC:$dlc)
  );

  // Operand list with a leading $vdata input.
  dag InsData = !if(!empty(vaddrList),
    (ins vdataClass:$vdata,                    SReg_128:$srsrc,
         SCSrc_b32:$soffset, offset:$offset, FORMAT:$format, GLC:$glc,
         SLC:$slc, TFE:$tfe, DLC:$dlc),
    (ins vdataClass:$vdata, vaddrClass:$vaddr, SReg_128:$srsrc,
         SCSrc_b32:$soffset, offset:$offset, FORMAT:$format, GLC:$glc,
         SLC:$slc, TFE:$tfe, DLC:$dlc)
  );

  dag ret = !if(!empty(vdataList), InsNoData, InsData);
}
|
|
|
|
|
2017-06-23 00:29:22 +08:00
|
|
|
// Selects the MTBUF input operand dag for an addressing kind.
//   addrKind  - one of the BUFAddrKind integer fields.
//   vdataList - forwarded unchanged to getMTBUFInsDA (empty by default).
//   ret       - Offset: no $vaddr; OffEn / IdxEn: $vaddr is VGPR_32;
//               BothEn / Addr64: $vaddr is VReg_64; any other value
//               yields an empty (ins).
class getMTBUFIns<int addrKind, list<RegisterClass> vdataList=[]> {
  dag ret =
    !if(!eq(addrKind, BUFAddrKind.Offset), getMTBUFInsDA<vdataList>.ret,
    !if(!eq(addrKind, BUFAddrKind.OffEn),  getMTBUFInsDA<vdataList, [VGPR_32]>.ret,
    !if(!eq(addrKind, BUFAddrKind.IdxEn),  getMTBUFInsDA<vdataList, [VGPR_32]>.ret,
    !if(!eq(addrKind, BUFAddrKind.BothEn), getMTBUFInsDA<vdataList, [VReg_64]>.ret,
    !if(!eq(addrKind, BUFAddrKind.Addr64), getMTBUFInsDA<vdataList, [VReg_64]>.ret,
    (ins))))));
}
|
|
|
|
|
|
|
|
// Builds the addressing part of the MTBUF assembly operand string.
//   addrKind - one of the BUFAddrKind integer fields.
//   Pfx      - address operands for that kind; the Offset kind prints the
//              literal "off" in place of $vaddr, the other kinds append
//              their mode keyword(s). Empty for an unknown kind.
//   ret      - Pfx followed directly by "$offset".
class getMTBUFAsmOps<int addrKind> {
  string Pfx =
    !if(!eq(addrKind, BUFAddrKind.Offset), "off, $srsrc, $format, $soffset",
    !if(!eq(addrKind, BUFAddrKind.OffEn),
            "$vaddr, $srsrc, $format, $soffset offen",
    !if(!eq(addrKind, BUFAddrKind.IdxEn),
            "$vaddr, $srsrc, $format, $soffset idxen",
    !if(!eq(addrKind, BUFAddrKind.BothEn),
            "$vaddr, $srsrc, $format, $soffset idxen offen",
    !if(!eq(addrKind, BUFAddrKind.Addr64),
            "$vaddr, $srsrc, $format, $soffset addr64",
    "")))));
  string ret = Pfx # "$offset";
}
|
|
|
|
|
|
|
|
// Derives the addressing-mode flag bits of an MTBUF instruction from a
// BUFAddrKind selector value.
class MTBUF_SetupAddr<int addrKind> {
  // offen is 1 for the OffEn and BothEn kinds, 0 otherwise.
  bits<1> offen =
    !if(!eq(addrKind, BUFAddrKind.BothEn), 1,
    !if(!eq(addrKind, BUFAddrKind.OffEn), 1, 0));

  // idxen is 1 for the IdxEn and BothEn kinds, 0 otherwise.
  bits<1> idxen =
    !if(!eq(addrKind, BUFAddrKind.BothEn), 1,
    !if(!eq(addrKind, BUFAddrKind.IdxEn), 1, 0));

  // addr64 is 1 only for the Addr64 kind.
  bits<1> addr64 = !if(!eq(addrKind, BUFAddrKind.Addr64), 1, 0);

  // has_vaddr is cleared only for the Offset kind.
  bits<1> has_vaddr = !if(!eq(addrKind, BUFAddrKind.Offset), 0, 1);
}
|
|
|
|
|
|
|
|
// Pseudo instruction for a single addressing variant of an MTBUF load.
//   opName     - mnemonic, also the prefix of PseudoInstr.
//   addrKind   - BUFAddrKind value selecting the input operands, the asm
//                operand string and the address flag bits.
//   vdataClass - register class of the $vdata result.
//   pattern    - optional selection pattern list.
class MTBUF_Load_Pseudo <string opName,
                         int addrKind,
                         RegisterClass vdataClass,
                         list<dag> pattern=[],
                         // Workaround bug bz30254
                         int addrKindCopy = addrKind>
  : MTBUF_Pseudo<opName,
                 (outs vdataClass:$vdata),
                 getMTBUFIns<addrKindCopy>.ret,
                 " $vdata, " # getMTBUFAsmOps<addrKindCopy>.ret # "$glc$slc$tfe$dlc",
                 pattern>,
    MTBUF_SetupAddr<addrKindCopy> {
  // Loads read memory and never write it.
  let mayLoad = 1;
  let mayStore = 0;

  // PseudoInstr is "<opName>_<address kind name>".
  let PseudoInstr = opName # "_" # getAddrName<addrKindCopy>.ret;
}
|
|
|
|
|
2017-06-23 00:29:22 +08:00
|
|
|
// Instantiates every addressing variant of an MTBUF load pseudo:
// _OFFSET, _ADDR64, _OFFEN, _IDXEN, _BOTHEN, plus "_exact" copies of the
// non-ADDR64 variants that are defined with DisableWQM = 1.
// Only _OFFSET and _ADDR64 carry a selection pattern (built from `ld`) and
// are additionally tagged with MTBUFAddr64Table<0/1, NAME>.
//
// NOTE(review): MUBUFOffset / MUBUFAddr64 are declared at the top of this
// file with 7 / 8 operands, while the patterns below pass 8 / 9 (extra
// $format). Presumably harmless while `ld` stays null_frag - confirm before
// passing a real operator.
multiclass MTBUF_Pseudo_Loads<string opName, RegisterClass vdataClass,
                              ValueType load_vt = i32,
                              SDPatternOperator ld = null_frag> {

  def _OFFSET : MTBUF_Load_Pseudo <opName, BUFAddrKind.Offset, vdataClass,
    [(set load_vt:$vdata,
     (ld (MUBUFOffset v4i32:$srsrc, i32:$soffset, i16:$offset, i8:$format,
                      i1:$glc, i1:$slc, i1:$tfe, i1:$dlc)))]>,
    MTBUFAddr64Table<0, NAME>;

  def _ADDR64 : MTBUF_Load_Pseudo <opName, BUFAddrKind.Addr64, vdataClass,
    [(set load_vt:$vdata,
     (ld (MUBUFAddr64 v4i32:$srsrc, i64:$vaddr, i32:$soffset, i16:$offset,
                      i8:$format, i1:$glc, i1:$slc, i1:$tfe, i1:$dlc)))]>,
    MTBUFAddr64Table<1, NAME>;

  // Pattern-less variants.
  def _OFFEN  : MTBUF_Load_Pseudo <opName, BUFAddrKind.OffEn, vdataClass>;
  def _IDXEN  : MTBUF_Load_Pseudo <opName, BUFAddrKind.IdxEn, vdataClass>;
  def _BOTHEN : MTBUF_Load_Pseudo <opName, BUFAddrKind.BothEn, vdataClass>;

  // "_exact" variants: same records, with DisableWQM set.
  let DisableWQM = 1 in {
    def _OFFSET_exact : MTBUF_Load_Pseudo <opName, BUFAddrKind.Offset, vdataClass>;
    def _OFFEN_exact  : MTBUF_Load_Pseudo <opName, BUFAddrKind.OffEn, vdataClass>;
    def _IDXEN_exact  : MTBUF_Load_Pseudo <opName, BUFAddrKind.IdxEn, vdataClass>;
    def _BOTHEN_exact : MTBUF_Load_Pseudo <opName, BUFAddrKind.BothEn, vdataClass>;
  }
}
|
|
|
|
|
|
|
|
// Pseudo instruction for a single addressing variant of an MTBUF store.
//   opName     - mnemonic, also the prefix of PseudoInstr.
//   addrKind   - BUFAddrKind value selecting the input operands, the asm
//                operand string and the address flag bits.
//   vdataClass - register class of the stored $vdata operand; it is passed
//                to getMTBUFIns so the data register becomes an input.
//   pattern    - optional selection pattern list.
class MTBUF_Store_Pseudo <string opName,
                          int addrKind,
                          RegisterClass vdataClass,
                          list<dag> pattern=[],
                          // Workaround bug bz30254
                          int addrKindCopy = addrKind,
                          RegisterClass vdataClassCopy = vdataClass>
  : MTBUF_Pseudo<opName,
                 (outs),
                 getMTBUFIns<addrKindCopy, [vdataClassCopy]>.ret,
                 " $vdata, " # getMTBUFAsmOps<addrKindCopy>.ret # "$glc$slc$tfe$dlc",
                 pattern>,
    MTBUF_SetupAddr<addrKindCopy> {
  // Stores write memory and never read it.
  let mayStore = 1;
  let mayLoad = 0;

  // PseudoInstr is "<opName>_<address kind name>".
  let PseudoInstr = opName # "_" # getAddrName<addrKindCopy>.ret;
}
|
|
|
|
|
2017-06-23 00:29:22 +08:00
|
|
|
// Instantiates every addressing variant of an MTBUF store pseudo:
// _OFFSET, _ADDR64, _OFFEN, _IDXEN, _BOTHEN, plus "_exact" copies of the
// non-ADDR64 variants that are defined with DisableWQM = 1.
// Only _OFFSET and _ADDR64 carry a selection pattern (built from `st`) and
// are additionally tagged with MTBUFAddr64Table<0/1, NAME>.
//
// NOTE(review): MUBUFOffset / MUBUFAddr64 are declared at the top of this
// file with 7 / 8 operands, while the patterns below pass 8 / 9 (extra
// $format). Presumably harmless while `st` stays null_frag - confirm before
// passing a real operator.
multiclass MTBUF_Pseudo_Stores<string opName, RegisterClass vdataClass,
                               ValueType store_vt = i32,
                               SDPatternOperator st = null_frag> {

  def _OFFSET : MTBUF_Store_Pseudo <opName, BUFAddrKind.Offset, vdataClass,
    [(st store_vt:$vdata, (MUBUFOffset v4i32:$srsrc, i32:$soffset,
                                       i16:$offset, i8:$format, i1:$glc,
                                       i1:$slc, i1:$tfe, i1:$dlc))]>,
    MTBUFAddr64Table<0, NAME>;

  def _ADDR64 : MTBUF_Store_Pseudo <opName, BUFAddrKind.Addr64, vdataClass,
    [(st store_vt:$vdata, (MUBUFAddr64 v4i32:$srsrc, i64:$vaddr, i32:$soffset,
                                       i16:$offset, i8:$format, i1:$glc,
                                       i1:$slc, i1:$tfe, i1:$dlc))]>,
    MTBUFAddr64Table<1, NAME>;

  // Pattern-less variants.
  def _OFFEN  : MTBUF_Store_Pseudo <opName, BUFAddrKind.OffEn, vdataClass>;
  def _IDXEN  : MTBUF_Store_Pseudo <opName, BUFAddrKind.IdxEn, vdataClass>;
  def _BOTHEN : MTBUF_Store_Pseudo <opName, BUFAddrKind.BothEn, vdataClass>;

  // "_exact" variants: same records, with DisableWQM set.
  let DisableWQM = 1 in {
    def _OFFSET_exact : MTBUF_Store_Pseudo <opName, BUFAddrKind.Offset, vdataClass>;
    def _OFFEN_exact  : MTBUF_Store_Pseudo <opName, BUFAddrKind.OffEn, vdataClass>;
    def _IDXEN_exact  : MTBUF_Store_Pseudo <opName, BUFAddrKind.IdxEn, vdataClass>;
    def _BOTHEN_exact : MTBUF_Store_Pseudo <opName, BUFAddrKind.BothEn, vdataClass>;
  }
}
|
|
|
|
|
|
|
|
|
2016-09-10 21:09:16 +08:00
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
// MUBUF classes
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
|
2018-12-13 00:15:21 +08:00
|
|
|
// Maps a MUBUF record name onto its single-dword counterpart by rewriting
// the "DWORDX2", "DWORDX3" and "DWORDX4" substrings to "DWORD".
// The substitutions are applied innermost-first: X4, then X3, then X2.
class MUBUFGetBaseOpcode<string Op> {
  string ret =
    !subst("DWORDX2", "DWORD",
      !subst("DWORDX3", "DWORD",
        !subst("DWORDX4", "DWORD", Op)));
}
|
|
|
|
|
2016-09-10 21:09:16 +08:00
|
|
|
// Common base of all MUBUF pseudo instructions.
// The record has an empty asm string; the mnemonic and the operand string
// are kept in the Mnemonic / AsmOperands fields instead (MUBUF_Real
// concatenates them for the real instruction).
class MUBUF_Pseudo <string opName, dag outs, dag ins,
                    string asmOps, list<dag> pattern=[]> :
  InstSI<outs, ins, "", pattern>,
  SIMCInstr<opName, SIEncodingFamily.NONE> {

  // Generic pseudo-instruction settings.
  let isPseudo = 1;
  let isCodeGenOnly = 1;
  let Size = 8;
  let UseNamedOperandTable = 1;

  // Assembly text pieces.
  string Mnemonic = opName;
  string AsmOperands = asmOps;

  // Opcode is this record itself; BaseOpcode is the record whose name is
  // obtained from NAME via MUBUFGetBaseOpcode.
  Instruction Opcode = !cast<Instruction>(NAME);
  Instruction BaseOpcode = !cast<Instruction>(MUBUFGetBaseOpcode<NAME>.ret);

  // Instruction properties shared by every MUBUF pseudo.
  let VM_CNT = 1;
  let EXP_CNT = 1;
  let MUBUF = 1;
  let Uses = [EXEC];
  let hasSideEffects = 0;
  let SchedRW = [WriteVMEM];

  let AsmMatchConverter = "cvtMubuf";

  // Addressing / operand-presence defaults; derived classes override them.
  bits<1> offen       = 0;
  bits<1> idxen       = 0;
  bits<1> addr64      = 0;
  bits<1> lds         = 0;
  bits<1> has_vdata   = 1;
  bits<1> has_vaddr   = 1;
  bits<1> has_glc     = 1;
  bits<1> has_dlc     = 1;
  bits<1> glc_value   = 0; // the value for glc if no such operand
  bits<1> dlc_value   = 0; // the value for dlc if no such operand
  bits<1> has_srsrc   = 1;
  bits<1> has_soffset = 1;
  bits<1> has_offset  = 1;
  bits<1> has_slc     = 1;
  bits<1> has_tfe     = 1;
  bits<4> elements    = 0;
}
|
|
|
|
|
2019-05-01 06:08:23 +08:00
|
|
|
// Real (encodable) counterpart of a MUBUF_Pseudo. Operand lists are taken
// from the pseudo, and the asm string is the pseudo's Mnemonic followed by
// its AsmOperands. No selection patterns are attached.
class MUBUF_Real <MUBUF_Pseudo ps> :
  InstSI <ps.OutOperandList, ps.InOperandList, ps.Mnemonic # ps.AsmOperands, []> {

  // Unlike the pseudo, this record is a real, assembler-visible instruction.
  let isPseudo = 0;
  let isCodeGenOnly = 0;

  // copy relevant pseudo op flags
  let SubtargetPredicate = ps.SubtargetPredicate;
  let AsmMatchConverter  = ps.AsmMatchConverter;
  let Constraints        = ps.Constraints;
  let DisableEncoding    = ps.DisableEncoding;
  let TSFlags            = ps.TSFlags;

  // Operand fields, declared without values here.
  bits<12> offset;
  bits<1>  glc;
  bits<1>  dlc;
  bits<8>  vaddr;
  bits<8>  vdata;
  bits<7>  srsrc;
  bits<1>  slc;
  bits<1>  tfe;
  bits<8>  soffset;
}
|
|
|
|
|
|
|
|
|
|
|
|
// For cache invalidation instructions.
// The pseudo takes no operands at all; its only pattern is the bare `node`.
class MUBUF_Invalidate <string opName, SDPatternOperator node = null_frag> :
  MUBUF_Pseudo<opName, (outs), (ins), "", [(node)]> {

  // No custom asm-match converter for these instructions.
  let AsmMatchConverter = "";

  // Marked as having side effects and as possibly storing.
  let hasSideEffects = 1;
  let mayStore = 1;

  // Set everything to 0.
  let offen       = 0;
  let idxen       = 0;
  let addr64      = 0;
  let has_vdata   = 0;
  let has_vaddr   = 0;
  let has_glc     = 0;
  let has_dlc     = 0;
  let glc_value   = 0;
  let dlc_value   = 0;
  let has_srsrc   = 0;
  let has_soffset = 0;
  let has_offset  = 0;
  let has_slc     = 0;
  let has_tfe     = 0;
}
|
|
|
|
|
|
|
|
// Builds the input operand dag of a MUBUF instruction.
//   vdataList - empty, or a one-element list holding the $vdata class;
//               when empty no $vdata input is emitted.
//   vaddrList - empty, or a one-element list holding the $vaddr class;
//               when empty no $vaddr input is emitted.
//   isLds     - when set, the trailing operands are just DLC; otherwise
//               TFE followed by DLC.
// Only the head of each list is used.
class getMUBUFInsDA<list<RegisterClass> vdataList,
                    list<RegisterClass> vaddrList=[],
                    bit isLds = 0> {
  // Left unset (?) when the corresponding list is empty.
  RegisterClass vdataClass = !if(!empty(vdataList), ?, !head(vdataList));
  RegisterClass vaddrClass = !if(!empty(vaddrList), ?, !head(vaddrList));

  // Operand list without a data register.
  dag InsNoData =
    !if(!empty(vaddrList),
        (ins SReg_128:$srsrc, SCSrc_b32:$soffset,
             offset:$offset, GLC:$glc, SLC:$slc),
        (ins vaddrClass:$vaddr, SReg_128:$srsrc, SCSrc_b32:$soffset,
             offset:$offset, GLC:$glc, SLC:$slc));

  // Operand list with a leading data register.
  dag InsData =
    !if(!empty(vaddrList),
        (ins vdataClass:$vdata, SReg_128:$srsrc,
             SCSrc_b32:$soffset, offset:$offset, GLC:$glc, SLC:$slc),
        (ins vdataClass:$vdata, vaddrClass:$vaddr, SReg_128:$srsrc,
             SCSrc_b32:$soffset, offset:$offset, GLC:$glc, SLC:$slc));

  // Final list: the chosen base list followed by the trailing modifiers.
  dag ret = !con(
    !if(!empty(vdataList), InsNoData, InsData),
    !if(isLds, (ins DLC:$dlc), (ins TFE:$tfe, DLC:$dlc))
  );
}
|
|
|
|
|
2019-08-18 08:20:43 +08:00
|
|
|
// Computes an element count (0-4) for a value type.
// The f16 scalar/vector types are matched by name first and yield 1-4;
// every other type is classified by vt.Size: 32 -> 1, 64 -> 2, 96 -> 3,
// 128 -> 4, anything else -> 0.
class getMUBUFElements<ValueType vt> {
  // eq does not support ValueType for some reason.
  string vtAsStr = !cast<string>(vt);

  int ret =
    !if(!eq(vtAsStr, "f16"), 1,
    !if(!eq(vtAsStr, "v2f16"), 2,
    !if(!eq(vtAsStr, "v3f16"), 3,
    !if(!eq(vtAsStr, "v4f16"), 4,
    !if(!eq(vt.Size, 32), 1,
    !if(!eq(vt.Size, 64), 2,
    !if(!eq(vt.Size, 96), 3,
    !if(!eq(vt.Size, 128), 4, 0))))))));
}
|
|
|
|
|
2018-02-21 21:13:48 +08:00
|
|
|
// Selects the MUBUF input operand dag for a BUFAddrKind value.
// Offset uses no $vaddr, OffEn and IdxEn use a VGPR_32 $vaddr, BothEn and
// Addr64 use a VReg_64 $vaddr. Any other addrKind yields an empty (ins).
class getMUBUFIns<int addrKind, list<RegisterClass> vdataList=[], bit isLds = 0> {
  dag ret =
    !if(!eq(addrKind, BUFAddrKind.Offset),
        getMUBUFInsDA<vdataList, [], isLds>.ret,
    !if(!eq(addrKind, BUFAddrKind.OffEn),
        getMUBUFInsDA<vdataList, [VGPR_32], isLds>.ret,
    !if(!eq(addrKind, BUFAddrKind.IdxEn),
        getMUBUFInsDA<vdataList, [VGPR_32], isLds>.ret,
    !if(!eq(addrKind, BUFAddrKind.BothEn),
        getMUBUFInsDA<vdataList, [VReg_64], isLds>.ret,
    !if(!eq(addrKind, BUFAddrKind.Addr64),
        getMUBUFInsDA<vdataList, [VReg_64], isLds>.ret,
        (ins))))));
}
|
|
|
|
|
|
|
|
// Produces the address part of the MUBUF asm operand string for a
// BUFAddrKind value. `Pfx` is the kind-specific prefix (empty for an
// unknown kind); `ret` appends "$offset" directly to it.
class getMUBUFAsmOps<int addrKind> {
  string Pfx =
    !if(!eq(addrKind, BUFAddrKind.Offset),
        "off, $srsrc, $soffset",
    !if(!eq(addrKind, BUFAddrKind.OffEn),
        "$vaddr, $srsrc, $soffset offen",
    !if(!eq(addrKind, BUFAddrKind.IdxEn),
        "$vaddr, $srsrc, $soffset idxen",
    !if(!eq(addrKind, BUFAddrKind.BothEn),
        "$vaddr, $srsrc, $soffset idxen offen",
    !if(!eq(addrKind, BUFAddrKind.Addr64),
        "$vaddr, $srsrc, $soffset addr64",
        "")))));

  string ret = Pfx # "$offset";
}
|
|
|
|
|
2016-11-01 08:55:14 +08:00
|
|
|
// Derives the addressing-mode flag bits of a MUBUF instruction from a
// BUFAddrKind selector value.
class MUBUF_SetupAddr<int addrKind> {
  // offen is 1 for the OffEn and BothEn kinds, 0 otherwise.
  bits<1> offen =
    !if(!eq(addrKind, BUFAddrKind.BothEn), 1,
    !if(!eq(addrKind, BUFAddrKind.OffEn), 1, 0));

  // idxen is 1 for the IdxEn and BothEn kinds, 0 otherwise.
  bits<1> idxen =
    !if(!eq(addrKind, BUFAddrKind.BothEn), 1,
    !if(!eq(addrKind, BUFAddrKind.IdxEn), 1, 0));

  // addr64 is 1 only for the Addr64 kind.
  bits<1> addr64 = !if(!eq(addrKind, BUFAddrKind.Addr64), 1, 0);

  // has_vaddr is cleared only for the Offset kind.
  bits<1> has_vaddr = !if(!eq(addrKind, BUFAddrKind.Offset), 0, 1);
}
|
|
|
|
|
|
|
|
// Pseudo instruction for a single addressing variant of a MUBUF load.
//   opName      - mnemonic, also the prefix of PseudoInstr.
//   addrKind    - BUFAddrKind value selecting operands, asm string and
//                 address flag bits.
//   vdata_vt    - value type of the result; its register class comes from
//                 getVregSrcForVT and its element count from
//                 getMUBUFElements.
//   HasTiedDest - when set, an extra $vdata_in input is appended and tied
//                 to $vdata.
//   isLds       - when set, builds the LDS form: no TFE operand, " lds" in
//                 the asm string, "_lds" in PseudoInstr, M0 added to Uses
//                 and the cvtMubufLds converter.
//   pattern     - optional selection pattern list.
class MUBUF_Load_Pseudo <string opName,
                         int addrKind,
                         ValueType vdata_vt,
                         bit HasTiedDest = 0,
                         bit isLds = 0,
                         list<dag> pattern=[],
                         // Workaround bug bz30254
                         int addrKindCopy = addrKind>
  : MUBUF_Pseudo<opName,
                 (outs getVregSrcForVT<vdata_vt>.ret:$vdata),
                 !con(getMUBUFIns<addrKindCopy, [], isLds>.ret,
                      !if(HasTiedDest,
                          (ins getVregSrcForVT<vdata_vt>.ret:$vdata_in),
                          (ins))),
                 " $vdata, " # getMUBUFAsmOps<addrKindCopy>.ret # "$glc$slc" #
                   !if(isLds, " lds", "$tfe") # "$dlc",
                 pattern>,
    MUBUF_SetupAddr<addrKindCopy> {
  // Loads read memory and never write it.
  let mayLoad = 1;
  let mayStore = 0;
  let maybeAtomic = 1;

  // PseudoInstr is "<opName>[_lds]_<address kind name>".
  let PseudoInstr = opName # !if(isLds, "_lds", "") #
                    "_" # getAddrName<addrKindCopy>.ret;
  let AsmMatchConverter = !if(isLds, "cvtMubufLds", "cvtMubuf");

  // Tie the extra input to the result when requested.
  let Constraints = !if(HasTiedDest, "$vdata = $vdata_in", "");

  // LDS-dependent settings.
  let Uses = !if(isLds, [EXEC, M0], [EXEC]);
  let has_tfe = !if(isLds, 0, 1);
  let lds = isLds;

  let elements = getMUBUFElements<vdata_vt>.ret;
}
|
|
|
|
|
2019-07-16 05:41:44 +08:00
|
|
|
// Selects `inst` for a load `ld` of type `load_vt` whose address matches the
// MUBUFOffset complex pattern; the matched operands are forwarded to the
// instruction in the same order.
class MUBUF_Offset_Load_Pat <Instruction inst,
                             ValueType load_vt = i32,
                             SDPatternOperator ld = null_frag> : Pat <
  (load_vt (ld (MUBUFOffset v4i32:$srsrc, i32:$soffset, i16:$offset,
                            i1:$glc, i1:$slc, i1:$tfe, i1:$dlc))),
  (load_vt (inst v4i32:$srsrc, i32:$soffset, i16:$offset,
                 i1:$glc, i1:$slc, i1:$tfe, i1:$dlc))
>;
|
|
|
|
|
|
|
|
// Selects `inst` for a load `ld` of type `load_vt` whose address matches the
// MUBUFAddr64 complex pattern. Note the operand order differs between the
// two sides: the pattern yields $srsrc before $vaddr, while the instruction
// takes $vaddr first.
class MUBUF_Addr64_Load_Pat <Instruction inst,
                             ValueType load_vt = i32,
                             SDPatternOperator ld = null_frag> : Pat <
  (load_vt (ld (MUBUFAddr64 v4i32:$srsrc, i64:$vaddr, i32:$soffset,
                            i16:$offset, i1:$glc, i1:$slc, i1:$tfe,
                            i1:$dlc))),
  (load_vt (inst i64:$vaddr, v4i32:$srsrc, i32:$soffset, i16:$offset,
                 i1:$glc, i1:$slc, i1:$tfe, i1:$dlc))
>;
|
|
|
|
|
|
|
|
// Emits the two anonymous load patterns for a MUBUF load family: one for
// the "<BaseInst>_OFFSET" record and one for the "<BaseInst>_ADDR64" record.
multiclass MUBUF_Pseudo_Load_Pats<string BaseInst,
                                  ValueType load_vt = i32,
                                  SDPatternOperator ld = null_frag> {
  def : MUBUF_Offset_Load_Pat<
          !cast<Instruction>(BaseInst#"_OFFSET"), load_vt, ld>;
  def : MUBUF_Addr64_Load_Pat<
          !cast<Instruction>(BaseInst#"_ADDR64"), load_vt, ld>;
}
|
|
|
|
|
|
|
|
|
2016-09-10 21:09:16 +08:00
|
|
|
// FIXME: tfe can't be an operand because it requires a separate
// opcode because it needs an N+1 register class dest register.
//
// Instantiates every addressing variant of a MUBUF load pseudo:
// _OFFSET, _ADDR64, _OFFEN, _IDXEN, _BOTHEN, plus "_exact" copies of the
// non-ADDR64 variants that are defined with DisableWQM = 1.
// _OFFSET and _ADDR64 are additionally tagged with MUBUFAddr64Table<0/1>,
// keyed by NAME with an "_LDS" suffix for the LDS form.
// `ld` is accepted but not referenced inside this multiclass.
multiclass MUBUF_Pseudo_Loads<string opName,
                              ValueType load_vt = i32,
                              SDPatternOperator ld = null_frag,
                              bit TiedDest = 0,
                              bit isLds = 0> {

  def _OFFSET : MUBUF_Load_Pseudo <opName, BUFAddrKind.Offset, load_vt, TiedDest, isLds>,
    MUBUFAddr64Table<0, NAME # !if(isLds, "_LDS", "")>;

  def _ADDR64 : MUBUF_Load_Pseudo <opName, BUFAddrKind.Addr64, load_vt, TiedDest, isLds>,
    MUBUFAddr64Table<1, NAME # !if(isLds, "_LDS", "")>;

  def _OFFEN  : MUBUF_Load_Pseudo <opName, BUFAddrKind.OffEn, load_vt, TiedDest, isLds>;
  def _IDXEN  : MUBUF_Load_Pseudo <opName, BUFAddrKind.IdxEn, load_vt, TiedDest, isLds>;
  def _BOTHEN : MUBUF_Load_Pseudo <opName, BUFAddrKind.BothEn, load_vt, TiedDest, isLds>;

  // "_exact" variants: same records, with DisableWQM set.
  let DisableWQM = 1 in {
    def _OFFSET_exact : MUBUF_Load_Pseudo <opName, BUFAddrKind.Offset, load_vt, TiedDest, isLds>;
    def _OFFEN_exact  : MUBUF_Load_Pseudo <opName, BUFAddrKind.OffEn, load_vt, TiedDest, isLds>;
    def _IDXEN_exact  : MUBUF_Load_Pseudo <opName, BUFAddrKind.IdxEn, load_vt, TiedDest, isLds>;
    def _BOTHEN_exact : MUBUF_Load_Pseudo <opName, BUFAddrKind.BothEn, load_vt, TiedDest, isLds>;
  }
}
|
|
|
|
|
2019-08-18 08:20:43 +08:00
|
|
|
// Expands MUBUF_Pseudo_Loads twice for a single opcode:
//   * once under the instantiating name, using ld_nolds and the default
//     trailing arguments, and
//   * once with an "_LDS" suffix, using ld_lds and passing 0, 1 for the two
//     trailing arguments (presumably TiedDest = 0 and isLds = 1 -- the
//     MUBUF_Pseudo_Loads header is outside this excerpt; confirm there).
multiclass MUBUF_Pseudo_Loads_Lds<
    string opName,
    ValueType load_vt = i32,
    SDPatternOperator ld_nolds = null_frag,
    SDPatternOperator ld_lds = null_frag> {
  defm NAME : MUBUF_Pseudo_Loads<opName, load_vt, ld_nolds>;
  defm _LDS : MUBUF_Pseudo_Loads<opName, load_vt, ld_lds, 0, 1>;
}
|
|
|
|
|
2016-09-10 21:09:16 +08:00
|
|
|
// Pseudo instruction for a MUBUF store.
//
//   opName    - assembly mnemonic, also used to build PseudoInstr.
//   addrKind  - one of the BUFAddrKind values; selects the operand list, the
//               asm operand string and the address setup mixin.
//   store_vt  - value type of the stored data; picks the $vdata source
//               register operand and the `elements` field.
//   pattern   - optional selection pattern list.
//
// The record has no outputs, is marked as a store (mayStore = 1, mayLoad = 0)
// and as possibly atomic.
class MUBUF_Store_Pseudo<string opName, int addrKind, ValueType store_vt,
                         list<dag> pattern = [],
                         // Workaround bug bz30254
                         int addrKindCopy = addrKind>
    : MUBUF_Pseudo<
          opName,
          (outs),
          getMUBUFIns<addrKindCopy, [getVregSrcForVT<store_vt>.ret]>.ret,
          " $vdata, " # getMUBUFAsmOps<addrKindCopy>.ret # "$glc$slc$tfe$dlc",
          pattern>,
      MUBUF_SetupAddr<addrKindCopy> {
  // Name shared by all encodings of this pseudo: "<opName>_<addr kind>".
  let PseudoInstr = opName # "_" # getAddrName<addrKindCopy>.ret;
  let elements = getMUBUFElements<store_vt>.ret;

  let mayStore = 1;
  let mayLoad = 0;
  let maybeAtomic = 1;
}
|
|
|
|
|
2019-08-18 08:20:43 +08:00
|
|
|
// Defines the full set of store pseudos for one opcode, one per addressing
// mode.  Only _OFFSET and _ADDR64 carry a selection pattern built from `st`;
// those two are also entered in MUBUFAddr64Table (with 0 and 1 respectively)
// under the instantiating NAME.  The "_exact" variants repeat the non-addr64
// modes with DisableWQM = 1 and no pattern.
multiclass MUBUF_Pseudo_Stores<string opName, ValueType store_vt = i32,
                               SDPatternOperator st = null_frag> {

  def _OFFSET : MUBUF_Store_Pseudo<
                    opName, BUFAddrKind.Offset, store_vt,
                    [(st store_vt:$vdata,
                         (MUBUFOffset v4i32:$srsrc, i32:$soffset, i16:$offset,
                                      i1:$glc, i1:$slc, i1:$tfe, i1:$dlc))]>,
                MUBUFAddr64Table<0, NAME>;

  def _ADDR64 : MUBUF_Store_Pseudo<
                    opName, BUFAddrKind.Addr64, store_vt,
                    [(st store_vt:$vdata,
                         (MUBUFAddr64 v4i32:$srsrc, i64:$vaddr, i32:$soffset,
                                      i16:$offset, i1:$glc, i1:$slc, i1:$tfe,
                                      i1:$dlc))]>,
                MUBUFAddr64Table<1, NAME>;

  def _OFFEN  : MUBUF_Store_Pseudo<opName, BUFAddrKind.OffEn, store_vt>;
  def _IDXEN  : MUBUF_Store_Pseudo<opName, BUFAddrKind.IdxEn, store_vt>;
  def _BOTHEN : MUBUF_Store_Pseudo<opName, BUFAddrKind.BothEn, store_vt>;

  let DisableWQM = 1 in {
    def _OFFSET_exact : MUBUF_Store_Pseudo<opName, BUFAddrKind.Offset, store_vt>;
    def _OFFEN_exact  : MUBUF_Store_Pseudo<opName, BUFAddrKind.OffEn, store_vt>;
    def _IDXEN_exact  : MUBUF_Store_Pseudo<opName, BUFAddrKind.IdxEn, store_vt>;
    def _BOTHEN_exact : MUBUF_Store_Pseudo<opName, BUFAddrKind.BothEn, store_vt>;
  }
}
|
|
|
|
|
2018-03-13 01:29:24 +08:00
|
|
|
// MUBUF store pseudo with the `lds` field set.  It takes only a resource
// descriptor, a scalar offset, an immediate offset and the glc/slc modifiers:
// there is no $vdata, no $vaddr and no tfe operand.  The asm string always
// prints the literal "lds" modifier, and the record lists EXEC and M0 in Uses.
class MUBUF_Pseudo_Store_Lds<string opName>
    : MUBUF_Pseudo<
          opName,
          (outs),
          (ins SReg_128:$srsrc, SCSrc_b32:$soffset, offset:$offset, GLC:$glc, SLC:$slc),
          " $srsrc, $soffset$offset lds$glc$slc"> {
  // Memory behaviour.
  let mayStore = 1;
  let mayLoad = 0;
  let maybeAtomic = 1;

  // Operand / encoding field presence.
  let lds = 1;
  let has_vdata = 0;
  let has_vaddr = 0;
  let has_tfe = 0;

  let Uses = [EXEC, M0];

  let AsmMatchConverter = "cvtMubufLds";
}
|
2016-09-10 21:09:16 +08:00
|
|
|
|
|
|
|
// Builds the input operand dag for a buffer atomic.
//
//   vdataClass - register class of the data operand.
//   vdata_in   - when set, the data operand is named $vdata_in; otherwise it
//                is named $vdata.
//   vaddrList  - empty for no $vaddr operand, otherwise its head is the
//                register class used for $vaddr.
//
// Every form ends with $srsrc, $soffset, $offset and $slc.
class getMUBUFAtomicInsDA<RegisterClass vdataClass, bit vdata_in,
                          list<RegisterClass> vaddrList = []> {
  // Left unset when no address register is requested; only referenced by the
  // forms below that include $vaddr.
  RegisterClass vaddrClass = !if(!empty(vaddrList), ?, !head(vaddrList));

  dag ret =
    !if(!empty(vaddrList),
        // Forms without $vaddr.
        !if(vdata_in,
            (ins vdataClass:$vdata_in,
                 SReg_128:$srsrc, SCSrc_b32:$soffset, offset:$offset, SLC:$slc),
            (ins vdataClass:$vdata,
                 SReg_128:$srsrc, SCSrc_b32:$soffset, offset:$offset, SLC:$slc)),
        // Forms with $vaddr.
        !if(vdata_in,
            (ins vdataClass:$vdata_in, vaddrClass:$vaddr,
                 SReg_128:$srsrc, SCSrc_b32:$soffset, offset:$offset, SLC:$slc),
            (ins vdataClass:$vdata, vaddrClass:$vaddr,
                 SReg_128:$srsrc, SCSrc_b32:$soffset, offset:$offset, SLC:$slc)));
}
|
|
|
|
|
|
|
|
// Maps a BUFAddrKind value to the atomic input operand dag produced by
// getMUBUFAtomicInsDA:
//   Offset          -> no $vaddr
//   OffEn, IdxEn    -> $vaddr in VGPR_32
//   BothEn, Addr64  -> $vaddr in VReg_64
// Any other addrKind yields an empty (ins).
class getMUBUFAtomicIns<int addrKind, RegisterClass vdataClass, bit vdata_in,
                        // Workaround bug bz30254
                        RegisterClass vdataClassCopy = vdataClass> {
  dag ret =
    !if(!eq(addrKind, BUFAddrKind.Offset),
          getMUBUFAtomicInsDA<vdataClassCopy, vdata_in>.ret,
    !if(!eq(addrKind, BUFAddrKind.OffEn),
          getMUBUFAtomicInsDA<vdataClassCopy, vdata_in, [VGPR_32]>.ret,
    !if(!eq(addrKind, BUFAddrKind.IdxEn),
          getMUBUFAtomicInsDA<vdataClassCopy, vdata_in, [VGPR_32]>.ret,
    !if(!eq(addrKind, BUFAddrKind.BothEn),
          getMUBUFAtomicInsDA<vdataClassCopy, vdata_in, [VReg_64]>.ret,
    !if(!eq(addrKind, BUFAddrKind.Addr64),
          getMUBUFAtomicInsDA<vdataClassCopy, vdata_in, [VReg_64]>.ret,
          (ins))))));
}
|
|
|
|
|
|
|
|
// Common base for buffer atomic pseudos.  Forwards the operand dags, asm
// operand string and pattern to MUBUF_Pseudo, applies the address setup for
// addrKind, and fixes the flags shared by all atomics: the instruction both
// loads and stores, has side effects, uses the post-ISel hook, disables WQM,
// and has no glc, dlc or tfe fields.
class MUBUF_Atomic_Pseudo<string opName, int addrKind, dag outs, dag ins,
                          string asmOps, list<dag> pattern = [],
                          // Workaround bug bz30254
                          int addrKindCopy = addrKind>
    : MUBUF_Pseudo<opName, outs, ins, asmOps, pattern>,
      MUBUF_SetupAddr<addrKindCopy> {
  // Memory / scheduling behaviour.
  let mayLoad = 1;
  let mayStore = 1;
  let hasSideEffects = 1;
  let maybeAtomic = 1;
  let hasPostISelHook = 1;
  let DisableWQM = 1;

  // Modifier fields that atomics do not expose.
  let has_glc = 0;
  let has_dlc = 0;
  let has_tfe = 0;
}
|
|
|
|
|
|
|
|
// Buffer atomic pseudo that does not produce a result: empty outs, data
// operand named $vdata (vdata_in = 0), and glc_value / dlc_value fixed to 0.
// It is registered with AtomicNoRet under "<opName>_<addr kind>" with a
// second argument of 0.
class MUBUF_AtomicNoRet_Pseudo<string opName, int addrKind,
                               RegisterClass vdataClass,
                               list<dag> pattern = [],
                               // Workaround bug bz30254
                               int addrKindCopy = addrKind,
                               RegisterClass vdataClassCopy = vdataClass>
    : MUBUF_Atomic_Pseudo<
          opName, addrKindCopy,
          (outs),
          getMUBUFAtomicIns<addrKindCopy, vdataClassCopy, 0>.ret,
          " $vdata, " # getMUBUFAsmOps<addrKindCopy>.ret # "$slc",
          pattern>,
      AtomicNoRet<opName # "_" # getAddrName<addrKindCopy>.ret, 0> {
  let PseudoInstr = opName # "_" # getAddrName<addrKindCopy>.ret;
  let AsmMatchConverter = "cvtMubufAtomic";
  let glc_value = 0;
  let dlc_value = 0;
}
|
|
|
|
|
|
|
|
// Buffer atomic pseudo that returns a value.  The result $vdata is an output
// of class vdataClass and is tied to the $vdata_in input, which is excluded
// from the encoding.  glc_value is fixed to 1 and the asm string always
// prints " glc".  It is registered with AtomicNoRet under the same
// "<opName>_<addr kind>" key as the non-returning form, with a second
// argument of 1.
class MUBUF_AtomicRet_Pseudo<string opName, int addrKind,
                             RegisterClass vdataClass,
                             list<dag> pattern = [],
                             // Workaround bug bz30254
                             int addrKindCopy = addrKind,
                             RegisterClass vdataClassCopy = vdataClass>
    : MUBUF_Atomic_Pseudo<
          opName, addrKindCopy,
          (outs vdataClassCopy:$vdata),
          getMUBUFAtomicIns<addrKindCopy, vdataClassCopy, 1>.ret,
          " $vdata, " # getMUBUFAsmOps<addrKindCopy>.ret # " glc$slc",
          pattern>,
      AtomicNoRet<opName # "_" # getAddrName<addrKindCopy>.ret, 1> {
  let PseudoInstr = opName # "_rtn_" # getAddrName<addrKindCopy>.ret;
  let AsmMatchConverter = "cvtMubufAtomicReturn";

  let glc_value = 1;
  let dlc_value = 0;

  // Result register is the same register as the data input.
  let Constraints = "$vdata = $vdata_in";
  let DisableEncoding = "$vdata_in";
}
|
|
|
|
|
2018-11-08 05:21:32 +08:00
|
|
|
// Non-returning buffer atomic pseudos for one opcode, one per addressing
// mode.  _OFFSET and _ADDR64 are also entered in MUBUFAddr64Table (with 0 and
// 1 respectively) under the instantiating NAME.  Every record gets FPAtomic
// set from isFP, which defaults to getIsFP<vdataType>.ret.  The `atomic`
// operator is part of the signature but is not referenced by these records.
multiclass MUBUF_Pseudo_Atomics_NO_RTN<string opName,
                                       RegisterClass vdataClass,
                                       ValueType vdataType,
                                       SDPatternOperator atomic,
                                       bit isFP = getIsFP<vdataType>.ret> {
  let FPAtomic = isFP in {
    def _OFFSET : MUBUF_AtomicNoRet_Pseudo<opName, BUFAddrKind.Offset, vdataClass>,
                  MUBUFAddr64Table<0, NAME>;

    def _ADDR64 : MUBUF_AtomicNoRet_Pseudo<opName, BUFAddrKind.Addr64, vdataClass>,
                  MUBUFAddr64Table<1, NAME>;

    def _OFFEN  : MUBUF_AtomicNoRet_Pseudo<opName, BUFAddrKind.OffEn, vdataClass>;

    def _IDXEN  : MUBUF_AtomicNoRet_Pseudo<opName, BUFAddrKind.IdxEn, vdataClass>;

    def _BOTHEN : MUBUF_AtomicNoRet_Pseudo<opName, BUFAddrKind.BothEn, vdataClass>;
  }
}
|
2016-09-10 21:09:16 +08:00
|
|
|
|
2018-11-08 05:21:32 +08:00
|
|
|
// Returning buffer atomic pseudos for one opcode, one per addressing mode.
// _OFFSET_RTN and _ADDR64_RTN carry a selection pattern that sets $vdata from
// `atomic` applied to the matched address and $vdata_in; they are also
// entered in MUBUFAddr64Table (with 0 and 1 respectively) under
// NAME # "_RTN".  The remaining modes have no pattern.  Every record gets
// FPAtomic set from isFP, which defaults to getIsFP<vdataType>.ret.
multiclass MUBUF_Pseudo_Atomics_RTN<string opName,
                                    RegisterClass vdataClass,
                                    ValueType vdataType,
                                    SDPatternOperator atomic,
                                    bit isFP = getIsFP<vdataType>.ret> {
  let FPAtomic = isFP in {
    def _OFFSET_RTN : MUBUF_AtomicRet_Pseudo<
                          opName, BUFAddrKind.Offset, vdataClass,
                          [(set vdataType:$vdata,
                                (atomic (MUBUFOffsetAtomic v4i32:$srsrc, i32:$soffset, i16:$offset, i1:$slc),
                                        vdataType:$vdata_in))]>,
                      MUBUFAddr64Table<0, NAME # "_RTN">;

    def _ADDR64_RTN : MUBUF_AtomicRet_Pseudo<
                          opName, BUFAddrKind.Addr64, vdataClass,
                          [(set vdataType:$vdata,
                                (atomic (MUBUFAddr64Atomic v4i32:$srsrc, i64:$vaddr, i32:$soffset, i16:$offset, i1:$slc),
                                        vdataType:$vdata_in))]>,
                      MUBUFAddr64Table<1, NAME # "_RTN">;

    def _OFFEN_RTN  : MUBUF_AtomicRet_Pseudo<opName, BUFAddrKind.OffEn, vdataClass>;

    def _IDXEN_RTN  : MUBUF_AtomicRet_Pseudo<opName, BUFAddrKind.IdxEn, vdataClass>;

    def _BOTHEN_RTN : MUBUF_AtomicRet_Pseudo<opName, BUFAddrKind.BothEn, vdataClass>;
  }
}
|
|
|
|
|
2018-11-08 05:21:32 +08:00
|
|
|
// Convenience multiclass: instantiates both the non-returning and the
// returning atomic pseudo sets with the same arguments.
multiclass MUBUF_Pseudo_Atomics<string opName, RegisterClass vdataClass,
                                ValueType vdataType,
                                SDPatternOperator atomic>
    : MUBUF_Pseudo_Atomics_NO_RTN<opName, vdataClass, vdataType, atomic>,
      MUBUF_Pseudo_Atomics_RTN<opName, vdataClass, vdataType, atomic>;
|
|
|
|
|
2016-09-10 21:09:16 +08:00
|
|
|
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
// MUBUF Instructions
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
|
2018-02-21 21:13:48 +08:00
|
|
|
// buffer_{load,store}_format_{x,xy,xyz,xyzw}, typed f32 through v4f32.
// Only the single-component load is instantiated through the *_Lds
// multiclass, which additionally defines the "_LDS" records.
defm BUFFER_LOAD_FORMAT_X     : MUBUF_Pseudo_Loads_Lds<"buffer_load_format_x", f32>;
defm BUFFER_LOAD_FORMAT_XY    : MUBUF_Pseudo_Loads<"buffer_load_format_xy", v2f32>;
defm BUFFER_LOAD_FORMAT_XYZ   : MUBUF_Pseudo_Loads<"buffer_load_format_xyz", v3f32>;
defm BUFFER_LOAD_FORMAT_XYZW  : MUBUF_Pseudo_Loads<"buffer_load_format_xyzw", v4f32>;
defm BUFFER_STORE_FORMAT_X    : MUBUF_Pseudo_Stores<"buffer_store_format_x", f32>;
defm BUFFER_STORE_FORMAT_XY   : MUBUF_Pseudo_Stores<"buffer_store_format_xy", v2f32>;
defm BUFFER_STORE_FORMAT_XYZ  : MUBUF_Pseudo_Stores<"buffer_store_format_xyz", v3f32>;
defm BUFFER_STORE_FORMAT_XYZW : MUBUF_Pseudo_Stores<"buffer_store_format_xyzw", v4f32>;
|
2018-01-13 05:12:19 +08:00
|
|
|
|
AMDGPU: Turn D16 for MIMG instructions into a regular operand
Summary:
This allows us to reduce the number of different machine instruction
opcodes, which reduces the table sizes and helps flatten the TableGen
multiclass hierarchies.
We can do this because for each hardware MIMG opcode, we have a full set
of IMAGE_xxx_Vn_Vm machine instructions for all required sizes of vdata
and vaddr registers. Instead of having separate D16 machine instructions,
a packed D16 instruction loading e.g. 4 components can simply use the
same V2 opcode variant that non-D16 instructions use.
We still require a TSFlag for D16 buffer instructions, because the
D16-ness of buffer instructions is part of the opcode. Renaming the flag
should help avoid future confusion.
The one non-obvious code change is that for gather4 instructions, the
disassembler can no longer automatically decide whether to use a V2 or
a V4 variant. The existing logic which chooses the correct variant for
other MIMG instructions is extended to cover gather4 as well.
As a bonus, some of the assembler error messages are now more helpful
(e.g., complaining about a wrong data size instead of a non-existing
instruction).
While we're at it, delete a whole bunch of dead legacy TableGen code.
Change-Id: I89b02c2841c06f95e662541433e597f5d4553978
Reviewers: arsenm, rampitec, kzhuravl, artem.tamazov, dp, rtaylor
Subscribers: wdng, yaxunl, dstuttard, tpr, t-tye, llvm-commits
Differential Revision: https://reviews.llvm.org/D47434
llvm-svn: 335222
2018-06-21 21:36:01 +08:00
|
|
|
// D16 format loads/stores for subtargets with HasUnpackedD16VMem.  The
// records carry a "_gfx80" name suffix and use i32-based value types
// (i32 through v4i32).  D16Buf is set on every record in the group.
let SubtargetPredicate = HasUnpackedD16VMem, D16Buf = 1 in {
  defm BUFFER_LOAD_FORMAT_D16_X_gfx80     : MUBUF_Pseudo_Loads<"buffer_load_format_d16_x", i32>;
  defm BUFFER_LOAD_FORMAT_D16_XY_gfx80    : MUBUF_Pseudo_Loads<"buffer_load_format_d16_xy", v2i32>;
  defm BUFFER_LOAD_FORMAT_D16_XYZ_gfx80   : MUBUF_Pseudo_Loads<"buffer_load_format_d16_xyz", v3i32>;
  defm BUFFER_LOAD_FORMAT_D16_XYZW_gfx80  : MUBUF_Pseudo_Loads<"buffer_load_format_d16_xyzw", v4i32>;
  defm BUFFER_STORE_FORMAT_D16_X_gfx80    : MUBUF_Pseudo_Stores<"buffer_store_format_d16_x", i32>;
  defm BUFFER_STORE_FORMAT_D16_XY_gfx80   : MUBUF_Pseudo_Stores<"buffer_store_format_d16_xy", v2i32>;
  defm BUFFER_STORE_FORMAT_D16_XYZ_gfx80  : MUBUF_Pseudo_Stores<"buffer_store_format_d16_xyz", v3i32>;
  defm BUFFER_STORE_FORMAT_D16_XYZW_gfx80 : MUBUF_Pseudo_Stores<"buffer_store_format_d16_xyzw", v4i32>;
} // End HasUnpackedD16VMem.
|
|
|
|
|
AMDGPU: Turn D16 for MIMG instructions into a regular operand
Summary:
This allows us to reduce the number of different machine instruction
opcodes, which reduces the table sizes and helps flatten the TableGen
multiclass hierarchies.
We can do this because for each hardware MIMG opcode, we have a full set
of IMAGE_xxx_Vn_Vm machine instructions for all required sizes of vdata
and vaddr registers. Instead of having separate D16 machine instructions,
a packed D16 instruction loading e.g. 4 components can simply use the
same V2 opcode variant that non-D16 instructions use.
We still require a TSFlag for D16 buffer instructions, because the
D16-ness of buffer instructions is part of the opcode. Renaming the flag
should help avoid future confusion.
The one non-obvious code change is that for gather4 instructions, the
disassembler can no longer automatically decide whether to use a V2 or
a V4 variant. The existing logic which chooses the correct variant for
other MIMG instructions is extended to cover gather4 as well.
As a bonus, some of the assembler error messages are now more helpful
(e.g., complaining about a wrong data size instead of a non-existing
instruction).
While we're at it, delete a whole bunch of dead legacy TableGen code.
Change-Id: I89b02c2841c06f95e662541433e597f5d4553978
Reviewers: arsenm, rampitec, kzhuravl, artem.tamazov, dp, rtaylor
Subscribers: wdng, yaxunl, dstuttard, tpr, t-tye, llvm-commits
Differential Revision: https://reviews.llvm.org/D47434
llvm-svn: 335222
2018-06-21 21:36:01 +08:00
|
|
|
// D16 format loads/stores for subtargets with HasPackedD16VMem.  These use
// f16-based value types (f16 through v4f16).  D16Buf is set on every record
// in the group.
let SubtargetPredicate = HasPackedD16VMem, D16Buf = 1 in {
  defm BUFFER_LOAD_FORMAT_D16_X     : MUBUF_Pseudo_Loads<"buffer_load_format_d16_x", f16>;
  defm BUFFER_LOAD_FORMAT_D16_XY    : MUBUF_Pseudo_Loads<"buffer_load_format_d16_xy", v2f16>;
  defm BUFFER_LOAD_FORMAT_D16_XYZ   : MUBUF_Pseudo_Loads<"buffer_load_format_d16_xyz", v3f16>;
  defm BUFFER_LOAD_FORMAT_D16_XYZW  : MUBUF_Pseudo_Loads<"buffer_load_format_d16_xyzw", v4f16>;
  defm BUFFER_STORE_FORMAT_D16_X    : MUBUF_Pseudo_Stores<"buffer_store_format_d16_x", f16>;
  defm BUFFER_STORE_FORMAT_D16_XY   : MUBUF_Pseudo_Stores<"buffer_store_format_d16_xy", v2f16>;
  defm BUFFER_STORE_FORMAT_D16_XYZ  : MUBUF_Pseudo_Stores<"buffer_store_format_d16_xyz", v3f16>;
  defm BUFFER_STORE_FORMAT_D16_XYZW : MUBUF_Pseudo_Stores<"buffer_store_format_d16_xyzw", v4f16>;
} // End HasPackedD16VMem.
|
|
|
|
|
2018-02-21 21:13:48 +08:00
|
|
|
// Plain buffer loads.  The byte, short and dword loads (all typed i32) are
// instantiated through the *_Lds multiclass and so also get "_LDS" records;
// the x2/x3/x4 loads use the plain multiclass.
defm BUFFER_LOAD_UBYTE   : MUBUF_Pseudo_Loads_Lds<"buffer_load_ubyte", i32>;
defm BUFFER_LOAD_SBYTE   : MUBUF_Pseudo_Loads_Lds<"buffer_load_sbyte", i32>;
defm BUFFER_LOAD_USHORT  : MUBUF_Pseudo_Loads_Lds<"buffer_load_ushort", i32>;
defm BUFFER_LOAD_SSHORT  : MUBUF_Pseudo_Loads_Lds<"buffer_load_sshort", i32>;
defm BUFFER_LOAD_DWORD   : MUBUF_Pseudo_Loads_Lds<"buffer_load_dword", i32>;
defm BUFFER_LOAD_DWORDX2 : MUBUF_Pseudo_Loads<"buffer_load_dwordx2", v2i32>;
defm BUFFER_LOAD_DWORDX3 : MUBUF_Pseudo_Loads<"buffer_load_dwordx3", v3i32>;
defm BUFFER_LOAD_DWORDX4 : MUBUF_Pseudo_Loads<"buffer_load_dwordx4", v4i32>;
|
2018-06-13 23:32:46 +08:00
|
|
|
|
2019-07-16 10:46:05 +08:00
|
|
|
// Selection patterns tying the *_global load fragments to the buffer load
// pseudos by name (MUBUF_Pseudo_Load_Pats is defined outside this excerpt).

// 8-bit loads: any-extending and zero-extending both map to UBYTE.
defm : MUBUF_Pseudo_Load_Pats<"BUFFER_LOAD_UBYTE", i32, extloadi8_global>;
defm : MUBUF_Pseudo_Load_Pats<"BUFFER_LOAD_UBYTE", i32, zextloadi8_global>;
defm : MUBUF_Pseudo_Load_Pats<"BUFFER_LOAD_SBYTE", i32, sextloadi8_global>;

// 16-bit loads: any-extending and zero-extending both map to USHORT.
defm : MUBUF_Pseudo_Load_Pats<"BUFFER_LOAD_USHORT", i32, extloadi16_global>;
defm : MUBUF_Pseudo_Load_Pats<"BUFFER_LOAD_USHORT", i32, zextloadi16_global>;
defm : MUBUF_Pseudo_Load_Pats<"BUFFER_LOAD_SSHORT", i32, sextloadi16_global>;

// Full dword and multi-dword loads.
defm : MUBUF_Pseudo_Load_Pats<"BUFFER_LOAD_DWORD", i32, load_global>;
defm : MUBUF_Pseudo_Load_Pats<"BUFFER_LOAD_DWORDX2", v2i32, load_global>;
defm : MUBUF_Pseudo_Load_Pats<"BUFFER_LOAD_DWORDX3", v3i32, load_global>;
defm : MUBUF_Pseudo_Load_Pats<"BUFFER_LOAD_DWORDX4", v4i32, load_global>;
|
|
|
|
|
2018-06-13 23:32:46 +08:00
|
|
|
// This is not described in AMD documentation,
|
|
|
|
// but 'lds' versions of these opcodes are available
|
|
|
|
// in at least GFX8+ chips. See Bug 37653.
|
2019-04-06 17:20:48 +08:00
|
|
|
// "_LDS" variants of the multi-dword loads, restricted to subtargets matching
// isGFX8GFX9. They reuse the mnemonics of the non-LDS loads and pass null_frag
// as the third argument.
// NOTE(review): the trailing "0, 1" presumably means isD16 = 0 and isLds = 1;
// confirm against the MUBUF_Pseudo_Loads parameter list.
let SubtargetPredicate = isGFX8GFX9 in {
|
2018-06-13 23:32:46 +08:00
|
|
|
defm BUFFER_LOAD_DWORDX2_LDS : MUBUF_Pseudo_Loads <
|
2019-08-18 08:20:43 +08:00
|
|
|
"buffer_load_dwordx2", v2i32, null_frag, 0, 1
|
2018-06-13 23:32:46 +08:00
|
|
|
>;
|
|
|
|
defm BUFFER_LOAD_DWORDX3_LDS : MUBUF_Pseudo_Loads <
|
2019-08-18 08:20:43 +08:00
|
|
|
"buffer_load_dwordx3", v3i32, null_frag, 0, 1
|
2018-06-13 23:32:46 +08:00
|
|
|
>;
|
|
|
|
defm BUFFER_LOAD_DWORDX4_LDS : MUBUF_Pseudo_Loads <
|
2019-08-18 08:20:43 +08:00
|
|
|
"buffer_load_dwordx4", v4i32, null_frag, 0, 1
|
2018-06-13 23:32:46 +08:00
|
|
|
>;
|
|
|
|
}
|
|
|
|
|
2016-09-10 21:09:16 +08:00
|
|
|
// Buffer store pseudos. Each instantiation passes the mnemonic, the stored
// value type and a *_global store fragment: truncstorei8_global and
// truncstorei16_global for BYTE/SHORT (both with an i32 value), and
// store_global for DWORD through DWORDX4 (i32, v2i32, v3i32, v4i32).
defm BUFFER_STORE_BYTE : MUBUF_Pseudo_Stores <
|
2019-08-18 08:20:43 +08:00
|
|
|
"buffer_store_byte", i32, truncstorei8_global
|
2016-09-10 21:09:16 +08:00
|
|
|
>;
|
|
|
|
defm BUFFER_STORE_SHORT : MUBUF_Pseudo_Stores <
|
2019-08-18 08:20:43 +08:00
|
|
|
"buffer_store_short", i32, truncstorei16_global
|
2016-09-10 21:09:16 +08:00
|
|
|
>;
|
|
|
|
defm BUFFER_STORE_DWORD : MUBUF_Pseudo_Stores <
|
2019-08-18 08:20:43 +08:00
|
|
|
"buffer_store_dword", i32, store_global
|
2016-09-10 21:09:16 +08:00
|
|
|
>;
|
|
|
|
defm BUFFER_STORE_DWORDX2 : MUBUF_Pseudo_Stores <
|
2019-08-18 08:20:43 +08:00
|
|
|
"buffer_store_dwordx2", v2i32, store_global
|
2016-09-10 21:09:16 +08:00
|
|
|
>;
|
2016-10-07 23:53:16 +08:00
|
|
|
defm BUFFER_STORE_DWORDX3 : MUBUF_Pseudo_Stores <
|
2019-08-18 08:20:43 +08:00
|
|
|
"buffer_store_dwordx3", v3i32, store_global
|
2016-10-07 23:53:16 +08:00
|
|
|
>;
|
2016-09-10 21:09:16 +08:00
|
|
|
defm BUFFER_STORE_DWORDX4 : MUBUF_Pseudo_Stores <
|
2019-08-18 08:20:43 +08:00
|
|
|
"buffer_store_dwordx4", v4i32, store_global
|
2016-09-10 21:09:16 +08:00
|
|
|
>;
|
|
|
|
// 32-bit buffer atomics. Every instantiation passes the mnemonic, the data
// register class (VGPR_32), the value type (i32) and a *_global_32 atomic
// fragment.
// BUFFER_ATOMIC_CMPSWAP is the exception: it uses VReg_64 / v2i32 and passes
// null_frag instead of a fragment.
// NOTE(review): the doubled cmpswap operand width presumably carries both the
// swap and compare values -- confirm against the ISA documentation.
defm BUFFER_ATOMIC_SWAP : MUBUF_Pseudo_Atomics <
|
2019-08-01 11:25:52 +08:00
|
|
|
"buffer_atomic_swap", VGPR_32, i32, atomic_swap_global_32
|
2016-09-10 21:09:16 +08:00
|
|
|
>;
|
|
|
|
defm BUFFER_ATOMIC_CMPSWAP : MUBUF_Pseudo_Atomics <
|
|
|
|
"buffer_atomic_cmpswap", VReg_64, v2i32, null_frag
|
|
|
|
>;
|
|
|
|
defm BUFFER_ATOMIC_ADD : MUBUF_Pseudo_Atomics <
|
2019-08-01 11:25:52 +08:00
|
|
|
"buffer_atomic_add", VGPR_32, i32, atomic_load_add_global_32
|
2016-09-10 21:09:16 +08:00
|
|
|
>;
|
|
|
|
defm BUFFER_ATOMIC_SUB : MUBUF_Pseudo_Atomics <
|
2019-08-01 11:25:52 +08:00
|
|
|
"buffer_atomic_sub", VGPR_32, i32, atomic_load_sub_global_32
|
2016-09-10 21:09:16 +08:00
|
|
|
>;
|
|
|
|
defm BUFFER_ATOMIC_SMIN : MUBUF_Pseudo_Atomics <
|
2019-08-01 11:25:52 +08:00
|
|
|
"buffer_atomic_smin", VGPR_32, i32, atomic_load_min_global_32
|
2016-09-10 21:09:16 +08:00
|
|
|
>;
|
|
|
|
defm BUFFER_ATOMIC_UMIN : MUBUF_Pseudo_Atomics <
|
2019-08-01 11:25:52 +08:00
|
|
|
"buffer_atomic_umin", VGPR_32, i32, atomic_load_umin_global_32
|
2016-09-10 21:09:16 +08:00
|
|
|
>;
|
|
|
|
defm BUFFER_ATOMIC_SMAX : MUBUF_Pseudo_Atomics <
|
2019-08-01 11:25:52 +08:00
|
|
|
"buffer_atomic_smax", VGPR_32, i32, atomic_load_max_global_32
|
2016-09-10 21:09:16 +08:00
|
|
|
>;
|
|
|
|
defm BUFFER_ATOMIC_UMAX : MUBUF_Pseudo_Atomics <
|
2019-08-01 11:25:52 +08:00
|
|
|
"buffer_atomic_umax", VGPR_32, i32, atomic_load_umax_global_32
|
2016-09-10 21:09:16 +08:00
|
|
|
>;
|
|
|
|
defm BUFFER_ATOMIC_AND : MUBUF_Pseudo_Atomics <
|
2019-08-01 11:25:52 +08:00
|
|
|
"buffer_atomic_and", VGPR_32, i32, atomic_load_and_global_32
|
2016-09-10 21:09:16 +08:00
|
|
|
>;
|
|
|
|
defm BUFFER_ATOMIC_OR : MUBUF_Pseudo_Atomics <
|
2019-08-01 11:25:52 +08:00
|
|
|
"buffer_atomic_or", VGPR_32, i32, atomic_load_or_global_32
|
2016-09-10 21:09:16 +08:00
|
|
|
>;
|
|
|
|
defm BUFFER_ATOMIC_XOR : MUBUF_Pseudo_Atomics <
|
2019-08-01 11:25:52 +08:00
|
|
|
"buffer_atomic_xor", VGPR_32, i32, atomic_load_xor_global_32
|
2016-09-10 21:09:16 +08:00
|
|
|
>;
|
|
|
|
defm BUFFER_ATOMIC_INC : MUBUF_Pseudo_Atomics <
|
2019-08-01 11:25:52 +08:00
|
|
|
"buffer_atomic_inc", VGPR_32, i32, atomic_inc_global_32
|
2016-09-10 21:09:16 +08:00
|
|
|
>;
|
|
|
|
defm BUFFER_ATOMIC_DEC : MUBUF_Pseudo_Atomics <
|
2019-08-01 11:25:52 +08:00
|
|
|
"buffer_atomic_dec", VGPR_32, i32, atomic_dec_global_32
|
2016-09-10 21:09:16 +08:00
|
|
|
>;
|
|
|
|
// 64-bit ("_X2") buffer atomics. Every instantiation passes the mnemonic, the
// data register class (VReg_64), the value type (i64) and a *_global_64
// atomic fragment.
// BUFFER_ATOMIC_CMPSWAP_X2 is the exception: it uses VReg_128 / v2i64 and
// passes null_frag instead of a fragment.
defm BUFFER_ATOMIC_SWAP_X2 : MUBUF_Pseudo_Atomics <
|
2019-08-01 11:25:52 +08:00
|
|
|
"buffer_atomic_swap_x2", VReg_64, i64, atomic_swap_global_64
|
2016-09-10 21:09:16 +08:00
|
|
|
>;
|
|
|
|
defm BUFFER_ATOMIC_CMPSWAP_X2 : MUBUF_Pseudo_Atomics <
|
|
|
|
"buffer_atomic_cmpswap_x2", VReg_128, v2i64, null_frag
|
|
|
|
>;
|
|
|
|
defm BUFFER_ATOMIC_ADD_X2 : MUBUF_Pseudo_Atomics <
|
2019-08-01 11:25:52 +08:00
|
|
|
"buffer_atomic_add_x2", VReg_64, i64, atomic_load_add_global_64
|
2016-09-10 21:09:16 +08:00
|
|
|
>;
|
|
|
|
defm BUFFER_ATOMIC_SUB_X2 : MUBUF_Pseudo_Atomics <
|
2019-08-01 11:25:52 +08:00
|
|
|
"buffer_atomic_sub_x2", VReg_64, i64, atomic_load_sub_global_64
|
2016-09-10 21:09:16 +08:00
|
|
|
>;
|
|
|
|
defm BUFFER_ATOMIC_SMIN_X2 : MUBUF_Pseudo_Atomics <
|
2019-08-01 11:25:52 +08:00
|
|
|
"buffer_atomic_smin_x2", VReg_64, i64, atomic_load_min_global_64
|
2016-09-10 21:09:16 +08:00
|
|
|
>;
|
|
|
|
defm BUFFER_ATOMIC_UMIN_X2 : MUBUF_Pseudo_Atomics <
|
2019-08-01 11:25:52 +08:00
|
|
|
"buffer_atomic_umin_x2", VReg_64, i64, atomic_load_umin_global_64
|
2016-09-10 21:09:16 +08:00
|
|
|
>;
|
|
|
|
defm BUFFER_ATOMIC_SMAX_X2 : MUBUF_Pseudo_Atomics <
|
2019-08-01 11:25:52 +08:00
|
|
|
"buffer_atomic_smax_x2", VReg_64, i64, atomic_load_max_global_64
|
2016-09-10 21:09:16 +08:00
|
|
|
>;
|
|
|
|
defm BUFFER_ATOMIC_UMAX_X2 : MUBUF_Pseudo_Atomics <
|
2019-08-01 11:25:52 +08:00
|
|
|
"buffer_atomic_umax_x2", VReg_64, i64, atomic_load_umax_global_64
|
2016-09-10 21:09:16 +08:00
|
|
|
>;
|
|
|
|
defm BUFFER_ATOMIC_AND_X2 : MUBUF_Pseudo_Atomics <
|
2019-08-01 11:25:52 +08:00
|
|
|
"buffer_atomic_and_x2", VReg_64, i64, atomic_load_and_global_64
|
2016-09-10 21:09:16 +08:00
|
|
|
>;
|
|
|
|
defm BUFFER_ATOMIC_OR_X2 : MUBUF_Pseudo_Atomics <
|
2019-08-01 11:25:52 +08:00
|
|
|
"buffer_atomic_or_x2", VReg_64, i64, atomic_load_or_global_64
|
2016-09-10 21:09:16 +08:00
|
|
|
>;
|
|
|
|
defm BUFFER_ATOMIC_XOR_X2 : MUBUF_Pseudo_Atomics <
|
2019-08-01 11:25:52 +08:00
|
|
|
"buffer_atomic_xor_x2", VReg_64, i64, atomic_load_xor_global_64
|
2016-09-10 21:09:16 +08:00
|
|
|
>;
|
|
|
|
defm BUFFER_ATOMIC_INC_X2 : MUBUF_Pseudo_Atomics <
|
2019-08-01 11:25:52 +08:00
|
|
|
"buffer_atomic_inc_x2", VReg_64, i64, atomic_inc_global_64
|
2016-09-10 21:09:16 +08:00
|
|
|
>;
|
|
|
|
defm BUFFER_ATOMIC_DEC_X2 : MUBUF_Pseudo_Atomics <
|
2019-08-01 11:25:52 +08:00
|
|
|
"buffer_atomic_dec_x2", VReg_64, i64, atomic_dec_global_64
|
2016-09-10 21:09:16 +08:00
|
|
|
>;
|
|
|
|
|
2019-04-06 17:20:48 +08:00
|
|
|
// LDS dword store pseudo, restricted to subtargets matching isGFX8GFX9.
// It is a single def of MUBUF_Pseudo_Store_Lds that takes only the mnemonic.
let SubtargetPredicate = isGFX8GFX9 in {
|
2018-03-13 01:29:24 +08:00
|
|
|
def BUFFER_STORE_LDS_DWORD : MUBUF_Pseudo_Store_Lds <"buffer_store_lds_dword">;
|
|
|
|
}
|
|
|
|
|
2019-04-06 02:24:34 +08:00
|
|
|
// Definitions restricted to subtargets matching isGFX6. The atomics listed in
// the block comment below are commented out, so the only record this let-block
// currently produces is BUFFER_WBINVL1_SC.
let SubtargetPredicate = isGFX6 in { // isn't on CI & VI
|
2016-09-10 21:09:16 +08:00
|
|
|
|
/*
|
|
|
|
defm BUFFER_ATOMIC_RSUB : MUBUF_Pseudo_Atomics <"buffer_atomic_rsub">;
|
|
|
|
defm BUFFER_ATOMIC_FCMPSWAP : MUBUF_Pseudo_Atomics <"buffer_atomic_fcmpswap">;
|
|
|
|
defm BUFFER_ATOMIC_FMIN : MUBUF_Pseudo_Atomics <"buffer_atomic_fmin">;
|
|
|
|
defm BUFFER_ATOMIC_FMAX : MUBUF_Pseudo_Atomics <"buffer_atomic_fmax">;
|
|
|
|
defm BUFFER_ATOMIC_RSUB_X2 : MUBUF_Pseudo_Atomics <"buffer_atomic_rsub_x2">;
|
|
|
|
defm BUFFER_ATOMIC_FCMPSWAP_X2 : MUBUF_Pseudo_Atomics <"buffer_atomic_fcmpswap_x2">;
|
|
|
|
defm BUFFER_ATOMIC_FMIN_X2 : MUBUF_Pseudo_Atomics <"buffer_atomic_fmin_x2">;
|
|
|
|
defm BUFFER_ATOMIC_FMAX_X2 : MUBUF_Pseudo_Atomics <"buffer_atomic_fmax_x2">;
|
|
|
|
*/
|
|
|
|
|
|
|
|
// Built from MUBUF_Invalidate with the mnemonic and the
// int_amdgcn_buffer_wbinvl1_sc intrinsic.
def BUFFER_WBINVL1_SC : MUBUF_Invalidate <"buffer_wbinvl1_sc",
|
|
|
|
int_amdgcn_buffer_wbinvl1_sc>;
|
|
|
|
}
|
|
|
|
|
2017-09-02 02:36:06 +08:00
|
|
|
// D16 buffer loads and stores, guarded by HasD16LoadStore.
// The byte/short D16 loads pass "null_frag, 1" after the i32 value type; the
// D16_HI stores and the FORMAT_D16_HI_X load/store pair pass only the mnemonic
// and i32.
// NOTE(review): the trailing 1 presumably sets the isD16 parameter of
// MUBUF_Pseudo_Loads -- confirm against the multiclass definition.
let SubtargetPredicate = HasD16LoadStore in {
|
|
|
|
|
|
|
|
defm BUFFER_LOAD_UBYTE_D16 : MUBUF_Pseudo_Loads <
|
2019-08-18 08:20:43 +08:00
|
|
|
"buffer_load_ubyte_d16", i32, null_frag, 1
|
2017-09-02 02:36:06 +08:00
|
|
|
>;
|
|
|
|
|
|
|
|
defm BUFFER_LOAD_UBYTE_D16_HI : MUBUF_Pseudo_Loads <
|
2019-08-18 08:20:43 +08:00
|
|
|
"buffer_load_ubyte_d16_hi", i32, null_frag, 1
|
2017-09-02 02:36:06 +08:00
|
|
|
>;
|
|
|
|
|
|
|
|
defm BUFFER_LOAD_SBYTE_D16 : MUBUF_Pseudo_Loads <
|
2019-08-18 08:20:43 +08:00
|
|
|
"buffer_load_sbyte_d16", i32, null_frag, 1
|
2017-09-02 02:36:06 +08:00
|
|
|
>;
|
|
|
|
|
|
|
|
defm BUFFER_LOAD_SBYTE_D16_HI : MUBUF_Pseudo_Loads <
|
2019-08-18 08:20:43 +08:00
|
|
|
"buffer_load_sbyte_d16_hi", i32, null_frag, 1
|
2017-09-02 02:36:06 +08:00
|
|
|
>;
|
|
|
|
|
|
|
|
defm BUFFER_LOAD_SHORT_D16 : MUBUF_Pseudo_Loads <
|
2019-08-18 08:20:43 +08:00
|
|
|
"buffer_load_short_d16", i32, null_frag, 1
|
2017-09-02 02:36:06 +08:00
|
|
|
>;
|
|
|
|
|
|
|
|
defm BUFFER_LOAD_SHORT_D16_HI : MUBUF_Pseudo_Loads <
|
2019-08-18 08:20:43 +08:00
|
|
|
"buffer_load_short_d16_hi", i32, null_frag, 1
|
2017-09-02 02:36:06 +08:00
|
|
|
>;
|
|
|
|
|
|
|
|
defm BUFFER_STORE_BYTE_D16_HI : MUBUF_Pseudo_Stores <
|
2019-08-18 08:20:43 +08:00
|
|
|
"buffer_store_byte_d16_hi", i32
|
2017-09-02 02:36:06 +08:00
|
|
|
>;
|
|
|
|
|
|
|
|
defm BUFFER_STORE_SHORT_D16_HI : MUBUF_Pseudo_Stores <
|
2019-08-18 08:20:43 +08:00
|
|
|
"buffer_store_short_d16_hi", i32
|
2017-09-02 02:36:06 +08:00
|
|
|
>;
|
|
|
|
|
2018-03-28 22:53:13 +08:00
|
|
|
defm BUFFER_LOAD_FORMAT_D16_HI_X : MUBUF_Pseudo_Loads <
|
2019-08-18 08:20:43 +08:00
|
|
|
"buffer_load_format_d16_hi_x", i32
|
2018-03-28 22:53:13 +08:00
|
|
|
>;
|
|
|
|
defm BUFFER_STORE_FORMAT_D16_HI_X : MUBUF_Pseudo_Stores <
|
2019-08-18 08:20:43 +08:00
|
|
|
"buffer_store_format_d16_hi_x", i32
|
2018-03-28 22:53:13 +08:00
|
|
|
>;
|
|
|
|
|
2017-09-02 02:36:06 +08:00
|
|
|
} // End HasD16LoadStore
|
|
|
|
|
2016-09-10 21:09:16 +08:00
|
|
|
// Built from MUBUF_Invalidate with the "buffer_wbinvl1" mnemonic and the
// int_amdgcn_buffer_wbinvl1 intrinsic.
def BUFFER_WBINVL1 : MUBUF_Invalidate <"buffer_wbinvl1",
|
|
|
|
int_amdgcn_buffer_wbinvl1>;
|
|
|
|
|
2019-07-11 08:10:17 +08:00
|
|
|
// Floating-point add atomics, guarded by HasAtomicFaddInsts. Both go through
// MUBUF_Pseudo_Atomics_NO_RTN with a VGPR_32 data register class: f32 for
// ADD_F32 and v2f16 for PK_ADD_F16, each with a *_global_noret fragment.
// NOTE(review): the multiclass name suggests only no-return forms are
// generated -- confirm against its definition.
let SubtargetPredicate = HasAtomicFaddInsts in {
|
|
|
|
|
|
|
|
defm BUFFER_ATOMIC_ADD_F32 : MUBUF_Pseudo_Atomics_NO_RTN <
|
2019-08-01 11:22:40 +08:00
|
|
|
"buffer_atomic_add_f32", VGPR_32, f32, atomic_fadd_global_noret
|
2019-07-11 08:10:17 +08:00
|
|
|
>;
|
|
|
|
defm BUFFER_ATOMIC_PK_ADD_F16 : MUBUF_Pseudo_Atomics_NO_RTN <
|
2019-08-01 11:22:40 +08:00
|
|
|
"buffer_atomic_pk_add_f16", VGPR_32, v2f16, atomic_pk_fadd_global_noret
|
2019-07-11 08:10:17 +08:00
|
|
|
>;
|
|
|
|
|
|
|
|
} // End SubtargetPredicate = HasAtomicFaddInsts
|
|
|
|
|
2016-09-10 21:09:16 +08:00
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
// MTBUF Instructions
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
|
2017-06-23 00:29:22 +08:00
|
|
|
// MTBUF format loads and stores. Each instantiation passes the mnemonic and
// the data register class, which widens with the component suffix:
// X -> VGPR_32, XY -> VReg_64, XYZ -> VReg_96, XYZW -> VReg_128.
defm TBUFFER_LOAD_FORMAT_X : MTBUF_Pseudo_Loads <"tbuffer_load_format_x", VGPR_32>;
|
|
|
|
defm TBUFFER_LOAD_FORMAT_XY : MTBUF_Pseudo_Loads <"tbuffer_load_format_xy", VReg_64>;
|
2019-03-22 22:58:02 +08:00
|
|
|
defm TBUFFER_LOAD_FORMAT_XYZ : MTBUF_Pseudo_Loads <"tbuffer_load_format_xyz", VReg_96>;
|
2017-06-23 00:29:22 +08:00
|
|
|
defm TBUFFER_LOAD_FORMAT_XYZW : MTBUF_Pseudo_Loads <"tbuffer_load_format_xyzw", VReg_128>;
|
|
|
|
defm TBUFFER_STORE_FORMAT_X : MTBUF_Pseudo_Stores <"tbuffer_store_format_x", VGPR_32>;
|
|
|
|
defm TBUFFER_STORE_FORMAT_XY : MTBUF_Pseudo_Stores <"tbuffer_store_format_xy", VReg_64>;
|
2019-03-22 22:58:02 +08:00
|
|
|
defm TBUFFER_STORE_FORMAT_XYZ : MTBUF_Pseudo_Stores <"tbuffer_store_format_xyz", VReg_96>;
|
2017-06-23 00:29:22 +08:00
|
|
|
defm TBUFFER_STORE_FORMAT_XYZW : MTBUF_Pseudo_Stores <"tbuffer_store_format_xyzw", VReg_128>;
|
2016-09-10 21:09:16 +08:00
|
|
|
|
AMDGPU: Turn D16 for MIMG instructions into a regular operand
Summary:
This allows us to reduce the number of different machine instruction
opcodes, which reduces the table sizes and helps flatten the TableGen
multiclass hierarchies.
We can do this because for each hardware MIMG opcode, we have a full set
of IMAGE_xxx_Vn_Vm machine instructions for all required sizes of vdata
and vaddr registers. Instead of having separate D16 machine instructions,
a packed D16 instruction loading e.g. 4 components can simply use the
same V2 opcode variant that non-D16 instructions use.
We still require a TSFlag for D16 buffer instructions, because the
D16-ness of buffer instructions is part of the opcode. Renaming the flag
should help avoid future confusion.
The one non-obvious code change is that for gather4 instructions, the
disassembler can no longer automatically decide whether to use a V2 or
a V4 variant. The existing logic which chooses the correct variant for
other MIMG instructions is extended to cover gather4 as well.
As a bonus, some of the assembler error messages are now more helpful
(e.g., complaining about a wrong data size instead of a non-existing
instruction).
While we're at it, delete a whole bunch of dead legacy TableGen code.
Change-Id: I89b02c2841c06f95e662541433e597f5d4553978
Reviewers: arsenm, rampitec, kzhuravl, artem.tamazov, dp, rtaylor
Subscribers: wdng, yaxunl, dstuttard, tpr, t-tye, llvm-commits
Differential Revision: https://reviews.llvm.org/D47434
llvm-svn: 335222
2018-06-21 21:36:01 +08:00
|
|
|
// D16 MTBUF loads/stores for subtargets with HasUnpackedD16VMem. The let also
// sets D16Buf = 1 on every record in the block. Record names carry a _gfx80
// suffix, and the register classes match the non-D16 TBUFFER formats:
// X -> VGPR_32, XY -> VReg_64, XYZ -> VReg_96, XYZW -> VReg_128.
let SubtargetPredicate = HasUnpackedD16VMem, D16Buf = 1 in {
|
2018-01-13 05:12:19 +08:00
|
|
|
defm TBUFFER_LOAD_FORMAT_D16_X_gfx80 : MTBUF_Pseudo_Loads <"tbuffer_load_format_d16_x", VGPR_32>;
|
|
|
|
defm TBUFFER_LOAD_FORMAT_D16_XY_gfx80 : MTBUF_Pseudo_Loads <"tbuffer_load_format_d16_xy", VReg_64>;
|
|
|
|
defm TBUFFER_LOAD_FORMAT_D16_XYZ_gfx80 : MTBUF_Pseudo_Loads <"tbuffer_load_format_d16_xyz", VReg_96>;
|
|
|
|
defm TBUFFER_LOAD_FORMAT_D16_XYZW_gfx80 : MTBUF_Pseudo_Loads <"tbuffer_load_format_d16_xyzw", VReg_128>;
|
|
|
|
defm TBUFFER_STORE_FORMAT_D16_X_gfx80 : MTBUF_Pseudo_Stores <"tbuffer_store_format_d16_x", VGPR_32>;
|
|
|
|
defm TBUFFER_STORE_FORMAT_D16_XY_gfx80 : MTBUF_Pseudo_Stores <"tbuffer_store_format_d16_xy", VReg_64>;
|
|
|
|
defm TBUFFER_STORE_FORMAT_D16_XYZ_gfx80 : MTBUF_Pseudo_Stores <"tbuffer_store_format_d16_xyz", VReg_96>;
|
|
|
|
defm TBUFFER_STORE_FORMAT_D16_XYZW_gfx80 : MTBUF_Pseudo_Stores <"tbuffer_store_format_d16_xyzw", VReg_128>;
|
|
|
|
} // End HasUnpackedD16VMem.
|
|
|
|
|
AMDGPU: Turn D16 for MIMG instructions into a regular operand
Summary:
This allows us to reduce the number of different machine instruction
opcodes, which reduces the table sizes and helps flatten the TableGen
multiclass hierarchies.
We can do this because for each hardware MIMG opcode, we have a full set
of IMAGE_xxx_Vn_Vm machine instructions for all required sizes of vdata
and vaddr registers. Instead of having separate D16 machine instructions,
a packed D16 instruction loading e.g. 4 components can simply use the
same V2 opcode variant that non-D16 instructions use.
We still require a TSFlag for D16 buffer instructions, because the
D16-ness of buffer instructions is part of the opcode. Renaming the flag
should help avoid future confusion.
The one non-obvious code change is that for gather4 instructions, the
disassembler can no longer automatically decide whether to use a V2 or
a V4 variant. The existing logic which chooses the correct variant for
other MIMG instructions is extended to cover gather4 as well.
As a bonus, some of the assembler error messages are now more helpful
(e.g., complaining about a wrong data size instead of a non-existing
instruction).
While we're at it, delete a whole bunch of dead legacy TableGen code.
Change-Id: I89b02c2841c06f95e662541433e597f5d4553978
Reviewers: arsenm, rampitec, kzhuravl, artem.tamazov, dp, rtaylor
Subscribers: wdng, yaxunl, dstuttard, tpr, t-tye, llvm-commits
Differential Revision: https://reviews.llvm.org/D47434
llvm-svn: 335222
2018-06-21 21:36:01 +08:00
|
|
|
// D16 MTBUF loads/stores for subtargets with HasPackedD16VMem. The let also
// sets D16Buf = 1 on every record in the block. Register classes are narrower
// than in the _gfx80 (unpacked) definitions: X and XY use VGPR_32, XYZ and
// XYZW use VReg_64.
let SubtargetPredicate = HasPackedD16VMem, D16Buf = 1 in {
|
2018-01-13 05:12:19 +08:00
|
|
|
defm TBUFFER_LOAD_FORMAT_D16_X : MTBUF_Pseudo_Loads <"tbuffer_load_format_d16_x", VGPR_32>;
|
|
|
|
defm TBUFFER_LOAD_FORMAT_D16_XY : MTBUF_Pseudo_Loads <"tbuffer_load_format_d16_xy", VGPR_32>;
|
|
|
|
defm TBUFFER_LOAD_FORMAT_D16_XYZ : MTBUF_Pseudo_Loads <"tbuffer_load_format_d16_xyz", VReg_64>;
|
|
|
|
defm TBUFFER_LOAD_FORMAT_D16_XYZW : MTBUF_Pseudo_Loads <"tbuffer_load_format_d16_xyzw", VReg_64>;
|
|
|
|
defm TBUFFER_STORE_FORMAT_D16_X : MTBUF_Pseudo_Stores <"tbuffer_store_format_d16_x", VGPR_32>;
|
|
|
|
defm TBUFFER_STORE_FORMAT_D16_XY : MTBUF_Pseudo_Stores <"tbuffer_store_format_d16_xy", VGPR_32>;
|
|
|
|
defm TBUFFER_STORE_FORMAT_D16_XYZ : MTBUF_Pseudo_Stores <"tbuffer_store_format_d16_xyz", VReg_64>;
|
|
|
|
defm TBUFFER_STORE_FORMAT_D16_XYZW : MTBUF_Pseudo_Stores <"tbuffer_store_format_d16_xyzw", VReg_64>;
|
|
|
|
} // End HasPackedD16VMem.
|
|
|
|
|
2019-04-06 02:24:34 +08:00
|
|
|
// Guarded by isGFX7Plus. The only record in this let-block is
// BUFFER_WBINVL1_VOL, built from MUBUF_Invalidate with the
// int_amdgcn_buffer_wbinvl1_vol intrinsic.
let SubtargetPredicate = isGFX7Plus in {
|
2016-09-10 21:09:16 +08:00
|
|
|
|
|
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
// Instruction definitions for CI and newer.
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
|
|
|
|
def BUFFER_WBINVL1_VOL : MUBUF_Invalidate <"buffer_wbinvl1_vol",
|
|
|
|
int_amdgcn_buffer_wbinvl1_vol>;
|
|
|
|
|
2019-04-06 02:24:34 +08:00
|
|
|
} // End let SubtargetPredicate = isGFX7Plus
|
2016-09-10 21:09:16 +08:00
|
|
|
|
2019-05-01 06:08:23 +08:00
|
|
|
// Guarded by isGFX10Plus. Unlike the BUFFER_WBINVL1* defs in this file, these
// two pass only the mnemonic to MUBUF_Invalidate (no intrinsic argument).
let SubtargetPredicate = isGFX10Plus in {
|
|
|
|
def BUFFER_GL0_INV : MUBUF_Invalidate<"buffer_gl0_inv">;
|
|
|
|
def BUFFER_GL1_INV : MUBUF_Invalidate<"buffer_gl1_inv">;
|
|
|
|
} // End SubtargetPredicate = isGFX10Plus
|
|
|
|
|
2016-09-10 21:09:16 +08:00
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
// MUBUF Patterns
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
|
[AMDGPU] New tbuffer intrinsics
Summary:
This commit adds new intrinsics
llvm.amdgcn.raw.tbuffer.load
llvm.amdgcn.struct.tbuffer.load
llvm.amdgcn.raw.tbuffer.store
llvm.amdgcn.struct.tbuffer.store
with the following changes from the llvm.amdgcn.tbuffer.* intrinsics:
* there are separate raw and struct versions: raw does not have an index
arg and sets idxen=0 in the instruction, and struct always sets
idxen=1 in the instruction even if the index is 0, to allow for the
fact that gfx9 does bounds checking differently depending on whether
idxen is set;
* there is a combined format arg (dfmt+nfmt)
* there is a combined cachepolicy arg (glc+slc)
* there are now only two offset args: one for the offset that is
included in bounds checking and swizzling, to be split between the
instruction's voffset and immoffset fields, and one for the offset
that is excluded from bounds checking and swizzling, to go into the
instruction's soffset field.
The AMDISD::TBUFFER_* SD nodes always have an index operand, all three
offset operands, combined format operand, combined cachepolicy operand,
and an extra idxen operand.
The tbuffer pseudo- and real instructions now also have a combined
format operand.
The obsolescent llvm.amdgcn.tbuffer.* and llvm.SI.tbuffer.store
intrinsics continue to work.
V2: Separate raw and struct intrinsics.
V3: Moved extract_glc and extract_slc defs to a more sensible place.
V4: Rebased on D49995.
V5: Only two separate offset args instead of three.
V6: Pseudo- and real instructions have joint format operand.
V7: Restored optionality of dfmt and nfmt in assembler.
V8: Addressed minor review comments.
Subscribers: arsenm, kzhuravl, wdng, nhaehnle, yaxunl, dstuttard, t-tye, llvm-commits
Differential Revision: https://reviews.llvm.org/D49026
Change-Id: If22ad77e349fac3a5d2f72dda53c010377d470d4
llvm-svn: 340268
2018-08-21 19:06:05 +08:00
|
|
|
// Immediate transform: returns bit 0 of the matched immediate as an i8 target
// constant. The load patterns apply it to $cachepolicy to fill the glc operand.
def extract_glc : SDNodeXForm<imm, [{
|
|
|
|
return CurDAG->getTargetConstant(N->getZExtValue() & 1, SDLoc(N), MVT::i8);
|
|
|
|
}]>;
|
|
|
|
|
|
|
|
// Immediate transform: returns bit 1 of the matched immediate as an i8 target
// constant. The load patterns apply it to $cachepolicy to fill the slc operand.
def extract_slc : SDNodeXForm<imm, [{
|
|
|
|
return CurDAG->getTargetConstant((N->getZExtValue() >> 1) & 1, SDLoc(N), MVT::i8);
|
|
|
|
}]>;
|
|
|
|
|
2019-05-01 06:08:23 +08:00
|
|
|
// Immediate transform: returns bit 2 of the matched immediate as an i8 target
// constant. The load patterns apply it to $cachepolicy to fill the dlc operand.
def extract_dlc : SDNodeXForm<imm, [{
|
|
|
|
return CurDAG->getTargetConstant((N->getZExtValue() >> 2) & 1, SDLoc(N), MVT::i8);
|
|
|
|
}]>;
|
|
|
|
|
2016-09-10 21:09:16 +08:00
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
// buffer_load/store_format patterns
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
|
|
|
|
multiclass MUBUF_LoadIntrinsicPat<SDPatternOperator name, ValueType vt,
|
|
|
|
string opcode> {
|
2017-10-03 08:06:41 +08:00
|
|
|
def : GCNPat<
|
Revert r372285 "GlobalISel: Don't materialize immarg arguments to intrinsics"
This broke the Chromium build, causing it to fail with e.g.
fatal error: error in backend: Cannot select: t362: v4i32 = X86ISD::VSHLI t392, Constant:i8<15>
See llvm-commits thread of r372285 for details.
This also reverts r372286, r372287, r372288, r372289, r372290, r372291,
r372292, r372293, r372296, and r372297, which seemed to depend on the
main commit.
> Encode them directly as an imm argument to G_INTRINSIC*.
>
> Since now intrinsics can now define what parameters are required to be
> immediates, avoid using registers for them. Intrinsics could
> potentially want a constant that isn't a legal register type. Also,
> since G_CONSTANT is subject to CSE and legalization, transforms could
> potentially obscure the value (and create extra work for the
> selector). The register bank of a G_CONSTANT is also meaningful, so
> this could throw off future folding and legalization logic for AMDGPU.
>
> This will be much more convenient to work with than needing to call
> getConstantVRegVal and checking if it may have failed for every
> constant intrinsic parameter. AMDGPU has quite a lot of intrinsics wth
> immarg operands, many of which need inspection during lowering. Having
> to find the value in a register is going to add a lot of boilerplate
> and waste compile time.
>
> SelectionDAG has always provided TargetConstant for constants which
> should not be legalized or materialized in a register. The distinction
> between Constant and TargetConstant was somewhat fuzzy, and there was
> no automatic way to force usage of TargetConstant for certain
> intrinsic parameters. They were both ultimately ConstantSDNode, and it
> was inconsistently used. It was quite easy to mis-select an
> instruction requiring an immediate. For SelectionDAG, start emitting
> TargetConstant for these arguments, and using timm to match them.
>
> Most of the work here is to cleanup target handling of constants. Some
> targets process intrinsics through intermediate custom nodes, which
> need to preserve TargetConstant usage to match the intrinsic
> expectation. Pattern inputs now need to distinguish whether a constant
> is merely compatible with an operand or whether it is mandatory.
>
> The GlobalISelEmitter needs to treat timm as a special case of a leaf
> node, simlar to MachineBasicBlock operands. This should also enable
> handling of patterns for some G_* instructions with immediates, like
> G_FENCE or G_EXTRACT.
>
> This does include a workaround for a crash in GlobalISelEmitter when
> ARM tries to uses "imm" in an output with a "timm" pattern source.
llvm-svn: 372314
2019-09-19 20:33:07 +08:00
|
|
|
(vt (name v4i32:$rsrc, 0, 0, i32:$soffset, imm:$offset,
|
|
|
|
imm:$cachepolicy, 0)),
|
2016-09-10 21:09:16 +08:00
|
|
|
(!cast<MUBUF_Pseudo>(opcode # _OFFSET) $rsrc, $soffset, (as_i16imm $offset),
|
2019-05-01 06:08:23 +08:00
|
|
|
(extract_glc $cachepolicy), (extract_slc $cachepolicy), 0, (extract_dlc $cachepolicy))
|
2016-09-10 21:09:16 +08:00
|
|
|
>;
|
|
|
|
|
2017-10-03 08:06:41 +08:00
|
|
|
def : GCNPat<
|
Revert r372285 "GlobalISel: Don't materialize immarg arguments to intrinsics"
This broke the Chromium build, causing it to fail with e.g.
fatal error: error in backend: Cannot select: t362: v4i32 = X86ISD::VSHLI t392, Constant:i8<15>
See llvm-commits thread of r372285 for details.
This also reverts r372286, r372287, r372288, r372289, r372290, r372291,
r372292, r372293, r372296, and r372297, which seemed to depend on the
main commit.
> Encode them directly as an imm argument to G_INTRINSIC*.
>
> Since now intrinsics can now define what parameters are required to be
> immediates, avoid using registers for them. Intrinsics could
> potentially want a constant that isn't a legal register type. Also,
> since G_CONSTANT is subject to CSE and legalization, transforms could
> potentially obscure the value (and create extra work for the
> selector). The register bank of a G_CONSTANT is also meaningful, so
> this could throw off future folding and legalization logic for AMDGPU.
>
> This will be much more convenient to work with than needing to call
> getConstantVRegVal and checking if it may have failed for every
> constant intrinsic parameter. AMDGPU has quite a lot of intrinsics wth
> immarg operands, many of which need inspection during lowering. Having
> to find the value in a register is going to add a lot of boilerplate
> and waste compile time.
>
> SelectionDAG has always provided TargetConstant for constants which
> should not be legalized or materialized in a register. The distinction
> between Constant and TargetConstant was somewhat fuzzy, and there was
> no automatic way to force usage of TargetConstant for certain
> intrinsic parameters. They were both ultimately ConstantSDNode, and it
> was inconsistently used. It was quite easy to mis-select an
> instruction requiring an immediate. For SelectionDAG, start emitting
> TargetConstant for these arguments, and using timm to match them.
>
> Most of the work here is to cleanup target handling of constants. Some
> targets process intrinsics through intermediate custom nodes, which
> need to preserve TargetConstant usage to match the intrinsic
> expectation. Pattern inputs now need to distinguish whether a constant
> is merely compatible with an operand or whether it is mandatory.
>
> The GlobalISelEmitter needs to treat timm as a special case of a leaf
> node, simlar to MachineBasicBlock operands. This should also enable
> handling of patterns for some G_* instructions with immediates, like
> G_FENCE or G_EXTRACT.
>
> This does include a workaround for a crash in GlobalISelEmitter when
> ARM tries to uses "imm" in an output with a "timm" pattern source.
llvm-svn: 372314
2019-09-19 20:33:07 +08:00
|
|
|
(vt (name v4i32:$rsrc, 0, i32:$voffset, i32:$soffset, imm:$offset,
|
|
|
|
imm:$cachepolicy, 0)),
|
[AMDGPU] New buffer intrinsics
Summary:
This commit adds new intrinsics
llvm.amdgcn.raw.buffer.load
llvm.amdgcn.raw.buffer.load.format
llvm.amdgcn.raw.buffer.load.format.d16
llvm.amdgcn.struct.buffer.load
llvm.amdgcn.struct.buffer.load.format
llvm.amdgcn.struct.buffer.load.format.d16
llvm.amdgcn.raw.buffer.store
llvm.amdgcn.raw.buffer.store.format
llvm.amdgcn.raw.buffer.store.format.d16
llvm.amdgcn.struct.buffer.store
llvm.amdgcn.struct.buffer.store.format
llvm.amdgcn.struct.buffer.store.format.d16
llvm.amdgcn.raw.buffer.atomic.*
llvm.amdgcn.struct.buffer.atomic.*
with the following changes from the llvm.amdgcn.buffer.*
intrinsics:
* there are separate raw and struct versions: raw does not have an
index arg and sets idxen=0 in the instruction, and struct always sets
idxen=1 in the instruction even if the index is 0, to allow for the
fact that gfx9 does bounds checking differently depending on whether
idxen is set;
* there is a combined cachepolicy arg (glc+slc)
* there are now only two offset args: one for the offset that is
included in bounds checking and swizzling, to be split between the
instruction's voffset and immoffset fields, and one for the offset
that is excluded from bounds checking and swizzling, to go into the
instruction's soffset field.
The AMDISD::BUFFER_* SD nodes always have an index operand, all three
offset operands, combined cachepolicy operand, and an extra idxen
operand.
The obsolescent llvm.amdgcn.buffer.* intrinsics continue to work.
Subscribers: arsenm, kzhuravl, wdng, nhaehnle, yaxunl, dstuttard, t-tye, jfb, llvm-commits
Differential Revision: https://reviews.llvm.org/D50306
Change-Id: If897ea7dc34fcbf4d5496e98cc99a934f62fc205
llvm-svn: 340269
2018-08-21 19:07:10 +08:00
|
|
|
(!cast<MUBUF_Pseudo>(opcode # _OFFEN) $voffset, $rsrc, $soffset, (as_i16imm $offset),
|
2019-05-01 06:08:23 +08:00
|
|
|
(extract_glc $cachepolicy), (extract_slc $cachepolicy), 0, (extract_dlc $cachepolicy))
|
2016-09-10 21:09:16 +08:00
|
|
|
>;
|
|
|
|
|
2017-10-03 08:06:41 +08:00
|
|
|
def : GCNPat<
|
Revert r372285 "GlobalISel: Don't materialize immarg arguments to intrinsics"
This broke the Chromium build, causing it to fail with e.g.
fatal error: error in backend: Cannot select: t362: v4i32 = X86ISD::VSHLI t392, Constant:i8<15>
See llvm-commits thread of r372285 for details.
This also reverts r372286, r372287, r372288, r372289, r372290, r372291,
r372292, r372293, r372296, and r372297, which seemed to depend on the
main commit.
> Encode them directly as an imm argument to G_INTRINSIC*.
>
> Since now intrinsics can now define what parameters are required to be
> immediates, avoid using registers for them. Intrinsics could
> potentially want a constant that isn't a legal register type. Also,
> since G_CONSTANT is subject to CSE and legalization, transforms could
> potentially obscure the value (and create extra work for the
> selector). The register bank of a G_CONSTANT is also meaningful, so
> this could throw off future folding and legalization logic for AMDGPU.
>
> This will be much more convenient to work with than needing to call
> getConstantVRegVal and checking if it may have failed for every
> constant intrinsic parameter. AMDGPU has quite a lot of intrinsics wth
> immarg operands, many of which need inspection during lowering. Having
> to find the value in a register is going to add a lot of boilerplate
> and waste compile time.
>
> SelectionDAG has always provided TargetConstant for constants which
> should not be legalized or materialized in a register. The distinction
> between Constant and TargetConstant was somewhat fuzzy, and there was
> no automatic way to force usage of TargetConstant for certain
> intrinsic parameters. They were both ultimately ConstantSDNode, and it
> was inconsistently used. It was quite easy to mis-select an
> instruction requiring an immediate. For SelectionDAG, start emitting
> TargetConstant for these arguments, and using timm to match them.
>
> Most of the work here is to cleanup target handling of constants. Some
> targets process intrinsics through intermediate custom nodes, which
> need to preserve TargetConstant usage to match the intrinsic
> expectation. Pattern inputs now need to distinguish whether a constant
> is merely compatible with an operand or whether it is mandatory.
>
> The GlobalISelEmitter needs to treat timm as a special case of a leaf
> node, simlar to MachineBasicBlock operands. This should also enable
> handling of patterns for some G_* instructions with immediates, like
> G_FENCE or G_EXTRACT.
>
> This does include a workaround for a crash in GlobalISelEmitter when
> ARM tries to uses "imm" in an output with a "timm" pattern source.
llvm-svn: 372314
2019-09-19 20:33:07 +08:00
|
|
|
(vt (name v4i32:$rsrc, i32:$vindex, 0, i32:$soffset, imm:$offset,
|
|
|
|
imm:$cachepolicy, imm)),
|
[AMDGPU] New buffer intrinsics
Summary:
This commit adds new intrinsics
llvm.amdgcn.raw.buffer.load
llvm.amdgcn.raw.buffer.load.format
llvm.amdgcn.raw.buffer.load.format.d16
llvm.amdgcn.struct.buffer.load
llvm.amdgcn.struct.buffer.load.format
llvm.amdgcn.struct.buffer.load.format.d16
llvm.amdgcn.raw.buffer.store
llvm.amdgcn.raw.buffer.store.format
llvm.amdgcn.raw.buffer.store.format.d16
llvm.amdgcn.struct.buffer.store
llvm.amdgcn.struct.buffer.store.format
llvm.amdgcn.struct.buffer.store.format.d16
llvm.amdgcn.raw.buffer.atomic.*
llvm.amdgcn.struct.buffer.atomic.*
with the following changes from the llvm.amdgcn.buffer.*
intrinsics:
* there are separate raw and struct versions: raw does not have an
index arg and sets idxen=0 in the instruction, and struct always sets
idxen=1 in the instruction even if the index is 0, to allow for the
fact that gfx9 does bounds checking differently depending on whether
idxen is set;
* there is a combined cachepolicy arg (glc+slc)
* there are now only two offset args: one for the offset that is
included in bounds checking and swizzling, to be split between the
instruction's voffset and immoffset fields, and one for the offset
that is excluded from bounds checking and swizzling, to go into the
instruction's soffset field.
The AMDISD::BUFFER_* SD nodes always have an index operand, all three
offset operands, combined cachepolicy operand, and an extra idxen
operand.
The obsolescent llvm.amdgcn.buffer.* intrinsics continue to work.
Subscribers: arsenm, kzhuravl, wdng, nhaehnle, yaxunl, dstuttard, t-tye, jfb, llvm-commits
Differential Revision: https://reviews.llvm.org/D50306
Change-Id: If897ea7dc34fcbf4d5496e98cc99a934f62fc205
llvm-svn: 340269
2018-08-21 19:07:10 +08:00
|
|
|
(!cast<MUBUF_Pseudo>(opcode # _IDXEN) $vindex, $rsrc, $soffset, (as_i16imm $offset),
|
2019-05-01 06:08:23 +08:00
|
|
|
(extract_glc $cachepolicy), (extract_slc $cachepolicy), 0, (extract_dlc $cachepolicy))
|
2016-09-10 21:09:16 +08:00
|
|
|
>;
|
|
|
|
|
2017-10-03 08:06:41 +08:00
|
|
|
def : GCNPat<
|
Revert r372285 "GlobalISel: Don't materialize immarg arguments to intrinsics"
This broke the Chromium build, causing it to fail with e.g.
fatal error: error in backend: Cannot select: t362: v4i32 = X86ISD::VSHLI t392, Constant:i8<15>
See llvm-commits thread of r372285 for details.
This also reverts r372286, r372287, r372288, r372289, r372290, r372291,
r372292, r372293, r372296, and r372297, which seemed to depend on the
main commit.
> Encode them directly as an imm argument to G_INTRINSIC*.
>
> Since intrinsics can now define what parameters are required to be
> immediates, avoid using registers for them. Intrinsics could
> potentially want a constant that isn't a legal register type. Also,
> since G_CONSTANT is subject to CSE and legalization, transforms could
> potentially obscure the value (and create extra work for the
> selector). The register bank of a G_CONSTANT is also meaningful, so
> this could throw off future folding and legalization logic for AMDGPU.
>
> This will be much more convenient to work with than needing to call
> getConstantVRegVal and checking if it may have failed for every
> constant intrinsic parameter. AMDGPU has quite a lot of intrinsics with
> immarg operands, many of which need inspection during lowering. Having
> to find the value in a register is going to add a lot of boilerplate
> and waste compile time.
>
> SelectionDAG has always provided TargetConstant for constants which
> should not be legalized or materialized in a register. The distinction
> between Constant and TargetConstant was somewhat fuzzy, and there was
> no automatic way to force usage of TargetConstant for certain
> intrinsic parameters. They were both ultimately ConstantSDNode, and it
> was inconsistently used. It was quite easy to mis-select an
> instruction requiring an immediate. For SelectionDAG, start emitting
> TargetConstant for these arguments, and using timm to match them.
>
> Most of the work here is to cleanup target handling of constants. Some
> targets process intrinsics through intermediate custom nodes, which
> need to preserve TargetConstant usage to match the intrinsic
> expectation. Pattern inputs now need to distinguish whether a constant
> is merely compatible with an operand or whether it is mandatory.
>
> The GlobalISelEmitter needs to treat timm as a special case of a leaf
> node, similar to MachineBasicBlock operands. This should also enable
> handling of patterns for some G_* instructions with immediates, like
> G_FENCE or G_EXTRACT.
>
> This does include a workaround for a crash in GlobalISelEmitter when
> ARM tries to use "imm" in an output with a "timm" pattern source.
llvm-svn: 372314
2019-09-19 20:33:07 +08:00
|
|
|
(vt (name v4i32:$rsrc, i32:$vindex, i32:$voffset, i32:$soffset, imm:$offset,
|
|
|
|
imm:$cachepolicy, imm)),
|
2016-09-10 21:09:16 +08:00
|
|
|
(!cast<MUBUF_Pseudo>(opcode # _BOTHEN)
|
|
|
|
(REG_SEQUENCE VReg_64, $vindex, sub0, $voffset, sub1),
|
|
|
|
$rsrc, $soffset, (as_i16imm $offset),
|
2019-05-01 06:08:23 +08:00
|
|
|
(extract_glc $cachepolicy), (extract_slc $cachepolicy), 0, (extract_dlc $cachepolicy))
|
2016-09-10 21:09:16 +08:00
|
|
|
>;
|
|
|
|
}
|
|
|
|
|
2016-12-21 01:19:44 +08:00
|
|
|
// Instantiate MUBUF_LoadIntrinsicPat for the SIbuffer_load_format node, once
// per result type. The opcode suffix follows the component count
// (1 -> X, 2 -> XY, 3 -> XYZ, 4 -> XYZW); the float and integer types of the
// same width share one opcode.
defm : MUBUF_LoadIntrinsicPat<SIbuffer_load_format, f32, "BUFFER_LOAD_FORMAT_X">;
|
2018-08-21 19:08:12 +08:00
|
|
|
defm : MUBUF_LoadIntrinsicPat<SIbuffer_load_format, i32, "BUFFER_LOAD_FORMAT_X">;
|
2016-12-21 01:19:44 +08:00
|
|
|
defm : MUBUF_LoadIntrinsicPat<SIbuffer_load_format, v2f32, "BUFFER_LOAD_FORMAT_XY">;
|
2018-08-21 19:08:12 +08:00
|
|
|
defm : MUBUF_LoadIntrinsicPat<SIbuffer_load_format, v2i32, "BUFFER_LOAD_FORMAT_XY">;
|
2019-03-22 22:58:02 +08:00
|
|
|
defm : MUBUF_LoadIntrinsicPat<SIbuffer_load_format, v3f32, "BUFFER_LOAD_FORMAT_XYZ">;
|
|
|
|
defm : MUBUF_LoadIntrinsicPat<SIbuffer_load_format, v3i32, "BUFFER_LOAD_FORMAT_XYZ">;
|
2016-12-21 01:19:44 +08:00
|
|
|
defm : MUBUF_LoadIntrinsicPat<SIbuffer_load_format, v4f32, "BUFFER_LOAD_FORMAT_XYZW">;
|
2018-08-21 19:08:12 +08:00
|
|
|
defm : MUBUF_LoadIntrinsicPat<SIbuffer_load_format, v4i32, "BUFFER_LOAD_FORMAT_XYZW">;
|
2018-01-13 05:12:19 +08:00
|
|
|
|
|
|
|
// D16 format-load patterns guarded by HasUnpackedD16VMem. These select the
// "_gfx80" opcode variants, and the 2- and 4-component results are typed
// v2i32 / v4i32 here rather than as 16-bit vectors.
// NOTE(review): presumably each 16-bit component occupies its own 32-bit
// register on these subtargets - confirm against the D16 pseudo definitions.
let SubtargetPredicate = HasUnpackedD16VMem in {
|
2018-05-22 14:32:10 +08:00
|
|
|
defm : MUBUF_LoadIntrinsicPat<SIbuffer_load_format_d16, f16, "BUFFER_LOAD_FORMAT_D16_X_gfx80">;
|
2018-08-21 19:08:12 +08:00
|
|
|
defm : MUBUF_LoadIntrinsicPat<SIbuffer_load_format_d16, i16, "BUFFER_LOAD_FORMAT_D16_X_gfx80">;
|
2018-01-13 05:12:19 +08:00
|
|
|
defm : MUBUF_LoadIntrinsicPat<SIbuffer_load_format_d16, v2i32, "BUFFER_LOAD_FORMAT_D16_XY_gfx80">;
|
|
|
|
defm : MUBUF_LoadIntrinsicPat<SIbuffer_load_format_d16, v4i32, "BUFFER_LOAD_FORMAT_D16_XYZW_gfx80">;
|
|
|
|
} // End HasUnpackedD16VMem.
|
|
|
|
|
|
|
|
// D16 format-load patterns guarded by HasPackedD16VMem. Results use the
// 16-bit scalar and vector types directly (f16/i16, v2f16/v2i16,
// v4f16/v4i16) and select the plain BUFFER_LOAD_FORMAT_D16_* opcodes;
// the float and integer types of the same shape share one opcode.
let SubtargetPredicate = HasPackedD16VMem in {
|
2018-05-22 14:32:10 +08:00
|
|
|
defm : MUBUF_LoadIntrinsicPat<SIbuffer_load_format_d16, f16, "BUFFER_LOAD_FORMAT_D16_X">;
|
2018-08-21 19:08:12 +08:00
|
|
|
defm : MUBUF_LoadIntrinsicPat<SIbuffer_load_format_d16, i16, "BUFFER_LOAD_FORMAT_D16_X">;
|
2018-05-22 14:32:10 +08:00
|
|
|
defm : MUBUF_LoadIntrinsicPat<SIbuffer_load_format_d16, v2f16, "BUFFER_LOAD_FORMAT_D16_XY">;
|
2018-08-21 19:08:12 +08:00
|
|
|
defm : MUBUF_LoadIntrinsicPat<SIbuffer_load_format_d16, v2i16, "BUFFER_LOAD_FORMAT_D16_XY">;
|
2018-06-15 23:15:46 +08:00
|
|
|
defm : MUBUF_LoadIntrinsicPat<SIbuffer_load_format_d16, v4f16, "BUFFER_LOAD_FORMAT_D16_XYZW">;
|
2018-08-21 19:08:12 +08:00
|
|
|
defm : MUBUF_LoadIntrinsicPat<SIbuffer_load_format_d16, v4i16, "BUFFER_LOAD_FORMAT_D16_XYZW">;
|
2018-01-13 05:12:19 +08:00
|
|
|
} // End HasPackedD16VMem.
|
|
|
|
|
2016-12-21 01:19:44 +08:00
|
|
|
// Instantiate MUBUF_LoadIntrinsicPat for the SIbuffer_load node. The opcode
// is chosen by the total size of the result type:
//   f32, i32, v2i16, v2f16     -> BUFFER_LOAD_DWORD
//   v2f32, v2i32, v4i16, v4f16 -> BUFFER_LOAD_DWORDX2
//   v3f32, v3i32               -> BUFFER_LOAD_DWORDX3
//   v4f32, v4i32               -> BUFFER_LOAD_DWORDX4
defm : MUBUF_LoadIntrinsicPat<SIbuffer_load, f32, "BUFFER_LOAD_DWORD">;
|
2018-08-21 19:08:12 +08:00
|
|
|
defm : MUBUF_LoadIntrinsicPat<SIbuffer_load, i32, "BUFFER_LOAD_DWORD">;
|
2019-08-05 23:59:07 +08:00
|
|
|
defm : MUBUF_LoadIntrinsicPat<SIbuffer_load, v2i16, "BUFFER_LOAD_DWORD">;
|
|
|
|
defm : MUBUF_LoadIntrinsicPat<SIbuffer_load, v2f16, "BUFFER_LOAD_DWORD">;
|
2016-12-21 01:19:44 +08:00
|
|
|
defm : MUBUF_LoadIntrinsicPat<SIbuffer_load, v2f32, "BUFFER_LOAD_DWORDX2">;
|
2018-08-21 19:08:12 +08:00
|
|
|
defm : MUBUF_LoadIntrinsicPat<SIbuffer_load, v2i32, "BUFFER_LOAD_DWORDX2">;
|
2019-08-05 23:59:07 +08:00
|
|
|
defm : MUBUF_LoadIntrinsicPat<SIbuffer_load, v4i16, "BUFFER_LOAD_DWORDX2">;
|
|
|
|
defm : MUBUF_LoadIntrinsicPat<SIbuffer_load, v4f16, "BUFFER_LOAD_DWORDX2">;
|
2019-03-22 22:58:02 +08:00
|
|
|
defm : MUBUF_LoadIntrinsicPat<SIbuffer_load, v3f32, "BUFFER_LOAD_DWORDX3">;
|
|
|
|
defm : MUBUF_LoadIntrinsicPat<SIbuffer_load, v3i32, "BUFFER_LOAD_DWORDX3">;
|
2016-12-21 01:19:44 +08:00
|
|
|
defm : MUBUF_LoadIntrinsicPat<SIbuffer_load, v4f32, "BUFFER_LOAD_DWORDX4">;
|
2018-08-21 19:08:12 +08:00
|
|
|
defm : MUBUF_LoadIntrinsicPat<SIbuffer_load, v4i32, "BUFFER_LOAD_DWORDX4">;
|
[AMDGPU] Add buffer/load 8/16 bit overloaded intrinsics
Summary:
Add buffer store/load 8/16 overloaded intrinsics for buffer, raw_buffer and struct_buffer
Change-Id: I166a29f071b2ff4e4683fb0392564b1f223ac61d
Subscribers: arsenm, kzhuravl, jvesely, wdng, nhaehnle, yaxunl, dstuttard, tpr, t-tye, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D59265
llvm-svn: 356465
2019-03-20 00:07:00 +08:00
|
|
|
// 8- and 16-bit buffer loads. Each node has its own opcode and all four
// produce an i32 result: the byte/short nodes select the S* (SBYTE/SSHORT)
// opcodes and the ubyte/ushort nodes select the U* (UBYTE/USHORT) opcodes.
defm : MUBUF_LoadIntrinsicPat<SIbuffer_load_byte, i32, "BUFFER_LOAD_SBYTE">;
|
|
|
|
defm : MUBUF_LoadIntrinsicPat<SIbuffer_load_short, i32, "BUFFER_LOAD_SSHORT">;
|
|
|
|
defm : MUBUF_LoadIntrinsicPat<SIbuffer_load_ubyte, i32, "BUFFER_LOAD_UBYTE">;
|
|
|
|
defm : MUBUF_LoadIntrinsicPat<SIbuffer_load_ushort, i32, "BUFFER_LOAD_USHORT">;
|
2016-09-10 21:09:16 +08:00
|
|
|
|
|
|
|
// Selection patterns that map the buffer-store node `name`, storing a value
// of type `vt`, onto the "_exact" MUBUF pseudos derived from `opcode`.
//
// Four GCNPat records are defined, one per addressing form. The form is
// chosen by which of the vindex / voffset source operands is the literal 0:
//   vindex = 0, voffset = 0 -> opcode # _OFFSET_exact
//   vindex = 0, voffset reg -> opcode # _OFFEN_exact
//   vindex reg, voffset = 0 -> opcode # _IDXEN_exact
//   vindex reg, voffset reg -> opcode # _BOTHEN_exact, with vindex (sub0) and
//                              voffset (sub1) combined into a VReg_64 by
//                              REG_SEQUENCE
// The two forms without an index require the trailing source operand to be
// 0; the two indexed forms accept any imm there.
//
// In every form $offset is passed through as_i16imm and the glc, slc and dlc
// operands are extracted from the combined $cachepolicy immediate. A literal
// 0 is passed for the operand between slc and dlc.
// NOTE(review): that operand is presumably tfe - confirm against the
// MUBUF_Pseudo operand list.
multiclass MUBUF_StoreIntrinsicPat<SDPatternOperator name, ValueType vt,
|
|
|
|
string opcode> {
|
2017-10-03 08:06:41 +08:00
|
|
|
def : GCNPat<
|
Revert r372285 "GlobalISel: Don't materialize immarg arguments to intrinsics"
This broke the Chromium build, causing it to fail with e.g.
fatal error: error in backend: Cannot select: t362: v4i32 = X86ISD::VSHLI t392, Constant:i8<15>
See llvm-commits thread of r372285 for details.
This also reverts r372286, r372287, r372288, r372289, r372290, r372291,
r372292, r372293, r372296, and r372297, which seemed to depend on the
main commit.
> Encode them directly as an imm argument to G_INTRINSIC*.
>
> Since intrinsics can now define what parameters are required to be
> immediates, avoid using registers for them. Intrinsics could
> potentially want a constant that isn't a legal register type. Also,
> since G_CONSTANT is subject to CSE and legalization, transforms could
> potentially obscure the value (and create extra work for the
> selector). The register bank of a G_CONSTANT is also meaningful, so
> this could throw off future folding and legalization logic for AMDGPU.
>
> This will be much more convenient to work with than needing to call
> getConstantVRegVal and checking if it may have failed for every
> constant intrinsic parameter. AMDGPU has quite a lot of intrinsics with
> immarg operands, many of which need inspection during lowering. Having
> to find the value in a register is going to add a lot of boilerplate
> and waste compile time.
>
> SelectionDAG has always provided TargetConstant for constants which
> should not be legalized or materialized in a register. The distinction
> between Constant and TargetConstant was somewhat fuzzy, and there was
> no automatic way to force usage of TargetConstant for certain
> intrinsic parameters. They were both ultimately ConstantSDNode, and it
> was inconsistently used. It was quite easy to mis-select an
> instruction requiring an immediate. For SelectionDAG, start emitting
> TargetConstant for these arguments, and using timm to match them.
>
> Most of the work here is to cleanup target handling of constants. Some
> targets process intrinsics through intermediate custom nodes, which
> need to preserve TargetConstant usage to match the intrinsic
> expectation. Pattern inputs now need to distinguish whether a constant
> is merely compatible with an operand or whether it is mandatory.
>
> The GlobalISelEmitter needs to treat timm as a special case of a leaf
> node, similar to MachineBasicBlock operands. This should also enable
> handling of patterns for some G_* instructions with immediates, like
> G_FENCE or G_EXTRACT.
>
> This does include a workaround for a crash in GlobalISelEmitter when
> ARM tries to use "imm" in an output with a "timm" pattern source.
llvm-svn: 372314
2019-09-19 20:33:07 +08:00
|
|
|
(name vt:$vdata, v4i32:$rsrc, 0, 0, i32:$soffset, imm:$offset,
|
|
|
|
imm:$cachepolicy, 0),
|
2016-09-10 21:09:16 +08:00
|
|
|
(!cast<MUBUF_Pseudo>(opcode # _OFFSET_exact) $vdata, $rsrc, $soffset, (as_i16imm $offset),
|
2019-05-01 06:08:23 +08:00
|
|
|
(extract_glc $cachepolicy), (extract_slc $cachepolicy), 0, (extract_dlc $cachepolicy))
|
2016-09-10 21:09:16 +08:00
|
|
|
>;
|
|
|
|
|
2017-10-03 08:06:41 +08:00
|
|
|
def : GCNPat<
|
Revert r372285 "GlobalISel: Don't materialize immarg arguments to intrinsics"
This broke the Chromium build, causing it to fail with e.g.
fatal error: error in backend: Cannot select: t362: v4i32 = X86ISD::VSHLI t392, Constant:i8<15>
See llvm-commits thread of r372285 for details.
This also reverts r372286, r372287, r372288, r372289, r372290, r372291,
r372292, r372293, r372296, and r372297, which seemed to depend on the
main commit.
> Encode them directly as an imm argument to G_INTRINSIC*.
>
> Since intrinsics can now define what parameters are required to be
> immediates, avoid using registers for them. Intrinsics could
> potentially want a constant that isn't a legal register type. Also,
> since G_CONSTANT is subject to CSE and legalization, transforms could
> potentially obscure the value (and create extra work for the
> selector). The register bank of a G_CONSTANT is also meaningful, so
> this could throw off future folding and legalization logic for AMDGPU.
>
> This will be much more convenient to work with than needing to call
> getConstantVRegVal and checking if it may have failed for every
> constant intrinsic parameter. AMDGPU has quite a lot of intrinsics with
> immarg operands, many of which need inspection during lowering. Having
> to find the value in a register is going to add a lot of boilerplate
> and waste compile time.
>
> SelectionDAG has always provided TargetConstant for constants which
> should not be legalized or materialized in a register. The distinction
> between Constant and TargetConstant was somewhat fuzzy, and there was
> no automatic way to force usage of TargetConstant for certain
> intrinsic parameters. They were both ultimately ConstantSDNode, and it
> was inconsistently used. It was quite easy to mis-select an
> instruction requiring an immediate. For SelectionDAG, start emitting
> TargetConstant for these arguments, and using timm to match them.
>
> Most of the work here is to cleanup target handling of constants. Some
> targets process intrinsics through intermediate custom nodes, which
> need to preserve TargetConstant usage to match the intrinsic
> expectation. Pattern inputs now need to distinguish whether a constant
> is merely compatible with an operand or whether it is mandatory.
>
> The GlobalISelEmitter needs to treat timm as a special case of a leaf
> node, similar to MachineBasicBlock operands. This should also enable
> handling of patterns for some G_* instructions with immediates, like
> G_FENCE or G_EXTRACT.
>
> This does include a workaround for a crash in GlobalISelEmitter when
> ARM tries to use "imm" in an output with a "timm" pattern source.
llvm-svn: 372314
2019-09-19 20:33:07 +08:00
|
|
|
(name vt:$vdata, v4i32:$rsrc, 0, i32:$voffset, i32:$soffset, imm:$offset,
|
|
|
|
imm:$cachepolicy, 0),
|
[AMDGPU] New buffer intrinsics
Summary:
This commit adds new intrinsics
llvm.amdgcn.raw.buffer.load
llvm.amdgcn.raw.buffer.load.format
llvm.amdgcn.raw.buffer.load.format.d16
llvm.amdgcn.struct.buffer.load
llvm.amdgcn.struct.buffer.load.format
llvm.amdgcn.struct.buffer.load.format.d16
llvm.amdgcn.raw.buffer.store
llvm.amdgcn.raw.buffer.store.format
llvm.amdgcn.raw.buffer.store.format.d16
llvm.amdgcn.struct.buffer.store
llvm.amdgcn.struct.buffer.store.format
llvm.amdgcn.struct.buffer.store.format.d16
llvm.amdgcn.raw.buffer.atomic.*
llvm.amdgcn.struct.buffer.atomic.*
with the following changes from the llvm.amdgcn.buffer.*
intrinsics:
* there are separate raw and struct versions: raw does not have an
index arg and sets idxen=0 in the instruction, and struct always sets
idxen=1 in the instruction even if the index is 0, to allow for the
fact that gfx9 does bounds checking differently depending on whether
idxen is set;
* there is a combined cachepolicy arg (glc+slc)
* there are now only two offset args: one for the offset that is
included in bounds checking and swizzling, to be split between the
instruction's voffset and immoffset fields, and one for the offset
that is excluded from bounds checking and swizzling, to go into the
instruction's soffset field.
The AMDISD::BUFFER_* SD nodes always have an index operand, all three
offset operands, combined cachepolicy operand, and an extra idxen
operand.
The obsolescent llvm.amdgcn.buffer.* intrinsics continue to work.
Subscribers: arsenm, kzhuravl, wdng, nhaehnle, yaxunl, dstuttard, t-tye, jfb, llvm-commits
Differential Revision: https://reviews.llvm.org/D50306
Change-Id: If897ea7dc34fcbf4d5496e98cc99a934f62fc205
llvm-svn: 340269
2018-08-21 19:07:10 +08:00
|
|
|
(!cast<MUBUF_Pseudo>(opcode # _OFFEN_exact) $vdata, $voffset, $rsrc, $soffset,
|
2019-05-01 06:08:23 +08:00
|
|
|
(as_i16imm $offset), (extract_glc $cachepolicy),
|
|
|
|
(extract_slc $cachepolicy), 0, (extract_dlc $cachepolicy))
|
2016-09-10 21:09:16 +08:00
|
|
|
>;
|
|
|
|
|
2017-10-03 08:06:41 +08:00
|
|
|
def : GCNPat<
|
Revert r372285 "GlobalISel: Don't materialize immarg arguments to intrinsics"
This broke the Chromium build, causing it to fail with e.g.
fatal error: error in backend: Cannot select: t362: v4i32 = X86ISD::VSHLI t392, Constant:i8<15>
See llvm-commits thread of r372285 for details.
This also reverts r372286, r372287, r372288, r372289, r372290, r372291,
r372292, r372293, r372296, and r372297, which seemed to depend on the
main commit.
> Encode them directly as an imm argument to G_INTRINSIC*.
>
> Since intrinsics can now define what parameters are required to be
> immediates, avoid using registers for them. Intrinsics could
> potentially want a constant that isn't a legal register type. Also,
> since G_CONSTANT is subject to CSE and legalization, transforms could
> potentially obscure the value (and create extra work for the
> selector). The register bank of a G_CONSTANT is also meaningful, so
> this could throw off future folding and legalization logic for AMDGPU.
>
> This will be much more convenient to work with than needing to call
> getConstantVRegVal and checking if it may have failed for every
> constant intrinsic parameter. AMDGPU has quite a lot of intrinsics with
> immarg operands, many of which need inspection during lowering. Having
> to find the value in a register is going to add a lot of boilerplate
> and waste compile time.
>
> SelectionDAG has always provided TargetConstant for constants which
> should not be legalized or materialized in a register. The distinction
> between Constant and TargetConstant was somewhat fuzzy, and there was
> no automatic way to force usage of TargetConstant for certain
> intrinsic parameters. They were both ultimately ConstantSDNode, and it
> was inconsistently used. It was quite easy to mis-select an
> instruction requiring an immediate. For SelectionDAG, start emitting
> TargetConstant for these arguments, and using timm to match them.
>
> Most of the work here is to cleanup target handling of constants. Some
> targets process intrinsics through intermediate custom nodes, which
> need to preserve TargetConstant usage to match the intrinsic
> expectation. Pattern inputs now need to distinguish whether a constant
> is merely compatible with an operand or whether it is mandatory.
>
> The GlobalISelEmitter needs to treat timm as a special case of a leaf
> node, similar to MachineBasicBlock operands. This should also enable
> handling of patterns for some G_* instructions with immediates, like
> G_FENCE or G_EXTRACT.
>
> This does include a workaround for a crash in GlobalISelEmitter when
> ARM tries to use "imm" in an output with a "timm" pattern source.
llvm-svn: 372314
2019-09-19 20:33:07 +08:00
|
|
|
(name vt:$vdata, v4i32:$rsrc, i32:$vindex, 0, i32:$soffset, imm:$offset,
|
|
|
|
imm:$cachepolicy, imm),
|
[AMDGPU] New buffer intrinsics
Summary:
This commit adds new intrinsics
llvm.amdgcn.raw.buffer.load
llvm.amdgcn.raw.buffer.load.format
llvm.amdgcn.raw.buffer.load.format.d16
llvm.amdgcn.struct.buffer.load
llvm.amdgcn.struct.buffer.load.format
llvm.amdgcn.struct.buffer.load.format.d16
llvm.amdgcn.raw.buffer.store
llvm.amdgcn.raw.buffer.store.format
llvm.amdgcn.raw.buffer.store.format.d16
llvm.amdgcn.struct.buffer.store
llvm.amdgcn.struct.buffer.store.format
llvm.amdgcn.struct.buffer.store.format.d16
llvm.amdgcn.raw.buffer.atomic.*
llvm.amdgcn.struct.buffer.atomic.*
with the following changes from the llvm.amdgcn.buffer.*
intrinsics:
* there are separate raw and struct versions: raw does not have an
index arg and sets idxen=0 in the instruction, and struct always sets
idxen=1 in the instruction even if the index is 0, to allow for the
fact that gfx9 does bounds checking differently depending on whether
idxen is set;
* there is a combined cachepolicy arg (glc+slc)
* there are now only two offset args: one for the offset that is
included in bounds checking and swizzling, to be split between the
instruction's voffset and immoffset fields, and one for the offset
that is excluded from bounds checking and swizzling, to go into the
instruction's soffset field.
The AMDISD::BUFFER_* SD nodes always have an index operand, all three
offset operands, combined cachepolicy operand, and an extra idxen
operand.
The obsolescent llvm.amdgcn.buffer.* intrinsics continue to work.
Subscribers: arsenm, kzhuravl, wdng, nhaehnle, yaxunl, dstuttard, t-tye, jfb, llvm-commits
Differential Revision: https://reviews.llvm.org/D50306
Change-Id: If897ea7dc34fcbf4d5496e98cc99a934f62fc205
llvm-svn: 340269
2018-08-21 19:07:10 +08:00
|
|
|
(!cast<MUBUF_Pseudo>(opcode # _IDXEN_exact) $vdata, $vindex, $rsrc, $soffset,
|
2019-05-01 06:08:23 +08:00
|
|
|
(as_i16imm $offset), (extract_glc $cachepolicy),
|
|
|
|
(extract_slc $cachepolicy), 0, (extract_dlc $cachepolicy))
|
2016-09-10 21:09:16 +08:00
|
|
|
>;
|
|
|
|
|
2017-10-03 08:06:41 +08:00
|
|
|
def : GCNPat<
|
Revert r372285 "GlobalISel: Don't materialize immarg arguments to intrinsics"
This broke the Chromium build, causing it to fail with e.g.
fatal error: error in backend: Cannot select: t362: v4i32 = X86ISD::VSHLI t392, Constant:i8<15>
See llvm-commits thread of r372285 for details.
This also reverts r372286, r372287, r372288, r372289, r372290, r372291,
r372292, r372293, r372296, and r372297, which seemed to depend on the
main commit.
> Encode them directly as an imm argument to G_INTRINSIC*.
>
> Since intrinsics can now define what parameters are required to be
> immediates, avoid using registers for them. Intrinsics could
> potentially want a constant that isn't a legal register type. Also,
> since G_CONSTANT is subject to CSE and legalization, transforms could
> potentially obscure the value (and create extra work for the
> selector). The register bank of a G_CONSTANT is also meaningful, so
> this could throw off future folding and legalization logic for AMDGPU.
>
> This will be much more convenient to work with than needing to call
> getConstantVRegVal and checking if it may have failed for every
> constant intrinsic parameter. AMDGPU has quite a lot of intrinsics with
> immarg operands, many of which need inspection during lowering. Having
> to find the value in a register is going to add a lot of boilerplate
> and waste compile time.
>
> SelectionDAG has always provided TargetConstant for constants which
> should not be legalized or materialized in a register. The distinction
> between Constant and TargetConstant was somewhat fuzzy, and there was
> no automatic way to force usage of TargetConstant for certain
> intrinsic parameters. They were both ultimately ConstantSDNode, and it
> was inconsistently used. It was quite easy to mis-select an
> instruction requiring an immediate. For SelectionDAG, start emitting
> TargetConstant for these arguments, and using timm to match them.
>
> Most of the work here is to cleanup target handling of constants. Some
> targets process intrinsics through intermediate custom nodes, which
> need to preserve TargetConstant usage to match the intrinsic
> expectation. Pattern inputs now need to distinguish whether a constant
> is merely compatible with an operand or whether it is mandatory.
>
> The GlobalISelEmitter needs to treat timm as a special case of a leaf
> node, similar to MachineBasicBlock operands. This should also enable
> handling of patterns for some G_* instructions with immediates, like
> G_FENCE or G_EXTRACT.
>
> This does include a workaround for a crash in GlobalISelEmitter when
> ARM tries to use "imm" in an output with a "timm" pattern source.
llvm-svn: 372314
2019-09-19 20:33:07 +08:00
|
|
|
(name vt:$vdata, v4i32:$rsrc, i32:$vindex, i32:$voffset, i32:$soffset, imm:$offset,
|
|
|
|
imm:$cachepolicy, imm),
|
2016-09-10 21:09:16 +08:00
|
|
|
(!cast<MUBUF_Pseudo>(opcode # _BOTHEN_exact)
|
|
|
|
$vdata,
|
|
|
|
(REG_SEQUENCE VReg_64, $vindex, sub0, $voffset, sub1),
|
2019-05-01 06:08:23 +08:00
|
|
|
$rsrc, $soffset, (as_i16imm $offset), (extract_glc $cachepolicy),
|
|
|
|
(extract_slc $cachepolicy), 0, (extract_dlc $cachepolicy))
|
2016-09-10 21:09:16 +08:00
|
|
|
>;
|
|
|
|
}
|
|
|
|
|
2017-11-09 09:52:48 +08:00
|
|
|
// Instantiate MUBUF_StoreIntrinsicPat for the SIbuffer_store_format node,
// once per stored value type. The opcode suffix follows the component count
// (1 -> X, 2 -> XY, 3 -> XYZ, 4 -> XYZW); the float and integer types of the
// same width share one opcode.
defm : MUBUF_StoreIntrinsicPat<SIbuffer_store_format, f32, "BUFFER_STORE_FORMAT_X">;
|
2018-08-21 19:08:12 +08:00
|
|
|
defm : MUBUF_StoreIntrinsicPat<SIbuffer_store_format, i32, "BUFFER_STORE_FORMAT_X">;
|
2017-11-09 09:52:48 +08:00
|
|
|
defm : MUBUF_StoreIntrinsicPat<SIbuffer_store_format, v2f32, "BUFFER_STORE_FORMAT_XY">;
|
2018-08-21 19:08:12 +08:00
|
|
|
defm : MUBUF_StoreIntrinsicPat<SIbuffer_store_format, v2i32, "BUFFER_STORE_FORMAT_XY">;
|
2019-03-22 22:58:02 +08:00
|
|
|
defm : MUBUF_StoreIntrinsicPat<SIbuffer_store_format, v3f32, "BUFFER_STORE_FORMAT_XYZ">;
|
|
|
|
defm : MUBUF_StoreIntrinsicPat<SIbuffer_store_format, v3i32, "BUFFER_STORE_FORMAT_XYZ">;
|
2017-11-09 09:52:48 +08:00
|
|
|
defm : MUBUF_StoreIntrinsicPat<SIbuffer_store_format, v4f32, "BUFFER_STORE_FORMAT_XYZW">;
|
2018-08-21 19:08:12 +08:00
|
|
|
defm : MUBUF_StoreIntrinsicPat<SIbuffer_store_format, v4i32, "BUFFER_STORE_FORMAT_XYZW">;
|
2018-01-13 05:12:19 +08:00
|
|
|
|
|
|
|
// D16 format-store patterns guarded by HasUnpackedD16VMem. These select the
// "_gfx80" opcode variants, and the 2- and 4-component stored values are
// typed v2i32 / v4i32 here rather than as 16-bit vectors.
// NOTE(review): presumably each 16-bit component occupies its own 32-bit
// register on these subtargets - confirm against the D16 pseudo definitions.
let SubtargetPredicate = HasUnpackedD16VMem in {
|
|
|
|
defm : MUBUF_StoreIntrinsicPat<SIbuffer_store_format_d16, f16, "BUFFER_STORE_FORMAT_D16_X_gfx80">;
|
2018-08-21 19:08:12 +08:00
|
|
|
defm : MUBUF_StoreIntrinsicPat<SIbuffer_store_format_d16, i16, "BUFFER_STORE_FORMAT_D16_X_gfx80">;
|
2018-01-13 05:12:19 +08:00
|
|
|
defm : MUBUF_StoreIntrinsicPat<SIbuffer_store_format_d16, v2i32, "BUFFER_STORE_FORMAT_D16_XY_gfx80">;
|
|
|
|
defm : MUBUF_StoreIntrinsicPat<SIbuffer_store_format_d16, v4i32, "BUFFER_STORE_FORMAT_D16_XYZW_gfx80">;
|
|
|
|
} // End HasUnpackedD16VMem.
|
|
|
|
|
|
|
|
// D16 format-store patterns guarded by HasPackedD16VMem. Stored values use
// the 16-bit scalar and vector types directly (f16/i16, v2f16/v2i16,
// v4f16/v4i16) and select the plain BUFFER_STORE_FORMAT_D16_* opcodes;
// the float and integer types of the same shape share one opcode.
let SubtargetPredicate = HasPackedD16VMem in {
|
|
|
|
defm : MUBUF_StoreIntrinsicPat<SIbuffer_store_format_d16, f16, "BUFFER_STORE_FORMAT_D16_X">;
|
2018-08-21 19:08:12 +08:00
|
|
|
defm : MUBUF_StoreIntrinsicPat<SIbuffer_store_format_d16, i16, "BUFFER_STORE_FORMAT_D16_X">;
|
2018-01-13 05:12:19 +08:00
|
|
|
defm : MUBUF_StoreIntrinsicPat<SIbuffer_store_format_d16, v2f16, "BUFFER_STORE_FORMAT_D16_XY">;
|
2018-08-21 19:08:12 +08:00
|
|
|
defm : MUBUF_StoreIntrinsicPat<SIbuffer_store_format_d16, v2i16, "BUFFER_STORE_FORMAT_D16_XY">;
|
2018-06-15 23:15:46 +08:00
|
|
|
defm : MUBUF_StoreIntrinsicPat<SIbuffer_store_format_d16, v4f16, "BUFFER_STORE_FORMAT_D16_XYZW">;
|
2018-08-21 19:08:12 +08:00
|
|
|
defm : MUBUF_StoreIntrinsicPat<SIbuffer_store_format_d16, v4i16, "BUFFER_STORE_FORMAT_D16_XYZW">;
|
2018-01-13 05:12:19 +08:00
|
|
|
} // End HasPackedD16VMem.
|
|
|
|
|
2017-11-09 09:52:48 +08:00
|
|
|
// Instantiate MUBUF_StoreIntrinsicPat for the SIbuffer_store node. The
// opcode is chosen by the total size of the stored value type:
//   f32, i32, v2i16, v2f16     -> BUFFER_STORE_DWORD
//   v2f32, v2i32, v4i16, v4f16 -> BUFFER_STORE_DWORDX2
//   v3f32, v3i32               -> BUFFER_STORE_DWORDX3
//   v4f32, v4i32               -> BUFFER_STORE_DWORDX4
defm : MUBUF_StoreIntrinsicPat<SIbuffer_store, f32, "BUFFER_STORE_DWORD">;
|
2018-08-21 19:08:12 +08:00
|
|
|
defm : MUBUF_StoreIntrinsicPat<SIbuffer_store, i32, "BUFFER_STORE_DWORD">;
|
2019-08-05 22:57:59 +08:00
|
|
|
defm : MUBUF_StoreIntrinsicPat<SIbuffer_store, v2i16, "BUFFER_STORE_DWORD">;
|
|
|
|
defm : MUBUF_StoreIntrinsicPat<SIbuffer_store, v2f16, "BUFFER_STORE_DWORD">;
|
2017-11-09 09:52:48 +08:00
|
|
|
defm : MUBUF_StoreIntrinsicPat<SIbuffer_store, v2f32, "BUFFER_STORE_DWORDX2">;
|
2018-08-21 19:08:12 +08:00
|
|
|
defm : MUBUF_StoreIntrinsicPat<SIbuffer_store, v2i32, "BUFFER_STORE_DWORDX2">;
|
2019-08-05 22:57:59 +08:00
|
|
|
defm : MUBUF_StoreIntrinsicPat<SIbuffer_store, v4i16, "BUFFER_STORE_DWORDX2">;
|
|
|
|
defm : MUBUF_StoreIntrinsicPat<SIbuffer_store, v4f16, "BUFFER_STORE_DWORDX2">;
|
2019-03-22 22:58:02 +08:00
|
|
|
defm : MUBUF_StoreIntrinsicPat<SIbuffer_store, v3f32, "BUFFER_STORE_DWORDX3">;
|
|
|
|
defm : MUBUF_StoreIntrinsicPat<SIbuffer_store, v3i32, "BUFFER_STORE_DWORDX3">;
|
2017-11-09 09:52:48 +08:00
|
|
|
defm : MUBUF_StoreIntrinsicPat<SIbuffer_store, v4f32, "BUFFER_STORE_DWORDX4">;
|
2018-08-21 19:08:12 +08:00
|
|
|
defm : MUBUF_StoreIntrinsicPat<SIbuffer_store, v4i32, "BUFFER_STORE_DWORDX4">;
|
[AMDGPU] Add buffer/load 8/16 bit overloaded intrinsics
Summary:
Add buffer store/load 8/16 overloaded intrinsics for buffer, raw_buffer and struct_buffer
Change-Id: I166a29f071b2ff4e4683fb0392564b1f223ac61d
Subscribers: arsenm, kzhuravl, jvesely, wdng, nhaehnle, yaxunl, dstuttard, tpr, t-tye, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D59265
llvm-svn: 356465
2019-03-20 00:07:00 +08:00
|
|
|
// 8- and 16-bit buffer stores. The stored value is typed i32 in both cases;
// the node (store_byte / store_short) selects BUFFER_STORE_BYTE / _SHORT.
defm : MUBUF_StoreIntrinsicPat<SIbuffer_store_byte, i32, "BUFFER_STORE_BYTE">;
|
|
|
|
defm : MUBUF_StoreIntrinsicPat<SIbuffer_store_short, i32, "BUFFER_STORE_SHORT">;
|
2016-09-10 21:09:16 +08:00
|
|
|
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
// buffer_atomic patterns
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
|
[AMDGPU] Add support for 64 bit buffer atomic arithmetic instructions
Summary:
This adds support for 64 bit buffer atomic arithmetic instructions but does not include
cmpswap as that depends on a fix to the way the register pairs are handled
Change-Id: Ib207ea65fb69487ccad5066ea647ae8ddfe2ce61
Subscribers: arsenm, kzhuravl, jvesely, wdng, nhaehnle, yaxunl, dstuttard, tpr, t-tye, jfb, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D58918
llvm-svn: 355520
2019-03-07 01:02:06 +08:00
|
|
|
// Selection patterns for buffer atomics that produce a result of type `vt`;
// every pattern selects one of the *_RTN pseudos named after `opcode`.
//
// Parameters:
//   name   - the SD node to match (e.g. SIbuffer_atomic_add).
//   vt     - type of both the data operand and the returned value.
//   opcode - instruction base name; the addressing suffix is pasted onto it.
//
// Operands of `name`, in order:
//   vdata_in, rsrc, vindex, voffset, soffset, offset, cachepolicy, idxen
//
// The addressing variant is picked from vindex, voffset and idxen:
//
//   vindex  voffset  idxen    selected pseudo
//   0       0        0        opcode # _OFFSET_RTN
//   reg     0        any imm  opcode # _IDXEN_RTN
//   0       reg      0        opcode # _OFFEN_RTN
//   reg     reg      any imm  opcode # _BOTHEN_RTN
//
// In every variant the immediate offset goes through as_i16imm, and the
// cachepolicy operand is forwarded only via extract_slc.
multiclass BufferAtomicPatterns<SDPatternOperator name, ValueType vt,
                                string opcode> {
  // No index and no register offset: immediate/scalar offset only.
  def : GCNPat<
    (vt (name vt:$vdata_in, v4i32:$rsrc, 0,
          0, i32:$soffset, imm:$offset,
          imm:$cachepolicy, 0)),
    (!cast<MUBUF_Pseudo>(opcode # _OFFSET_RTN) $vdata_in, $rsrc, $soffset,
                                        (as_i16imm $offset), (extract_slc $cachepolicy))
  >;

  // Index register present, no register offset.
  def : GCNPat<
    (vt (name vt:$vdata_in, v4i32:$rsrc, i32:$vindex,
          0, i32:$soffset, imm:$offset,
          imm:$cachepolicy, imm)),
    (!cast<MUBUF_Pseudo>(opcode # _IDXEN_RTN) $vdata_in, $vindex, $rsrc, $soffset,
                                       (as_i16imm $offset), (extract_slc $cachepolicy))
  >;

  // Register offset present, no index.
  def : GCNPat<
    (vt (name vt:$vdata_in, v4i32:$rsrc, 0,
          i32:$voffset, i32:$soffset, imm:$offset,
          imm:$cachepolicy, 0)),
    (!cast<MUBUF_Pseudo>(opcode # _OFFEN_RTN) $vdata_in, $voffset, $rsrc, $soffset,
                                       (as_i16imm $offset), (extract_slc $cachepolicy))
  >;

  // Both index and register offset: they are packed into one VReg_64 with
  // vindex in sub0 and voffset in sub1.
  def : GCNPat<
    (vt (name vt:$vdata_in, v4i32:$rsrc, i32:$vindex,
          i32:$voffset, i32:$soffset, imm:$offset,
          imm:$cachepolicy, imm)),
    (!cast<MUBUF_Pseudo>(opcode # _BOTHEN_RTN)
      $vdata_in,
      (REG_SEQUENCE VReg_64, $vindex, sub0, $voffset, sub1),
      $rsrc, $soffset, (as_i16imm $offset), (extract_slc $cachepolicy))
  >;
}
|
|
|
|
|
[AMDGPU] Add support for 64 bit buffer atomic arithmetic instructions
Summary:
This adds support for 64 bit buffer atomic arithmetic instructions but does not include
cmpswap as that depends on a fix to the way the register pairs are handled
Change-Id: Ib207ea65fb69487ccad5066ea647ae8ddfe2ce61
Subscribers: arsenm, kzhuravl, jvesely, wdng, nhaehnle, yaxunl, dstuttard, tpr, t-tye, jfb, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D58918
llvm-svn: 355520
2019-03-07 01:02:06 +08:00
|
|
|
// 32-bit buffer atomics: one BufferAtomicPatterns instantiation per operation,
// all with value type i32.
defm : BufferAtomicPatterns<SIbuffer_atomic_swap, i32, "BUFFER_ATOMIC_SWAP">;
defm : BufferAtomicPatterns<SIbuffer_atomic_add, i32, "BUFFER_ATOMIC_ADD">;
defm : BufferAtomicPatterns<SIbuffer_atomic_sub, i32, "BUFFER_ATOMIC_SUB">;
defm : BufferAtomicPatterns<SIbuffer_atomic_smin, i32, "BUFFER_ATOMIC_SMIN">;
defm : BufferAtomicPatterns<SIbuffer_atomic_umin, i32, "BUFFER_ATOMIC_UMIN">;
defm : BufferAtomicPatterns<SIbuffer_atomic_smax, i32, "BUFFER_ATOMIC_SMAX">;
defm : BufferAtomicPatterns<SIbuffer_atomic_umax, i32, "BUFFER_ATOMIC_UMAX">;
defm : BufferAtomicPatterns<SIbuffer_atomic_and, i32, "BUFFER_ATOMIC_AND">;
defm : BufferAtomicPatterns<SIbuffer_atomic_or, i32, "BUFFER_ATOMIC_OR">;
defm : BufferAtomicPatterns<SIbuffer_atomic_xor, i32, "BUFFER_ATOMIC_XOR">;
// Wrapping increment/decrement.
defm : BufferAtomicPatterns<SIbuffer_atomic_inc, i32, "BUFFER_ATOMIC_INC">;
defm : BufferAtomicPatterns<SIbuffer_atomic_dec, i32, "BUFFER_ATOMIC_DEC">;
|
[AMDGPU] Add support for 64 bit buffer atomic arithmetic instructions
Summary:
This adds support for 64 bit buffer atomic arithmetic instructions but does not include
cmpswap as that depends on a fix to the way the register pairs are handled
Change-Id: Ib207ea65fb69487ccad5066ea647ae8ddfe2ce61
Subscribers: arsenm, kzhuravl, jvesely, wdng, nhaehnle, yaxunl, dstuttard, tpr, t-tye, jfb, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D58918
llvm-svn: 355520
2019-03-07 01:02:06 +08:00
|
|
|
// 64-bit buffer atomics: same operations as the 32-bit list, with value type
// i64 and the _X2 opcode names. cmpswap is not instantiated in this list.
defm : BufferAtomicPatterns<SIbuffer_atomic_swap, i64, "BUFFER_ATOMIC_SWAP_X2">;
defm : BufferAtomicPatterns<SIbuffer_atomic_add, i64, "BUFFER_ATOMIC_ADD_X2">;
defm : BufferAtomicPatterns<SIbuffer_atomic_sub, i64, "BUFFER_ATOMIC_SUB_X2">;
defm : BufferAtomicPatterns<SIbuffer_atomic_smin, i64, "BUFFER_ATOMIC_SMIN_X2">;
defm : BufferAtomicPatterns<SIbuffer_atomic_umin, i64, "BUFFER_ATOMIC_UMIN_X2">;
defm : BufferAtomicPatterns<SIbuffer_atomic_smax, i64, "BUFFER_ATOMIC_SMAX_X2">;
defm : BufferAtomicPatterns<SIbuffer_atomic_umax, i64, "BUFFER_ATOMIC_UMAX_X2">;
defm : BufferAtomicPatterns<SIbuffer_atomic_and, i64, "BUFFER_ATOMIC_AND_X2">;
defm : BufferAtomicPatterns<SIbuffer_atomic_or, i64, "BUFFER_ATOMIC_OR_X2">;
defm : BufferAtomicPatterns<SIbuffer_atomic_xor, i64, "BUFFER_ATOMIC_XOR_X2">;
// Wrapping increment/decrement.
defm : BufferAtomicPatterns<SIbuffer_atomic_inc, i64, "BUFFER_ATOMIC_INC_X2">;
defm : BufferAtomicPatterns<SIbuffer_atomic_dec, i64, "BUFFER_ATOMIC_DEC_X2">;
|
2016-09-10 21:09:16 +08:00
|
|
|
|
2019-07-11 08:10:17 +08:00
|
|
|
multiclass BufferAtomicPatterns_NO_RTN<SDPatternOperator name, ValueType vt,
|
|
|
|
string opcode> {
|
|
|
|
def : GCNPat<
|
|
|
|
(name vt:$vdata_in, v4i32:$rsrc, 0,
|
Revert r372285 "GlobalISel: Don't materialize immarg arguments to intrinsics"
This broke the Chromium build, causing it to fail with e.g.
fatal error: error in backend: Cannot select: t362: v4i32 = X86ISD::VSHLI t392, Constant:i8<15>
See llvm-commits thread of r372285 for details.
This also reverts r372286, r372287, r372288, r372289, r372290, r372291,
r372292, r372293, r372296, and r372297, which seemed to depend on the
main commit.
> Encode them directly as an imm argument to G_INTRINSIC*.
>
> Since intrinsics can now define what parameters are required to be
> immediates, avoid using registers for them. Intrinsics could
> potentially want a constant that isn't a legal register type. Also,
> since G_CONSTANT is subject to CSE and legalization, transforms could
> potentially obscure the value (and create extra work for the
> selector). The register bank of a G_CONSTANT is also meaningful, so
> this could throw off future folding and legalization logic for AMDGPU.
>
> This will be much more convenient to work with than needing to call
> getConstantVRegVal and checking if it may have failed for every
> constant intrinsic parameter. AMDGPU has quite a lot of intrinsics with
> immarg operands, many of which need inspection during lowering. Having
> to find the value in a register is going to add a lot of boilerplate
> and waste compile time.
>
> SelectionDAG has always provided TargetConstant for constants which
> should not be legalized or materialized in a register. The distinction
> between Constant and TargetConstant was somewhat fuzzy, and there was
> no automatic way to force usage of TargetConstant for certain
> intrinsic parameters. They were both ultimately ConstantSDNode, and it
> was inconsistently used. It was quite easy to mis-select an
> instruction requiring an immediate. For SelectionDAG, start emitting
> TargetConstant for these arguments, and using timm to match them.
>
> Most of the work here is to cleanup target handling of constants. Some
> targets process intrinsics through intermediate custom nodes, which
> need to preserve TargetConstant usage to match the intrinsic
> expectation. Pattern inputs now need to distinguish whether a constant
> is merely compatible with an operand or whether it is mandatory.
>
> The GlobalISelEmitter needs to treat timm as a special case of a leaf
> node, similar to MachineBasicBlock operands. This should also enable
> handling of patterns for some G_* instructions with immediates, like
> G_FENCE or G_EXTRACT.
>
> This does include a workaround for a crash in GlobalISelEmitter when
> ARM tries to use "imm" in an output with a "timm" pattern source.
llvm-svn: 372314
2019-09-19 20:33:07 +08:00
|
|
|
0, i32:$soffset, imm:$offset,
|
|
|
|
imm:$cachepolicy, 0),
|
2019-07-11 08:10:17 +08:00
|
|
|
(!cast<MUBUF_Pseudo>(opcode # _OFFSET) $vdata_in, $rsrc, $soffset,
|
|
|
|
(as_i16imm $offset), (extract_slc $cachepolicy))
|
|
|
|
>;
|
|
|
|
|
|
|
|
def : GCNPat<
|
|
|
|
(name vt:$vdata_in, v4i32:$rsrc, i32:$vindex,
|
Revert r372285 "GlobalISel: Don't materialize immarg arguments to intrinsics"
This broke the Chromium build, causing it to fail with e.g.
fatal error: error in backend: Cannot select: t362: v4i32 = X86ISD::VSHLI t392, Constant:i8<15>
See llvm-commits thread of r372285 for details.
This also reverts r372286, r372287, r372288, r372289, r372290, r372291,
r372292, r372293, r372296, and r372297, which seemed to depend on the
main commit.
> Encode them directly as an imm argument to G_INTRINSIC*.
>
> Since intrinsics can now define what parameters are required to be
> immediates, avoid using registers for them. Intrinsics could
> potentially want a constant that isn't a legal register type. Also,
> since G_CONSTANT is subject to CSE and legalization, transforms could
> potentially obscure the value (and create extra work for the
> selector). The register bank of a G_CONSTANT is also meaningful, so
> this could throw off future folding and legalization logic for AMDGPU.
>
> This will be much more convenient to work with than needing to call
> getConstantVRegVal and checking if it may have failed for every
> constant intrinsic parameter. AMDGPU has quite a lot of intrinsics with
> immarg operands, many of which need inspection during lowering. Having
> to find the value in a register is going to add a lot of boilerplate
> and waste compile time.
>
> SelectionDAG has always provided TargetConstant for constants which
> should not be legalized or materialized in a register. The distinction
> between Constant and TargetConstant was somewhat fuzzy, and there was
> no automatic way to force usage of TargetConstant for certain
> intrinsic parameters. They were both ultimately ConstantSDNode, and it
> was inconsistently used. It was quite easy to mis-select an
> instruction requiring an immediate. For SelectionDAG, start emitting
> TargetConstant for these arguments, and using timm to match them.
>
> Most of the work here is to cleanup target handling of constants. Some
> targets process intrinsics through intermediate custom nodes, which
> need to preserve TargetConstant usage to match the intrinsic
> expectation. Pattern inputs now need to distinguish whether a constant
> is merely compatible with an operand or whether it is mandatory.
>
> The GlobalISelEmitter needs to treat timm as a special case of a leaf
> node, similar to MachineBasicBlock operands. This should also enable
> handling of patterns for some G_* instructions with immediates, like
> G_FENCE or G_EXTRACT.
>
> This does include a workaround for a crash in GlobalISelEmitter when
> ARM tries to use "imm" in an output with a "timm" pattern source.
llvm-svn: 372314
2019-09-19 20:33:07 +08:00
|
|
|
0, i32:$soffset, imm:$offset,
|
|
|
|
imm:$cachepolicy, imm),
|
2019-07-11 08:10:17 +08:00
|
|
|
(!cast<MUBUF_Pseudo>(opcode # _IDXEN) $vdata_in, $vindex, $rsrc, $soffset,
|
|
|
|
(as_i16imm $offset), (extract_slc $cachepolicy))
|
|
|
|
>;
|
|
|
|
|
|
|
|
def : GCNPat<
|
|
|
|
(name vt:$vdata_in, v4i32:$rsrc, 0,
|
Revert r372285 "GlobalISel: Don't materialize immarg arguments to intrinsics"
This broke the Chromium build, causing it to fail with e.g.
fatal error: error in backend: Cannot select: t362: v4i32 = X86ISD::VSHLI t392, Constant:i8<15>
See llvm-commits thread of r372285 for details.
This also reverts r372286, r372287, r372288, r372289, r372290, r372291,
r372292, r372293, r372296, and r372297, which seemed to depend on the
main commit.
> Encode them directly as an imm argument to G_INTRINSIC*.
>
> Since intrinsics can now define what parameters are required to be
> immediates, avoid using registers for them. Intrinsics could
> potentially want a constant that isn't a legal register type. Also,
> since G_CONSTANT is subject to CSE and legalization, transforms could
> potentially obscure the value (and create extra work for the
> selector). The register bank of a G_CONSTANT is also meaningful, so
> this could throw off future folding and legalization logic for AMDGPU.
>
> This will be much more convenient to work with than needing to call
> getConstantVRegVal and checking if it may have failed for every
> constant intrinsic parameter. AMDGPU has quite a lot of intrinsics with
> immarg operands, many of which need inspection during lowering. Having
> to find the value in a register is going to add a lot of boilerplate
> and waste compile time.
>
> SelectionDAG has always provided TargetConstant for constants which
> should not be legalized or materialized in a register. The distinction
> between Constant and TargetConstant was somewhat fuzzy, and there was
> no automatic way to force usage of TargetConstant for certain
> intrinsic parameters. They were both ultimately ConstantSDNode, and it
> was inconsistently used. It was quite easy to mis-select an
> instruction requiring an immediate. For SelectionDAG, start emitting
> TargetConstant for these arguments, and using timm to match them.
>
> Most of the work here is to cleanup target handling of constants. Some
> targets process intrinsics through intermediate custom nodes, which
> need to preserve TargetConstant usage to match the intrinsic
> expectation. Pattern inputs now need to distinguish whether a constant
> is merely compatible with an operand or whether it is mandatory.
>
> The GlobalISelEmitter needs to treat timm as a special case of a leaf
> node, similar to MachineBasicBlock operands. This should also enable
> handling of patterns for some G_* instructions with immediates, like
> G_FENCE or G_EXTRACT.
>
> This does include a workaround for a crash in GlobalISelEmitter when
> ARM tries to use "imm" in an output with a "timm" pattern source.
llvm-svn: 372314
2019-09-19 20:33:07 +08:00
|
|
|
i32:$voffset, i32:$soffset, imm:$offset,
|
|
|
|
imm:$cachepolicy, 0),
|
2019-07-11 08:10:17 +08:00
|
|
|
(!cast<MUBUF_Pseudo>(opcode # _OFFEN) $vdata_in, $voffset, $rsrc, $soffset,
|
|
|
|
(as_i16imm $offset), (extract_slc $cachepolicy))
|
|
|
|
>;
|
|
|
|
|
|
|
|
def : GCNPat<
|
|
|
|
(name vt:$vdata_in, v4i32:$rsrc, i32:$vindex,
|
Revert r372285 "GlobalISel: Don't materialize immarg arguments to intrinsics"
This broke the Chromium build, causing it to fail with e.g.
fatal error: error in backend: Cannot select: t362: v4i32 = X86ISD::VSHLI t392, Constant:i8<15>
See llvm-commits thread of r372285 for details.
This also reverts r372286, r372287, r372288, r372289, r372290, r372291,
r372292, r372293, r372296, and r372297, which seemed to depend on the
main commit.
> Encode them directly as an imm argument to G_INTRINSIC*.
>
> Since intrinsics can now define what parameters are required to be
> immediates, avoid using registers for them. Intrinsics could
> potentially want a constant that isn't a legal register type. Also,
> since G_CONSTANT is subject to CSE and legalization, transforms could
> potentially obscure the value (and create extra work for the
> selector). The register bank of a G_CONSTANT is also meaningful, so
> this could throw off future folding and legalization logic for AMDGPU.
>
> This will be much more convenient to work with than needing to call
> getConstantVRegVal and checking if it may have failed for every
> constant intrinsic parameter. AMDGPU has quite a lot of intrinsics with
> immarg operands, many of which need inspection during lowering. Having
> to find the value in a register is going to add a lot of boilerplate
> and waste compile time.
>
> SelectionDAG has always provided TargetConstant for constants which
> should not be legalized or materialized in a register. The distinction
> between Constant and TargetConstant was somewhat fuzzy, and there was
> no automatic way to force usage of TargetConstant for certain
> intrinsic parameters. They were both ultimately ConstantSDNode, and it
> was inconsistently used. It was quite easy to mis-select an
> instruction requiring an immediate. For SelectionDAG, start emitting
> TargetConstant for these arguments, and using timm to match them.
>
> Most of the work here is to cleanup target handling of constants. Some
> targets process intrinsics through intermediate custom nodes, which
> need to preserve TargetConstant usage to match the intrinsic
> expectation. Pattern inputs now need to distinguish whether a constant
> is merely compatible with an operand or whether it is mandatory.
>
> The GlobalISelEmitter needs to treat timm as a special case of a leaf
> node, similar to MachineBasicBlock operands. This should also enable
> handling of patterns for some G_* instructions with immediates, like
> G_FENCE or G_EXTRACT.
>
> This does include a workaround for a crash in GlobalISelEmitter when
> ARM tries to use "imm" in an output with a "timm" pattern source.
llvm-svn: 372314
2019-09-19 20:33:07 +08:00
|
|
|
i32:$voffset, i32:$soffset, imm:$offset,
|
|
|
|
imm:$cachepolicy, imm),
|
2019-07-11 08:10:17 +08:00
|
|
|
(!cast<MUBUF_Pseudo>(opcode # _BOTHEN)
|
|
|
|
$vdata_in,
|
|
|
|
(REG_SEQUENCE VReg_64, $vindex, sub0, $voffset, sub1),
|
|
|
|
$rsrc, $soffset, (as_i16imm $offset), (extract_slc $cachepolicy))
|
|
|
|
>;
|
|
|
|
}
|
|
|
|
|
|
|
|
defm : BufferAtomicPatterns_NO_RTN<SIbuffer_atomic_fadd, f32, "BUFFER_ATOMIC_ADD_F32">;
|
|
|
|
defm : BufferAtomicPatterns_NO_RTN<SIbuffer_atomic_pk_fadd, v2f16, "BUFFER_ATOMIC_PK_ADD_F16">;
|
|
|
|
|
2017-10-03 08:06:41 +08:00
|
|
|
def : GCNPat<
|
2017-11-09 09:52:48 +08:00
|
|
|
(SIbuffer_atomic_cmpswap
|
2016-09-10 21:09:16 +08:00
|
|
|
i32:$data, i32:$cmp, v4i32:$rsrc, 0,
|
Revert r372285 "GlobalISel: Don't materialize immarg arguments to intrinsics"
This broke the Chromium build, causing it to fail with e.g.
fatal error: error in backend: Cannot select: t362: v4i32 = X86ISD::VSHLI t392, Constant:i8<15>
See llvm-commits thread of r372285 for details.
This also reverts r372286, r372287, r372288, r372289, r372290, r372291,
r372292, r372293, r372296, and r372297, which seemed to depend on the
main commit.
> Encode them directly as an imm argument to G_INTRINSIC*.
>
> Since intrinsics can now define what parameters are required to be
> immediates, avoid using registers for them. Intrinsics could
> potentially want a constant that isn't a legal register type. Also,
> since G_CONSTANT is subject to CSE and legalization, transforms could
> potentially obscure the value (and create extra work for the
> selector). The register bank of a G_CONSTANT is also meaningful, so
> this could throw off future folding and legalization logic for AMDGPU.
>
> This will be much more convenient to work with than needing to call
> getConstantVRegVal and checking if it may have failed for every
> constant intrinsic parameter. AMDGPU has quite a lot of intrinsics with
> immarg operands, many of which need inspection during lowering. Having
> to find the value in a register is going to add a lot of boilerplate
> and waste compile time.
>
> SelectionDAG has always provided TargetConstant for constants which
> should not be legalized or materialized in a register. The distinction
> between Constant and TargetConstant was somewhat fuzzy, and there was
> no automatic way to force usage of TargetConstant for certain
> intrinsic parameters. They were both ultimately ConstantSDNode, and it
> was inconsistently used. It was quite easy to mis-select an
> instruction requiring an immediate. For SelectionDAG, start emitting
> TargetConstant for these arguments, and using timm to match them.
>
> Most of the work here is to cleanup target handling of constants. Some
> targets process intrinsics through intermediate custom nodes, which
> need to preserve TargetConstant usage to match the intrinsic
> expectation. Pattern inputs now need to distinguish whether a constant
> is merely compatible with an operand or whether it is mandatory.
>
> The GlobalISelEmitter needs to treat timm as a special case of a leaf
> node, similar to MachineBasicBlock operands. This should also enable
> handling of patterns for some G_* instructions with immediates, like
> G_FENCE or G_EXTRACT.
>
> This does include a workaround for a crash in GlobalISelEmitter when
> ARM tries to use "imm" in an output with a "timm" pattern source.
llvm-svn: 372314
2019-09-19 20:33:07 +08:00
|
|
|
0, i32:$soffset, imm:$offset,
|
|
|
|
imm:$cachepolicy, 0),
|
2016-09-10 21:09:16 +08:00
|
|
|
(EXTRACT_SUBREG
|
2017-07-21 05:06:04 +08:00
|
|
|
(BUFFER_ATOMIC_CMPSWAP_OFFSET_RTN
|
2016-09-10 21:09:16 +08:00
|
|
|
(REG_SEQUENCE VReg_64, $data, sub0, $cmp, sub1),
|
[AMDGPU] New buffer intrinsics
Summary:
This commit adds new intrinsics
llvm.amdgcn.raw.buffer.load
llvm.amdgcn.raw.buffer.load.format
llvm.amdgcn.raw.buffer.load.format.d16
llvm.amdgcn.struct.buffer.load
llvm.amdgcn.struct.buffer.load.format
llvm.amdgcn.struct.buffer.load.format.d16
llvm.amdgcn.raw.buffer.store
llvm.amdgcn.raw.buffer.store.format
llvm.amdgcn.raw.buffer.store.format.d16
llvm.amdgcn.struct.buffer.store
llvm.amdgcn.struct.buffer.store.format
llvm.amdgcn.struct.buffer.store.format.d16
llvm.amdgcn.raw.buffer.atomic.*
llvm.amdgcn.struct.buffer.atomic.*
with the following changes from the llvm.amdgcn.buffer.*
intrinsics:
* there are separate raw and struct versions: raw does not have an
index arg and sets idxen=0 in the instruction, and struct always sets
idxen=1 in the instruction even if the index is 0, to allow for the
fact that gfx9 does bounds checking differently depending on whether
idxen is set;
* there is a combined cachepolicy arg (glc+slc)
* there are now only two offset args: one for the offset that is
included in bounds checking and swizzling, to be split between the
instruction's voffset and immoffset fields, and one for the offset
that is excluded from bounds checking and swizzling, to go into the
instruction's soffset field.
The AMDISD::BUFFER_* SD nodes always have an index operand, all three
offset operands, combined cachepolicy operand, and an extra idxen
operand.
The obsolescent llvm.amdgcn.buffer.* intrinsics continue to work.
Subscribers: arsenm, kzhuravl, wdng, nhaehnle, yaxunl, dstuttard, t-tye, jfb, llvm-commits
Differential Revision: https://reviews.llvm.org/D50306
Change-Id: If897ea7dc34fcbf4d5496e98cc99a934f62fc205
llvm-svn: 340269
2018-08-21 19:07:10 +08:00
|
|
|
$rsrc, $soffset, (as_i16imm $offset), (extract_slc $cachepolicy)),
|
2016-09-10 21:09:16 +08:00
|
|
|
sub0)
|
|
|
|
>;
|
|
|
|
|
2017-10-03 08:06:41 +08:00
|
|
|
def : GCNPat<
|
2017-11-09 09:52:48 +08:00
|
|
|
(SIbuffer_atomic_cmpswap
|
2016-09-10 21:09:16 +08:00
|
|
|
i32:$data, i32:$cmp, v4i32:$rsrc, i32:$vindex,
|
Revert r372285 "GlobalISel: Don't materialize immarg arguments to intrinsics"
This broke the Chromium build, causing it to fail with e.g.
fatal error: error in backend: Cannot select: t362: v4i32 = X86ISD::VSHLI t392, Constant:i8<15>
See llvm-commits thread of r372285 for details.
This also reverts r372286, r372287, r372288, r372289, r372290, r372291,
r372292, r372293, r372296, and r372297, which seemed to depend on the
main commit.
> Encode them directly as an imm argument to G_INTRINSIC*.
>
> Since intrinsics can now define what parameters are required to be
> immediates, avoid using registers for them. Intrinsics could
> potentially want a constant that isn't a legal register type. Also,
> since G_CONSTANT is subject to CSE and legalization, transforms could
> potentially obscure the value (and create extra work for the
> selector). The register bank of a G_CONSTANT is also meaningful, so
> this could throw off future folding and legalization logic for AMDGPU.
>
> This will be much more convenient to work with than needing to call
> getConstantVRegVal and checking if it may have failed for every
> constant intrinsic parameter. AMDGPU has quite a lot of intrinsics with
> immarg operands, many of which need inspection during lowering. Having
> to find the value in a register is going to add a lot of boilerplate
> and waste compile time.
>
> SelectionDAG has always provided TargetConstant for constants which
> should not be legalized or materialized in a register. The distinction
> between Constant and TargetConstant was somewhat fuzzy, and there was
> no automatic way to force usage of TargetConstant for certain
> intrinsic parameters. They were both ultimately ConstantSDNode, and it
> was inconsistently used. It was quite easy to mis-select an
> instruction requiring an immediate. For SelectionDAG, start emitting
> TargetConstant for these arguments, and using timm to match them.
>
> Most of the work here is to cleanup target handling of constants. Some
> targets process intrinsics through intermediate custom nodes, which
> need to preserve TargetConstant usage to match the intrinsic
> expectation. Pattern inputs now need to distinguish whether a constant
> is merely compatible with an operand or whether it is mandatory.
>
> The GlobalISelEmitter needs to treat timm as a special case of a leaf
> node, similar to MachineBasicBlock operands. This should also enable
> handling of patterns for some G_* instructions with immediates, like
> G_FENCE or G_EXTRACT.
>
> This does include a workaround for a crash in GlobalISelEmitter when
> ARM tries to use "imm" in an output with a "timm" pattern source.
llvm-svn: 372314
2019-09-19 20:33:07 +08:00
|
|
|
0, i32:$soffset, imm:$offset,
|
|
|
|
imm:$cachepolicy, imm),
|
2016-09-10 21:09:16 +08:00
|
|
|
(EXTRACT_SUBREG
|
2017-07-21 05:06:04 +08:00
|
|
|
(BUFFER_ATOMIC_CMPSWAP_IDXEN_RTN
|
2016-09-10 21:09:16 +08:00
|
|
|
(REG_SEQUENCE VReg_64, $data, sub0, $cmp, sub1),
|
[AMDGPU] New buffer intrinsics
Summary:
This commit adds new intrinsics
llvm.amdgcn.raw.buffer.load
llvm.amdgcn.raw.buffer.load.format
llvm.amdgcn.raw.buffer.load.format.d16
llvm.amdgcn.struct.buffer.load
llvm.amdgcn.struct.buffer.load.format
llvm.amdgcn.struct.buffer.load.format.d16
llvm.amdgcn.raw.buffer.store
llvm.amdgcn.raw.buffer.store.format
llvm.amdgcn.raw.buffer.store.format.d16
llvm.amdgcn.struct.buffer.store
llvm.amdgcn.struct.buffer.store.format
llvm.amdgcn.struct.buffer.store.format.d16
llvm.amdgcn.raw.buffer.atomic.*
llvm.amdgcn.struct.buffer.atomic.*
with the following changes from the llvm.amdgcn.buffer.*
intrinsics:
* there are separate raw and struct versions: raw does not have an
index arg and sets idxen=0 in the instruction, and struct always sets
idxen=1 in the instruction even if the index is 0, to allow for the
fact that gfx9 does bounds checking differently depending on whether
idxen is set;
* there is a combined cachepolicy arg (glc+slc)
* there are now only two offset args: one for the offset that is
included in bounds checking and swizzling, to be split between the
instruction's voffset and immoffset fields, and one for the offset
that is excluded from bounds checking and swizzling, to go into the
instruction's soffset field.
The AMDISD::BUFFER_* SD nodes always have an index operand, all three
offset operands, combined cachepolicy operand, and an extra idxen
operand.
The obsolescent llvm.amdgcn.buffer.* intrinsics continue to work.
Subscribers: arsenm, kzhuravl, wdng, nhaehnle, yaxunl, dstuttard, t-tye, jfb, llvm-commits
Differential Revision: https://reviews.llvm.org/D50306
Change-Id: If897ea7dc34fcbf4d5496e98cc99a934f62fc205
llvm-svn: 340269
2018-08-21 19:07:10 +08:00
|
|
|
$vindex, $rsrc, $soffset, (as_i16imm $offset), (extract_slc $cachepolicy)),
|
2016-09-10 21:09:16 +08:00
|
|
|
sub0)
|
|
|
|
>;
|
|
|
|
|
2017-10-03 08:06:41 +08:00
|
|
|
def : GCNPat<
|
2017-11-09 09:52:48 +08:00
|
|
|
(SIbuffer_atomic_cmpswap
|
2016-09-10 21:09:16 +08:00
|
|
|
i32:$data, i32:$cmp, v4i32:$rsrc, 0,
|
Revert r372285 "GlobalISel: Don't materialize immarg arguments to intrinsics"
This broke the Chromium build, causing it to fail with e.g.
fatal error: error in backend: Cannot select: t362: v4i32 = X86ISD::VSHLI t392, Constant:i8<15>
See llvm-commits thread of r372285 for details.
This also reverts r372286, r372287, r372288, r372289, r372290, r372291,
r372292, r372293, r372296, and r372297, which seemed to depend on the
main commit.
> Encode them directly as an imm argument to G_INTRINSIC*.
>
> Since intrinsics can now define what parameters are required to be
> immediates, avoid using registers for them. Intrinsics could
> potentially want a constant that isn't a legal register type. Also,
> since G_CONSTANT is subject to CSE and legalization, transforms could
> potentially obscure the value (and create extra work for the
> selector). The register bank of a G_CONSTANT is also meaningful, so
> this could throw off future folding and legalization logic for AMDGPU.
>
> This will be much more convenient to work with than needing to call
> getConstantVRegVal and checking if it may have failed for every
> constant intrinsic parameter. AMDGPU has quite a lot of intrinsics with
> immarg operands, many of which need inspection during lowering. Having
> to find the value in a register is going to add a lot of boilerplate
> and waste compile time.
>
> SelectionDAG has always provided TargetConstant for constants which
> should not be legalized or materialized in a register. The distinction
> between Constant and TargetConstant was somewhat fuzzy, and there was
> no automatic way to force usage of TargetConstant for certain
> intrinsic parameters. They were both ultimately ConstantSDNode, and it
> was inconsistently used. It was quite easy to mis-select an
> instruction requiring an immediate. For SelectionDAG, start emitting
> TargetConstant for these arguments, and using timm to match them.
>
> Most of the work here is to cleanup target handling of constants. Some
> targets process intrinsics through intermediate custom nodes, which
> need to preserve TargetConstant usage to match the intrinsic
> expectation. Pattern inputs now need to distinguish whether a constant
> is merely compatible with an operand or whether it is mandatory.
>
> The GlobalISelEmitter needs to treat timm as a special case of a leaf
> node, similar to MachineBasicBlock operands. This should also enable
> handling of patterns for some G_* instructions with immediates, like
> G_FENCE or G_EXTRACT.
>
> This does include a workaround for a crash in GlobalISelEmitter when
> ARM tries to use "imm" in an output with a "timm" pattern source.
llvm-svn: 372314
2019-09-19 20:33:07 +08:00
|
|
|
i32:$voffset, i32:$soffset, imm:$offset,
|
|
|
|
imm:$cachepolicy, 0),
|
2016-09-10 21:09:16 +08:00
|
|
|
(EXTRACT_SUBREG
|
2017-07-21 05:06:04 +08:00
|
|
|
(BUFFER_ATOMIC_CMPSWAP_OFFEN_RTN
|
2016-09-10 21:09:16 +08:00
|
|
|
(REG_SEQUENCE VReg_64, $data, sub0, $cmp, sub1),
|
[AMDGPU] New buffer intrinsics
Summary:
This commit adds new intrinsics
llvm.amdgcn.raw.buffer.load
llvm.amdgcn.raw.buffer.load.format
llvm.amdgcn.raw.buffer.load.format.d16
llvm.amdgcn.struct.buffer.load
llvm.amdgcn.struct.buffer.load.format
llvm.amdgcn.struct.buffer.load.format.d16
llvm.amdgcn.raw.buffer.store
llvm.amdgcn.raw.buffer.store.format
llvm.amdgcn.raw.buffer.store.format.d16
llvm.amdgcn.struct.buffer.store
llvm.amdgcn.struct.buffer.store.format
llvm.amdgcn.struct.buffer.store.format.d16
llvm.amdgcn.raw.buffer.atomic.*
llvm.amdgcn.struct.buffer.atomic.*
with the following changes from the llvm.amdgcn.buffer.*
intrinsics:
* there are separate raw and struct versions: raw does not have an
index arg and sets idxen=0 in the instruction, and struct always sets
idxen=1 in the instruction even if the index is 0, to allow for the
fact that gfx9 does bounds checking differently depending on whether
idxen is set;
* there is a combined cachepolicy arg (glc+slc)
* there are now only two offset args: one for the offset that is
included in bounds checking and swizzling, to be split between the
instruction's voffset and immoffset fields, and one for the offset
that is excluded from bounds checking and swizzling, to go into the
instruction's soffset field.
The AMDISD::BUFFER_* SD nodes always have an index operand, all three
offset operands, combined cachepolicy operand, and an extra idxen
operand.
The obsolescent llvm.amdgcn.buffer.* intrinsics continue to work.
Subscribers: arsenm, kzhuravl, wdng, nhaehnle, yaxunl, dstuttard, t-tye, jfb, llvm-commits
Differential Revision: https://reviews.llvm.org/D50306
Change-Id: If897ea7dc34fcbf4d5496e98cc99a934f62fc205
llvm-svn: 340269
2018-08-21 19:07:10 +08:00
|
|
|
$voffset, $rsrc, $soffset, (as_i16imm $offset), (extract_slc $cachepolicy)),
|
2016-09-10 21:09:16 +08:00
|
|
|
sub0)
|
|
|
|
>;
|
|
|
|
|
2017-10-03 08:06:41 +08:00
|
|
|
def : GCNPat<
|
2017-11-09 09:52:48 +08:00
|
|
|
(SIbuffer_atomic_cmpswap
|
2016-09-10 21:09:16 +08:00
|
|
|
i32:$data, i32:$cmp, v4i32:$rsrc, i32:$vindex,
|
Revert r372285 "GlobalISel: Don't materialize immarg arguments to intrinsics"
This broke the Chromium build, causing it to fail with e.g.
fatal error: error in backend: Cannot select: t362: v4i32 = X86ISD::VSHLI t392, Constant:i8<15>
See llvm-commits thread of r372285 for details.
This also reverts r372286, r372287, r372288, r372289, r372290, r372291,
r372292, r372293, r372296, and r372297, which seemed to depend on the
main commit.
> Encode them directly as an imm argument to G_INTRINSIC*.
>
> Since intrinsics can now define what parameters are required to be
> immediates, avoid using registers for them. Intrinsics could
> potentially want a constant that isn't a legal register type. Also,
> since G_CONSTANT is subject to CSE and legalization, transforms could
> potentially obscure the value (and create extra work for the
> selector). The register bank of a G_CONSTANT is also meaningful, so
> this could throw off future folding and legalization logic for AMDGPU.
>
> This will be much more convenient to work with than needing to call
> getConstantVRegVal and checking if it may have failed for every
> constant intrinsic parameter. AMDGPU has quite a lot of intrinsics with
> immarg operands, many of which need inspection during lowering. Having
> to find the value in a register is going to add a lot of boilerplate
> and waste compile time.
>
> SelectionDAG has always provided TargetConstant for constants which
> should not be legalized or materialized in a register. The distinction
> between Constant and TargetConstant was somewhat fuzzy, and there was
> no automatic way to force usage of TargetConstant for certain
> intrinsic parameters. They were both ultimately ConstantSDNode, and it
> was inconsistently used. It was quite easy to mis-select an
> instruction requiring an immediate. For SelectionDAG, start emitting
> TargetConstant for these arguments, and using timm to match them.
>
> Most of the work here is to cleanup target handling of constants. Some
> targets process intrinsics through intermediate custom nodes, which
> need to preserve TargetConstant usage to match the intrinsic
> expectation. Pattern inputs now need to distinguish whether a constant
> is merely compatible with an operand or whether it is mandatory.
>
> The GlobalISelEmitter needs to treat timm as a special case of a leaf
> node, similar to MachineBasicBlock operands. This should also enable
> handling of patterns for some G_* instructions with immediates, like
> G_FENCE or G_EXTRACT.
>
> This does include a workaround for a crash in GlobalISelEmitter when
> ARM tries to use "imm" in an output with a "timm" pattern source.
llvm-svn: 372314
2019-09-19 20:33:07 +08:00
|
|
|
i32:$voffset, i32:$soffset, imm:$offset,
|
|
|
|
imm:$cachepolicy, imm),
|
2016-09-10 21:09:16 +08:00
|
|
|
(EXTRACT_SUBREG
|
2017-07-21 05:06:04 +08:00
|
|
|
(BUFFER_ATOMIC_CMPSWAP_BOTHEN_RTN
|
2016-09-10 21:09:16 +08:00
|
|
|
(REG_SEQUENCE VReg_64, $data, sub0, $cmp, sub1),
|
|
|
|
(REG_SEQUENCE VReg_64, $vindex, sub0, $voffset, sub1),
|
[AMDGPU] New buffer intrinsics
Summary:
This commit adds new intrinsics
llvm.amdgcn.raw.buffer.load
llvm.amdgcn.raw.buffer.load.format
llvm.amdgcn.raw.buffer.load.format.d16
llvm.amdgcn.struct.buffer.load
llvm.amdgcn.struct.buffer.load.format
llvm.amdgcn.struct.buffer.load.format.d16
llvm.amdgcn.raw.buffer.store
llvm.amdgcn.raw.buffer.store.format
llvm.amdgcn.raw.buffer.store.format.d16
llvm.amdgcn.struct.buffer.store
llvm.amdgcn.struct.buffer.store.format
llvm.amdgcn.struct.buffer.store.format.d16
llvm.amdgcn.raw.buffer.atomic.*
llvm.amdgcn.struct.buffer.atomic.*
with the following changes from the llvm.amdgcn.buffer.*
intrinsics:
* there are separate raw and struct versions: raw does not have an
index arg and sets idxen=0 in the instruction, and struct always sets
idxen=1 in the instruction even if the index is 0, to allow for the
fact that gfx9 does bounds checking differently depending on whether
idxen is set;
* there is a combined cachepolicy arg (glc+slc)
* there are now only two offset args: one for the offset that is
included in bounds checking and swizzling, to be split between the
instruction's voffset and immoffset fields, and one for the offset
that is excluded from bounds checking and swizzling, to go into the
instruction's soffset field.
The AMDISD::BUFFER_* SD nodes always have an index operand, all three
offset operands, combined cachepolicy operand, and an extra idxen
operand.
The obsolescent llvm.amdgcn.buffer.* intrinsics continue to work.
Subscribers: arsenm, kzhuravl, wdng, nhaehnle, yaxunl, dstuttard, t-tye, jfb, llvm-commits
Differential Revision: https://reviews.llvm.org/D50306
Change-Id: If897ea7dc34fcbf4d5496e98cc99a934f62fc205
llvm-svn: 340269
2018-08-21 19:07:10 +08:00
|
|
|
$rsrc, $soffset, (as_i16imm $offset), (extract_slc $cachepolicy)),
|
2016-09-10 21:09:16 +08:00
|
|
|
sub0)
|
|
|
|
>;
|
|
|
|
|
2016-11-11 00:02:37 +08:00
|
|
|
// Selection pattern for a load matched by the `constant_ld` fragment whose
// address is decomposed by the MUBUFAddr64 complex pattern. All eight matched
// operands are forwarded to Instr_ADDR64; note that the output lists $vaddr
// ahead of $srsrc, the reverse of the order in which they are matched.
//   Instr_ADDR64 - MUBUF pseudo to select.
//   vt           - result value type of the load.
//   constant_ld  - load fragment to match.
class MUBUFLoad_PatternADDR64 <MUBUF_Pseudo Instr_ADDR64, ValueType vt,
|
2017-10-03 08:06:41 +08:00
|
|
|
PatFrag constant_ld> : GCNPat <
|
2016-09-10 21:09:16 +08:00
|
|
|
(vt (constant_ld (MUBUFAddr64 v4i32:$srsrc, i64:$vaddr, i32:$soffset,
|
2019-05-01 06:08:23 +08:00
|
|
|
i16:$offset, i1:$glc, i1:$slc, i1:$tfe, i1:$dlc))),
|
|
|
|
(Instr_ADDR64 $vaddr, $srsrc, $soffset, $offset, $glc, $slc, $tfe, $dlc)
|
2016-09-10 21:09:16 +08:00
|
|
|
>;
|
|
|
|
|
|
|
|
// Emits two anonymous selection patterns for a load matched by `atomic_ld`:
// one for an address matched by MUBUFAddr64 (selected to Instr_ADDR64) and one
// for an address matched by MUBUFOffsetNoGLC (selected to Instr_OFFSET).
// In both outputs the trailing operands are literal zeros, except that the
// ADDR64 form passes the matched $slc through.
// NOTE(review): MUBUFAddr64 is declared with 8 results at the top of the file,
// but only five operands are bound here; MUBUFAddr64Atomic is the 5-result
// variant using the same selector - confirm which one is intended.
multiclass MUBUFLoad_Atomic_Pattern <MUBUF_Pseudo Instr_ADDR64, MUBUF_Pseudo Instr_OFFSET,
|
|
|
|
ValueType vt, PatFrag atomic_ld> {
|
2017-10-03 08:06:41 +08:00
|
|
|
def : GCNPat <
|
2016-09-10 21:09:16 +08:00
|
|
|
(vt (atomic_ld (MUBUFAddr64 v4i32:$srsrc, i64:$vaddr, i32:$soffset,
|
|
|
|
i16:$offset, i1:$slc))),
|
2019-05-01 06:08:23 +08:00
|
|
|
(Instr_ADDR64 $vaddr, $srsrc, $soffset, $offset, 0, $slc, 0, 0)
|
2016-09-10 21:09:16 +08:00
|
|
|
>;
|
|
|
|
|
2017-10-03 08:06:41 +08:00
|
|
|
def : GCNPat <
|
2016-09-10 21:09:16 +08:00
|
|
|
(vt (atomic_ld (MUBUFOffsetNoGLC v4i32:$rsrc, i32:$soffset, i16:$offset))),
|
2019-05-01 06:08:23 +08:00
|
|
|
(Instr_OFFSET $rsrc, $soffset, (as_i16imm $offset), 0, 0, 0, 0)
|
2016-09-10 21:09:16 +08:00
|
|
|
>;
|
|
|
|
}
|
|
|
|
|
2019-04-06 02:24:34 +08:00
|
|
|
// Pattern instantiations guarded by SubtargetPredicate = isGFX6GFX7.
// The first six map sign-, any- and zero-extending 8/16-bit constant loads
// producing i32 onto the *_ADDR64 byte/short loads (the any-extending and
// zero-extending fragments share the unsigned instruction). The two defm
// lines instantiate the atomic-load patterns for the 32-bit and 64-bit
// global atomic load fragments.
let SubtargetPredicate = isGFX6GFX7 in {
|
2016-11-11 00:02:37 +08:00
|
|
|
def : MUBUFLoad_PatternADDR64 <BUFFER_LOAD_SBYTE_ADDR64, i32, sextloadi8_constant>;
|
2019-07-16 10:46:05 +08:00
|
|
|
def : MUBUFLoad_PatternADDR64 <BUFFER_LOAD_UBYTE_ADDR64, i32, extloadi8_constant>;
|
|
|
|
def : MUBUFLoad_PatternADDR64 <BUFFER_LOAD_UBYTE_ADDR64, i32, zextloadi8_constant>;
|
2016-11-11 00:02:37 +08:00
|
|
|
def : MUBUFLoad_PatternADDR64 <BUFFER_LOAD_SSHORT_ADDR64, i32, sextloadi16_constant>;
|
2019-07-16 10:46:05 +08:00
|
|
|
def : MUBUFLoad_PatternADDR64 <BUFFER_LOAD_USHORT_ADDR64, i32, extloadi16_constant>;
|
|
|
|
def : MUBUFLoad_PatternADDR64 <BUFFER_LOAD_USHORT_ADDR64, i32, zextloadi16_constant>;
|
2016-09-10 21:09:16 +08:00
|
|
|
|
2019-07-17 01:38:50 +08:00
|
|
|
defm : MUBUFLoad_Atomic_Pattern <BUFFER_LOAD_DWORD_ADDR64, BUFFER_LOAD_DWORD_OFFSET, i32, atomic_load_32_global>;
|
|
|
|
defm : MUBUFLoad_Atomic_Pattern <BUFFER_LOAD_DWORDX2_ADDR64, BUFFER_LOAD_DWORDX2_OFFSET, i64, atomic_load_64_global>;
|
2019-04-06 02:24:34 +08:00
|
|
|
} // End SubtargetPredicate = isGFX6GFX7
|
2016-09-10 21:09:16 +08:00
|
|
|
|
2016-11-11 00:02:37 +08:00
|
|
|
// Emits one anonymous selection pattern for a load matched by `ld` whose
// address is decomposed by the MUBUFOffset complex pattern. The seven matched
// operands are passed to Instr_OFFSET unchanged and in the same order.
//   Instr_OFFSET - MUBUF pseudo to select.
//   vt           - result value type of the load.
//   ld           - load fragment to match.
multiclass MUBUFLoad_Pattern <MUBUF_Pseudo Instr_OFFSET, ValueType vt,
|
|
|
|
PatFrag ld> {
|
|
|
|
|
2017-10-03 08:06:41 +08:00
|
|
|
def : GCNPat <
|
2016-11-11 00:02:37 +08:00
|
|
|
(vt (ld (MUBUFOffset v4i32:$srsrc, i32:$soffset,
|
2019-05-01 06:08:23 +08:00
|
|
|
i16:$offset, i1:$glc, i1:$slc, i1:$tfe, i1:$dlc))),
|
|
|
|
(Instr_OFFSET $srsrc, $soffset, $offset, $glc, $slc, $tfe, $dlc)
|
2016-11-11 00:02:37 +08:00
|
|
|
>;
|
|
|
|
}
|
|
|
|
|
2017-10-03 08:06:41 +08:00
|
|
|
// MUBUFLoad_Pattern instantiations guarded by OtherPredicates =
// [Has16BitInsts]; every one produces an i16 result. Extending 8-bit loads
// from the constant and global fragments map to the SBYTE (sign-extending) or
// UBYTE (any-/zero-extending) *_OFFSET instructions, and the plain global
// load maps to BUFFER_LOAD_USHORT_OFFSET.
let OtherPredicates = [Has16BitInsts] in {
|
2016-11-11 00:02:37 +08:00
|
|
|
|
|
|
|
|
defm : MUBUFLoad_Pattern <BUFFER_LOAD_SBYTE_OFFSET, i16, sextloadi8_constant>;
|
2019-07-16 10:46:05 +08:00
|
|
|
defm : MUBUFLoad_Pattern <BUFFER_LOAD_UBYTE_OFFSET, i16, extloadi8_constant>;
|
|
|
|
defm : MUBUFLoad_Pattern <BUFFER_LOAD_UBYTE_OFFSET, i16, zextloadi8_constant>;
|
2019-07-09 00:53:53 +08:00
|
|
|
defm : MUBUFLoad_Pattern <BUFFER_LOAD_SBYTE_OFFSET, i16, sextloadi8_global>;
|
2019-07-16 10:46:05 +08:00
|
|
|
defm : MUBUFLoad_Pattern <BUFFER_LOAD_UBYTE_OFFSET, i16, extloadi8_global>;
|
|
|
|
defm : MUBUFLoad_Pattern <BUFFER_LOAD_UBYTE_OFFSET, i16, zextloadi8_global>;
|
2016-11-11 00:02:37 +08:00
|
|
|
|
2019-07-09 00:53:53 +08:00
|
|
|
defm : MUBUFLoad_Pattern <BUFFER_LOAD_USHORT_OFFSET, i16, load_global>;
|
2017-09-07 13:37:34 +08:00
|
|
|
|
2017-10-03 08:06:41 +08:00
|
|
|
} // End OtherPredicates = [Has16BitInsts]
|
2016-11-11 00:02:37 +08:00
|
|
|
|
2017-04-25 03:40:59 +08:00
|
|
|
// Emits two anonymous selection patterns for a load matched by `ld`:
//  * address matched by MUBUFScratchOffen  -> InstrOffen, with $vaddr first;
//  * address matched by MUBUFScratchOffset -> InstrOffset.
// In both outputs the four operands following $offset are literal zeros.
//   InstrOffen / InstrOffset - MUBUF pseudos to select for each address form.
//   vt                       - result value type of the load.
//   ld                       - load fragment to match.
multiclass MUBUFScratchLoadPat <MUBUF_Pseudo InstrOffen,
|
|
|
|
MUBUF_Pseudo InstrOffset,
|
|
|
|
ValueType vt, PatFrag ld> {
|
2017-10-03 08:06:41 +08:00
|
|
|
def : GCNPat <
|
2017-04-25 03:40:59 +08:00
|
|
|
(vt (ld (MUBUFScratchOffen v4i32:$srsrc, i32:$vaddr,
|
|
|
|
i32:$soffset, u16imm:$offset))),
|
2019-05-01 06:08:23 +08:00
|
|
|
(InstrOffen $vaddr, $srsrc, $soffset, $offset, 0, 0, 0, 0)
|
2017-04-25 03:40:59 +08:00
|
|
|
>;
|
|
|
|
|
2017-10-03 08:06:41 +08:00
|
|
|
def : GCNPat <
|
2017-04-25 03:40:59 +08:00
|
|
|
(vt (ld (MUBUFScratchOffset v4i32:$srsrc, i32:$soffset, u16imm:$offset))),
|
2019-05-01 06:08:23 +08:00
|
|
|
(InstrOffset $srsrc, $soffset, $offset, 0, 0, 0, 0)
|
2017-04-25 03:40:59 +08:00
|
|
|
>;
|
|
|
|
}
|
2016-09-10 21:09:16 +08:00
|
|
|
|
2017-09-20 13:01:53 +08:00
|
|
|
// XXX - Is it possible to have a complex pattern in a PatFrag?
|
2019-03-09 04:58:11 +08:00
|
|
|
// Variant of the scratch load patterns for fragments that take an extra
// value operand: `ld_frag` is matched with the address and a `vt:$in` input,
// and $in is appended as the last operand of the selected instruction.
// Two anonymous patterns are emitted, one per address form
// (MUBUFScratchOffen -> InstrOffen, MUBUFScratchOffset -> InstrOffset); the
// four operands between $offset and $in are literal zeros.
multiclass MUBUFScratchLoadPat_D16 <MUBUF_Pseudo InstrOffen,
|
2017-09-20 13:01:53 +08:00
|
|
|
MUBUF_Pseudo InstrOffset,
|
2019-03-09 04:58:11 +08:00
|
|
|
ValueType vt, PatFrag ld_frag> {
|
2017-10-03 08:06:41 +08:00
|
|
|
def : GCNPat <
|
2019-03-09 04:58:11 +08:00
|
|
|
(ld_frag (MUBUFScratchOffen v4i32:$srsrc, i32:$vaddr, i32:$soffset, u16imm:$offset), vt:$in),
|
2019-05-01 06:08:23 +08:00
|
|
|
(InstrOffen $vaddr, $srsrc, $soffset, $offset, 0, 0, 0, 0, $in)
|
2017-11-13 08:22:09 +08:00
|
|
|
>;
|
|
|
|
|
|
|
|
def : GCNPat <
|
2019-03-09 04:58:11 +08:00
|
|
|
(ld_frag (MUBUFScratchOffset v4i32:$srsrc, i32:$soffset, u16imm:$offset), vt:$in),
|
2019-05-01 06:08:23 +08:00
|
|
|
(InstrOffset $srsrc, $soffset, $offset, 0, 0, 0, 0, $in)
|
2017-11-13 08:22:09 +08:00
|
|
|
>;
|
|
|
|
}
|
|
|
|
|
2017-04-25 03:40:59 +08:00
|
|
|
// Byte loads from the *_private fragments, instantiated for i32 and then i16
// results. Sign-extending loads use SBYTE; any- and zero-extending loads both
// use UBYTE.
foreach vt = [i32, i16] in {
  defm : MUBUFScratchLoadPat <BUFFER_LOAD_SBYTE_OFFEN, BUFFER_LOAD_SBYTE_OFFSET,
                              vt, sextloadi8_private>;
  defm : MUBUFScratchLoadPat <BUFFER_LOAD_UBYTE_OFFEN, BUFFER_LOAD_UBYTE_OFFSET,
                              vt, extloadi8_private>;
  defm : MUBUFScratchLoadPat <BUFFER_LOAD_UBYTE_OFFEN, BUFFER_LOAD_UBYTE_OFFSET,
                              vt, zextloadi8_private>;
}

// 16-bit loads extended to i32: SSHORT for sign extension, USHORT otherwise.
defm : MUBUFScratchLoadPat <BUFFER_LOAD_SSHORT_OFFEN, BUFFER_LOAD_SSHORT_OFFSET,
                            i32, sextloadi16_private>;
defm : MUBUFScratchLoadPat <BUFFER_LOAD_USHORT_OFFEN, BUFFER_LOAD_USHORT_OFFSET,
                            i32, extloadi16_private>;
defm : MUBUFScratchLoadPat <BUFFER_LOAD_USHORT_OFFEN, BUFFER_LOAD_USHORT_OFFSET,
                            i32, zextloadi16_private>;

// Plain (non-extending) i16 load.
defm : MUBUFScratchLoadPat <BUFFER_LOAD_USHORT_OFFEN, BUFFER_LOAD_USHORT_OFFSET,
                            i16, load_private>;
|
2019-09-06 08:36:06 +08:00
|
|
|
|
2019-09-06 08:36:10 +08:00
|
|
|
// Dword loads via load_private, instantiated once per value type listed in
// Reg32Types.types. The loop variable must be passed as the value type:
// hard-coding i32 here would emit the same i32 pattern on every iteration and
// leave the remaining 32-bit types without a load pattern. The matching store
// loop (BUFFER_STORE_DWORD_*) is written the same way.
foreach vt = Reg32Types.types in {
defm : MUBUFScratchLoadPat <BUFFER_LOAD_DWORD_OFFEN, BUFFER_LOAD_DWORD_OFFSET, vt, load_private>;
}
|
2017-04-25 03:40:59 +08:00
|
|
|
// Multi-dword loads via load_private: two, three and four i32 elements map to
// the DWORDX2, DWORDX3 and DWORDX4 instructions respectively.
defm : MUBUFScratchLoadPat <BUFFER_LOAD_DWORDX2_OFFEN, BUFFER_LOAD_DWORDX2_OFFSET,
                            v2i32, load_private>;
defm : MUBUFScratchLoadPat <BUFFER_LOAD_DWORDX3_OFFEN, BUFFER_LOAD_DWORDX3_OFFSET,
                            v3i32, load_private>;
defm : MUBUFScratchLoadPat <BUFFER_LOAD_DWORDX4_OFFEN, BUFFER_LOAD_DWORDX4_OFFSET,
                            v4i32, load_private>;
|
2016-09-10 21:09:16 +08:00
|
|
|
|
2018-05-05 04:06:57 +08:00
|
|
|
// D16 loads through MUBUFScratchLoadPat_D16, all guarded by
// D16PreservesUnusedBits. Each group is instantiated for v2i16 and then v2f16,
// in the order short / zero-or-any-extended byte / sign-extended byte.
let OtherPredicates = [D16PreservesUnusedBits] in {
  // *_D16_HI instructions paired with the *_d16_hi_private fragments.
  foreach vt = [v2i16, v2f16] in {
    defm : MUBUFScratchLoadPat_D16<BUFFER_LOAD_SHORT_D16_HI_OFFEN, BUFFER_LOAD_SHORT_D16_HI_OFFSET,
                                   vt, load_d16_hi_private>;
    defm : MUBUFScratchLoadPat_D16<BUFFER_LOAD_UBYTE_D16_HI_OFFEN, BUFFER_LOAD_UBYTE_D16_HI_OFFSET,
                                   vt, az_extloadi8_d16_hi_private>;
    defm : MUBUFScratchLoadPat_D16<BUFFER_LOAD_SBYTE_D16_HI_OFFEN, BUFFER_LOAD_SBYTE_D16_HI_OFFSET,
                                   vt, sextloadi8_d16_hi_private>;
  }

  // *_D16 instructions paired with the *_d16_lo_private fragments.
  foreach vt = [v2i16, v2f16] in {
    defm : MUBUFScratchLoadPat_D16<BUFFER_LOAD_SHORT_D16_OFFEN, BUFFER_LOAD_SHORT_D16_OFFSET,
                                   vt, load_d16_lo_private>;
    defm : MUBUFScratchLoadPat_D16<BUFFER_LOAD_UBYTE_D16_OFFEN, BUFFER_LOAD_UBYTE_D16_OFFSET,
                                   vt, az_extloadi8_d16_lo_private>;
    defm : MUBUFScratchLoadPat_D16<BUFFER_LOAD_SBYTE_D16_OFFEN, BUFFER_LOAD_SBYTE_D16_OFFSET,
                                   vt, sextloadi8_d16_lo_private>;
  }
}
|
2019-04-06 02:24:34 +08:00
|
|
|
|
2016-09-10 21:09:16 +08:00
|
|
|
// Selects the atomic store fragment `atomic_st` of a `vt` value to either the
// ADDR64 or the OFFSET form of a MUBUF store.
//
// The store follows the atomic-op convention, so the address is the first
// operand of `atomic_st` and the stored value comes second.
//
// NOTE(review): the first pattern binds five operands of MUBUFAddr64, which is
// declared with 8 results at the top of this file, whereas MUBUFAddr64Atomic is
// declared with 5 -- confirm which complex pattern is intended here.
multiclass MUBUFStore_Atomic_Pattern <MUBUF_Pseudo Instr_ADDR64, MUBUF_Pseudo Instr_OFFSET,
                                      ValueType vt, PatFrag atomic_st> {
  // 64-bit VGPR address form; only $slc is forwarded, the other three trailing
  // operands are the constant 0.
  def : GCNPat <
    (atomic_st (MUBUFAddr64 v4i32:$srsrc, i64:$vaddr, i32:$soffset,
                            i16:$offset, i1:$slc), vt:$val),
    (Instr_ADDR64 $val, $vaddr, $srsrc, $soffset, $offset, 0, $slc, 0, 0)
  >;

  // Offset-only form; all four trailing operands are the constant 0.
  def : GCNPat <
    (atomic_st (MUBUFOffsetNoGLC v4i32:$rsrc, i32:$soffset, i16:$offset), vt:$val),
    (Instr_OFFSET $val, $rsrc, $soffset, (as_i16imm $offset), 0, 0, 0, 0)
  >;
}
|
2019-04-06 02:24:34 +08:00
|
|
|
// Atomic global stores of i32 and i64 values, enabled only when the
// isGFX6GFX7 subtarget predicate holds.
let SubtargetPredicate = isGFX6GFX7 in {
  defm : MUBUFStore_Atomic_Pattern <BUFFER_STORE_DWORD_ADDR64, BUFFER_STORE_DWORD_OFFSET,
                                    i32, store_atomic_global>;
  defm : MUBUFStore_Atomic_Pattern <BUFFER_STORE_DWORDX2_ADDR64, BUFFER_STORE_DWORDX2_OFFSET,
                                    i64, store_atomic_global>;
} // End SubtargetPredicate = isGFX6GFX7
|
2016-09-10 21:09:16 +08:00
|
|
|
|
2016-11-11 00:02:37 +08:00
|
|
|
|
|
|
|
// Selects the store fragment `st` of a `vt` value whose address is matched by
// MUBUFOffset to Instr_OFFSET. The glc, slc, tfe and dlc bits produced by the
// complex pattern are forwarded unchanged, in that order, as the trailing
// operands of the instruction.
multiclass MUBUFStore_Pattern <MUBUF_Pseudo Instr_OFFSET, ValueType vt,
                               PatFrag st> {
  def : GCNPat <
    (st vt:$vdata,
        (MUBUFOffset v4i32:$srsrc, i32:$soffset, i16:$offset,
                     i1:$glc, i1:$slc, i1:$tfe, i1:$dlc)),
    (Instr_OFFSET $vdata, $srsrc, $soffset, $offset, $glc, $slc, $tfe, $dlc)
  >;
}
|
|
|
|
|
|
|
|
// i16 global stores: a truncating 8-bit store selects BYTE, a plain store
// selects SHORT.
defm : MUBUFStore_Pattern <BUFFER_STORE_BYTE_OFFSET,
                           i16, truncstorei8_global>;
defm : MUBUFStore_Pattern <BUFFER_STORE_SHORT_OFFSET,
                           i16, store_global>;
|
2016-11-11 00:02:37 +08:00
|
|
|
|
2017-04-25 03:40:59 +08:00
|
|
|
// Emits the two selection patterns that map the store fragment `st` of a `vt`
// value onto a MUBUF pseudo:
//   * InstrOffen  is selected when MUBUFScratchOffen matches the address.
//   * InstrOffset is selected when MUBUFScratchOffset matches the address.
// `rc` is the register class applied to the stored value in the result
// pattern; it defaults to VGPR_32. The four trailing operands of both results
// are always the constant 0.
multiclass MUBUFScratchStorePat <MUBUF_Pseudo InstrOffen,
                                 MUBUF_Pseudo InstrOffset,
                                 ValueType vt, PatFrag st,
                                 RegisterClass rc = VGPR_32> {
  // Address matched with a VGPR component.
  def : GCNPat <
    (st vt:$value,
        (MUBUFScratchOffen v4i32:$srsrc, i32:$vaddr,
                           i32:$soffset, u16imm:$offset)),
    (InstrOffen rc:$value, $vaddr, $srsrc, $soffset, $offset, 0, 0, 0, 0)
  >;

  // Address matched without a VGPR component.
  def : GCNPat <
    (st vt:$value,
        (MUBUFScratchOffset v4i32:$srsrc, i32:$soffset, u16imm:$offset)),
    (InstrOffset rc:$value, $srsrc, $soffset, $offset, 0, 0, 0, 0)
  >;
}
|
2016-09-10 21:09:16 +08:00
|
|
|
|
2017-04-25 03:40:59 +08:00
|
|
|
// Truncating stores of an i32 value.
defm : MUBUFScratchStorePat <BUFFER_STORE_BYTE_OFFEN, BUFFER_STORE_BYTE_OFFSET,
                             i32, truncstorei8_private>;
defm : MUBUFScratchStorePat <BUFFER_STORE_SHORT_OFFEN, BUFFER_STORE_SHORT_OFFSET,
                             i32, truncstorei16_private>;

// Stores of an i16 value: truncating to a byte, or stored as-is.
defm : MUBUFScratchStorePat <BUFFER_STORE_BYTE_OFFEN, BUFFER_STORE_BYTE_OFFSET,
                             i16, truncstorei8_private>;
defm : MUBUFScratchStorePat <BUFFER_STORE_SHORT_OFFEN, BUFFER_STORE_SHORT_OFFSET,
                             i16, store_private>;

// Dword stores, one instantiation per value type in Reg32Types.types.
foreach vt = Reg32Types.types in {
  defm : MUBUFScratchStorePat <BUFFER_STORE_DWORD_OFFEN, BUFFER_STORE_DWORD_OFFSET,
                               vt, store_private>;
}

// Multi-dword stores; the register class is passed explicitly because the
// multiclass default is VGPR_32.
defm : MUBUFScratchStorePat <BUFFER_STORE_DWORDX2_OFFEN, BUFFER_STORE_DWORDX2_OFFSET,
                             v2i32, store_private, VReg_64>;
defm : MUBUFScratchStorePat <BUFFER_STORE_DWORDX3_OFFEN, BUFFER_STORE_DWORDX3_OFFSET,
                             v3i32, store_private, VReg_96>;
defm : MUBUFScratchStorePat <BUFFER_STORE_DWORDX4_OFFEN, BUFFER_STORE_DWORDX4_OFFSET,
                             v4i32, store_private, VReg_128>;
|
2016-09-10 21:09:16 +08:00
|
|
|
|
2017-09-20 11:20:09 +08:00
|
|
|
|
2018-05-05 04:06:57 +08:00
|
|
|
// D16_HI stores, guarded by D16PreservesUnusedBits.
//
// Hiding the extract-high pattern inside the PatFrag does not seem to raise
// the pattern complexity automatically, so AddedComplexity is set to 1 by hand.
let OtherPredicates = [D16PreservesUnusedBits], AddedComplexity = 1 in {
  defm : MUBUFScratchStorePat <BUFFER_STORE_SHORT_D16_HI_OFFEN, BUFFER_STORE_SHORT_D16_HI_OFFSET,
                               i32, store_hi16_private>;
  defm : MUBUFScratchStorePat <BUFFER_STORE_BYTE_D16_HI_OFFEN, BUFFER_STORE_BYTE_D16_HI_OFFSET,
                               i32, truncstorei8_hi16_private>;
}
|
|
|
|
|
2016-09-10 21:09:16 +08:00
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
// MTBUF Patterns
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
|
2017-06-23 00:29:22 +08:00
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
// tbuffer_load/store_format patterns
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
|
|
|
|
// Selects the tbuffer load node `name`, producing `vt`, to one of the four
// addressing variants of the MTBUF pseudo whose record name is `opcode` plus a
// suffix. The node operands are, in order:
//   rsrc, vindex, voffset, soffset, offset, format, cachepolicy, <last>
// and the variant is chosen by which operands are the literal 0:
//
//   vindex  voffset  <last>   selected pseudo
//   0       0        0        opcode # _OFFSET
//   reg     0        imm      opcode # _IDXEN
//   0       reg      0        opcode # _OFFEN
//   reg     reg      imm      opcode # _BOTHEN
//
// NOTE(review): <last> is presumably the idxen flag -- confirm against the
// node definition.
//
// Every variant converts $offset with as_i16imm and $format with as_i8imm,
// derives the glc, slc and dlc operands from $cachepolicy, and passes a
// constant 0 between the slc and dlc operands.
multiclass MTBUF_LoadIntrinsicPat<SDPatternOperator name, ValueType vt,
                                  string opcode> {
  // Neither vindex nor voffset.
  def : GCNPat<
    (vt (name v4i32:$rsrc, 0, 0, i32:$soffset, imm:$offset,
              imm:$format, imm:$cachepolicy, 0)),
    (!cast<MTBUF_Pseudo>(opcode # _OFFSET)
      $rsrc, $soffset, (as_i16imm $offset),
      (as_i8imm $format),
      (extract_glc $cachepolicy), (extract_slc $cachepolicy), 0, (extract_dlc $cachepolicy))
  >;

  // vindex only.
  def : GCNPat<
    (vt (name v4i32:$rsrc, i32:$vindex, 0, i32:$soffset, imm:$offset,
              imm:$format, imm:$cachepolicy, imm)),
    (!cast<MTBUF_Pseudo>(opcode # _IDXEN)
      $vindex, $rsrc, $soffset, (as_i16imm $offset),
      (as_i8imm $format),
      (extract_glc $cachepolicy), (extract_slc $cachepolicy), 0, (extract_dlc $cachepolicy))
  >;

  // voffset only.
  def : GCNPat<
    (vt (name v4i32:$rsrc, 0, i32:$voffset, i32:$soffset, imm:$offset,
              imm:$format, imm:$cachepolicy, 0)),
    (!cast<MTBUF_Pseudo>(opcode # _OFFEN)
      $voffset, $rsrc, $soffset, (as_i16imm $offset),
      (as_i8imm $format),
      (extract_glc $cachepolicy), (extract_slc $cachepolicy), 0, (extract_dlc $cachepolicy))
  >;

  // Both vindex and voffset: packed into one VReg_64, vindex in sub0 and
  // voffset in sub1.
  def : GCNPat<
    (vt (name v4i32:$rsrc, i32:$vindex, i32:$voffset, i32:$soffset, imm:$offset,
              imm:$format, imm:$cachepolicy, imm)),
    (!cast<MTBUF_Pseudo>(opcode # _BOTHEN)
      (REG_SEQUENCE VReg_64, $vindex, sub0, $voffset, sub1),
      $rsrc, $soffset, (as_i16imm $offset),
      (as_i8imm $format),
      (extract_glc $cachepolicy), (extract_slc $cachepolicy), 0, (extract_dlc $cachepolicy))
  >;
}
|
|
|
|
|
|
|
|
// SItbuffer_load for integer results: 1 to 4 elements select the X, XY, XYZ
// and XYZW formats.
defm : MTBUF_LoadIntrinsicPat<SItbuffer_load, i32,   "TBUFFER_LOAD_FORMAT_X">;
defm : MTBUF_LoadIntrinsicPat<SItbuffer_load, v2i32, "TBUFFER_LOAD_FORMAT_XY">;
defm : MTBUF_LoadIntrinsicPat<SItbuffer_load, v3i32, "TBUFFER_LOAD_FORMAT_XYZ">;
defm : MTBUF_LoadIntrinsicPat<SItbuffer_load, v4i32, "TBUFFER_LOAD_FORMAT_XYZW">;

// SItbuffer_load for floating-point results, same opcode mapping.
defm : MTBUF_LoadIntrinsicPat<SItbuffer_load, f32,   "TBUFFER_LOAD_FORMAT_X">;
defm : MTBUF_LoadIntrinsicPat<SItbuffer_load, v2f32, "TBUFFER_LOAD_FORMAT_XY">;
defm : MTBUF_LoadIntrinsicPat<SItbuffer_load, v3f32, "TBUFFER_LOAD_FORMAT_XYZ">;
defm : MTBUF_LoadIntrinsicPat<SItbuffer_load, v4f32, "TBUFFER_LOAD_FORMAT_XYZW">;
|
|
|
|
|
2018-01-13 05:12:19 +08:00
|
|
|
// SItbuffer_load_d16 when the HasUnpackedD16VMem predicate holds: the result
// types are f16, v2i32 and v4i32, and the *_gfx80 pseudos are selected.
let SubtargetPredicate = HasUnpackedD16VMem in {
  defm : MTBUF_LoadIntrinsicPat<SItbuffer_load_d16, f16,   "TBUFFER_LOAD_FORMAT_D16_X_gfx80">;
  defm : MTBUF_LoadIntrinsicPat<SItbuffer_load_d16, v2i32, "TBUFFER_LOAD_FORMAT_D16_XY_gfx80">;
  defm : MTBUF_LoadIntrinsicPat<SItbuffer_load_d16, v4i32, "TBUFFER_LOAD_FORMAT_D16_XYZW_gfx80">;
} // End HasUnpackedD16VMem.
|
|
|
|
|
|
|
|
// SItbuffer_load_d16 when the HasPackedD16VMem predicate holds: the result
// types are f16, v2f16 and v4f16.
let SubtargetPredicate = HasPackedD16VMem in {
  defm : MTBUF_LoadIntrinsicPat<SItbuffer_load_d16, f16,   "TBUFFER_LOAD_FORMAT_D16_X">;
  defm : MTBUF_LoadIntrinsicPat<SItbuffer_load_d16, v2f16, "TBUFFER_LOAD_FORMAT_D16_XY">;
  defm : MTBUF_LoadIntrinsicPat<SItbuffer_load_d16, v4f16, "TBUFFER_LOAD_FORMAT_D16_XYZW">;
} // End HasPackedD16VMem.
|
|
|
|
|
2017-06-23 00:29:22 +08:00
|
|
|
// Selection patterns for a tbuffer store node.
//
//   name   - the store node to match.
//   vt     - type of the stored data ($vdata).
//   opcode - stem of the MTBUF pseudo; the addressing-mode suffix and
//            "_exact" are pasted on to pick the concrete pseudo.
//
// The source node operands are, in order: vdata, rsrc, vindex, voffset,
// soffset, offset, format, cachepolicy and one trailing immediate
// (presumably the idxen flag -- confirm against the node definition).
// Four patterns are emitted, one per addressing mode, distinguished by which
// of vindex/voffset are the literal 0 and by the trailing operand.
//
// In every output, glc/slc/dlc are extracted from the combined cachepolicy
// immediate; the literal 0 between slc and dlc is passed through unchanged
// (presumably tfe -- confirm against the MTBUF_Pseudo operand list).
multiclass MTBUF_StoreIntrinsicPat<SDPatternOperator name, ValueType vt,
                                   string opcode> {
  // No index, no register offset, trailing operand 0 -> OFFSET form.
  def : GCNPat<
    (name vt:$vdata, v4i32:$rsrc, 0, 0, i32:$soffset, imm:$offset,
          imm:$format, imm:$cachepolicy, 0),
    (!cast<MTBUF_Pseudo>(opcode # _OFFSET_exact)
      $vdata, $rsrc, $soffset,
      (as_i16imm $offset), (as_i8imm $format),
      (extract_glc $cachepolicy), (extract_slc $cachepolicy), 0, (extract_dlc $cachepolicy))
  >;

  // Register index, no register offset, any trailing immediate -> IDXEN form.
  def : GCNPat<
    (name vt:$vdata, v4i32:$rsrc, i32:$vindex, 0, i32:$soffset, imm:$offset,
          imm:$format, imm:$cachepolicy, imm),
    (!cast<MTBUF_Pseudo>(opcode # _IDXEN_exact)
      $vdata, $vindex, $rsrc, $soffset,
      (as_i16imm $offset), (as_i8imm $format),
      (extract_glc $cachepolicy), (extract_slc $cachepolicy), 0, (extract_dlc $cachepolicy))
  >;

  // No index, register offset, trailing operand 0 -> OFFEN form.
  def : GCNPat<
    (name vt:$vdata, v4i32:$rsrc, 0, i32:$voffset, i32:$soffset, imm:$offset,
          imm:$format, imm:$cachepolicy, 0),
    (!cast<MTBUF_Pseudo>(opcode # _OFFEN_exact)
      $vdata, $voffset, $rsrc, $soffset,
      (as_i16imm $offset), (as_i8imm $format),
      (extract_glc $cachepolicy), (extract_slc $cachepolicy), 0, (extract_dlc $cachepolicy))
  >;

  // Register index and register offset, any trailing immediate -> BOTHEN
  // form.  The two address registers are combined into one VReg_64 with
  // vindex in sub0 and voffset in sub1.
  def : GCNPat<
    (name vt:$vdata, v4i32:$rsrc, i32:$vindex, i32:$voffset, i32:$soffset,
          imm:$offset, imm:$format, imm:$cachepolicy, imm),
    (!cast<MTBUF_Pseudo>(opcode # _BOTHEN_exact)
      $vdata,
      (REG_SEQUENCE VReg_64, $vindex, sub0, $voffset, sub1),
      $rsrc, $soffset, (as_i16imm $offset), (as_i8imm $format),
      (extract_glc $cachepolicy), (extract_slc $cachepolicy), 0, (extract_dlc $cachepolicy))
  >;
}
|
|
|
|
|
|
|
|
// Instantiate the tbuffer store selection patterns for SItbuffer_store.
// One instantiation per stored data type: the integer types first, then the
// floating-point types, each mapped onto the X/XY/XYZ/XYZW opcode stem.
defm : MTBUF_StoreIntrinsicPat<SItbuffer_store, i32,   "TBUFFER_STORE_FORMAT_X">;
defm : MTBUF_StoreIntrinsicPat<SItbuffer_store, v2i32, "TBUFFER_STORE_FORMAT_XY">;
defm : MTBUF_StoreIntrinsicPat<SItbuffer_store, v3i32, "TBUFFER_STORE_FORMAT_XYZ">;
defm : MTBUF_StoreIntrinsicPat<SItbuffer_store, v4i32, "TBUFFER_STORE_FORMAT_XYZW">;
defm : MTBUF_StoreIntrinsicPat<SItbuffer_store, f32,   "TBUFFER_STORE_FORMAT_X">;
defm : MTBUF_StoreIntrinsicPat<SItbuffer_store, v2f32, "TBUFFER_STORE_FORMAT_XY">;
defm : MTBUF_StoreIntrinsicPat<SItbuffer_store, v3f32, "TBUFFER_STORE_FORMAT_XYZ">;
defm : MTBUF_StoreIntrinsicPat<SItbuffer_store, v4f32, "TBUFFER_STORE_FORMAT_XYZW">;
|
2016-09-10 21:09:16 +08:00
|
|
|
|
2018-01-13 05:12:19 +08:00
|
|
|
// D16 tbuffer store patterns that apply only when the HasUnpackedD16VMem
// subtarget predicate holds.  These select the "_gfx80" opcode stems; note
// that the XY and XYZW data operands are typed v2i32 / v4i32 here.
let SubtargetPredicate = HasUnpackedD16VMem in {
  defm : MTBUF_StoreIntrinsicPat<SItbuffer_store_d16, f16,   "TBUFFER_STORE_FORMAT_D16_X_gfx80">;
  defm : MTBUF_StoreIntrinsicPat<SItbuffer_store_d16, v2i32, "TBUFFER_STORE_FORMAT_D16_XY_gfx80">;
  defm : MTBUF_StoreIntrinsicPat<SItbuffer_store_d16, v4i32, "TBUFFER_STORE_FORMAT_D16_XYZW_gfx80">;
} // End HasUnpackedD16VMem.
|
|
|
|
|
|
|
|
// D16 tbuffer store patterns that apply only when the HasPackedD16VMem
// subtarget predicate holds.  Data operands use the f16 vector types
// directly (f16 / v2f16 / v4f16) and select the unsuffixed D16 opcode stems.
let SubtargetPredicate = HasPackedD16VMem in {
  defm : MTBUF_StoreIntrinsicPat<SItbuffer_store_d16, f16,   "TBUFFER_STORE_FORMAT_D16_X">;
  defm : MTBUF_StoreIntrinsicPat<SItbuffer_store_d16, v2f16, "TBUFFER_STORE_FORMAT_D16_XY">;
  defm : MTBUF_StoreIntrinsicPat<SItbuffer_store_d16, v4f16, "TBUFFER_STORE_FORMAT_D16_XYZW">;
} // End HasPackedD16VMem.
|
|
|
|
|
2016-09-10 21:09:16 +08:00
|
|
|
//===----------------------------------------------------------------------===//
|
2019-04-06 02:24:34 +08:00
|
|
|
// Target-specific instruction encodings.
|
2016-09-10 21:09:16 +08:00
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
|
|
|
|
//===----------------------------------------------------------------------===//
|
2019-05-01 06:08:23 +08:00
|
|
|
// Base ENC_MUBUF for GFX6, GFX7, GFX10.
|
2016-09-10 21:09:16 +08:00
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
|
2019-05-01 06:08:23 +08:00
|
|
|
// Shared 64-bit MUBUF encoding for the GFX6, GFX7 and GFX10 real
// instructions.
//
//   op - low seven bits of the opcode, placed in Inst{24-18}.
//   ps - the MUBUF pseudo this real instruction encodes.
//   ef - encoding family forwarded to SIMCInstr.
//
// Fields guarded by a ps.has_* flag are left unset ('?') when the pseudo
// does not have that operand.  Inst{15} and Inst{25} are not assigned here;
// the generation-specific subclasses fill in what they need.
class Base_MUBUF_Real_gfx6_gfx7_gfx10<bits<7> op, MUBUF_Pseudo ps, int ef> :
    MUBUF_Real<ps>, Enc64, SIMCInstr<ps.PseudoInstr, ef> {
  // First dword.
  let Inst{11-0}  = !if(ps.has_offset, offset, ?);
  let Inst{12}    = ps.offen;
  let Inst{13}    = ps.idxen;
  // glc comes from the operand when present, otherwise from the pseudo's
  // fixed value.
  let Inst{14}    = !if(ps.has_glc, glc, ps.glc_value);
  let Inst{16}    = !if(ps.lds, 1, 0);
  let Inst{24-18} = op;
  let Inst{31-26} = 0x38;

  // Second dword.
  let Inst{39-32} = !if(ps.has_vaddr, vaddr, ?);
  let Inst{47-40} = !if(ps.has_vdata, vdata, ?);
  // Only bits 6..2 of srsrc are encoded.
  let Inst{52-48} = !if(ps.has_srsrc, srsrc{6-2}, ?);
  let Inst{54}    = !if(ps.has_slc, slc, ?);
  let Inst{55}    = !if(ps.has_tfe, tfe, ?);
  let Inst{63-56} = !if(ps.has_soffset, soffset, ?);
}
|
|
|
|
|
2019-05-01 06:08:23 +08:00
|
|
|
// GFX10 real MUBUF instruction.  The 8-bit opcode is split: the low seven
// bits go to the shared base encoding and the top bit is stored in Inst{25}.
// Inst{15} carries dlc, taken from the operand when the pseudo has one and
// from the pseudo's fixed dlc_value otherwise.
class MUBUF_Real_gfx10<bits<8> op, MUBUF_Pseudo ps> :
    Base_MUBUF_Real_gfx6_gfx7_gfx10<op{6-0}, ps, SIEncodingFamily.GFX10> {
  let Inst{15} = !if(ps.has_dlc, dlc, ps.dlc_value);
  let Inst{25} = op{7};
}
|
|
|
|
|
|
|
|
// GFX6/GFX7 real MUBUF instruction, using the SI encoding family.  Only the
// low seven bits of `op` are encoded (via the shared base class); Inst{15}
// holds the pseudo's addr64 flag.
class MUBUF_Real_gfx6_gfx7<bits<8> op, MUBUF_Pseudo ps> :
    Base_MUBUF_Real_gfx6_gfx7_gfx10<op{6-0}, ps, SIEncodingFamily.SI> {
  let Inst{15} = ps.addr64;
}
|
|
|
|
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
// MUBUF - GFX10.
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
|
|
|
|
// Multiclasses producing the GFX10 real MUBUF instructions.  Everything
// defined through them gets the isGFX10Plus assembler predicate and the
// "GFX10" decoder namespace.
let AssemblerPredicate = isGFX10Plus, DecoderNamespace = "GFX10" in {
  // Emits <NAME>_gfx10 for the pseudo called `opName`, overriding the asm
  // string so that it is `asmName` followed by the pseudo's operand string.
  multiclass MUBUF_Real_gfx10_with_name<bits<8> op, string opName,
                                        string asmName> {
    def _gfx10 : MUBUF_Real_gfx10<op, !cast<MUBUF_Pseudo>(opName)> {
      MUBUF_Pseudo ps = !cast<MUBUF_Pseudo>(opName);
      let AsmString = asmName # ps.AsmOperands;
    }
  }

  // One real instruction per addressing-mode pseudo of NAME
  // (BOTHEN, IDXEN, OFFEN, OFFSET), all sharing opcode `op`.
  multiclass MUBUF_Real_AllAddr_gfx10<bits<8> op> {
    def _BOTHEN_gfx10 :
      MUBUF_Real_gfx10<op, !cast<MUBUF_Pseudo>(NAME # "_BOTHEN")>;
    def _IDXEN_gfx10 :
      MUBUF_Real_gfx10<op, !cast<MUBUF_Pseudo>(NAME # "_IDXEN")>;
    def _OFFEN_gfx10 :
      MUBUF_Real_gfx10<op, !cast<MUBUF_Pseudo>(NAME # "_OFFEN")>;
    def _OFFSET_gfx10 :
      MUBUF_Real_gfx10<op, !cast<MUBUF_Pseudo>(NAME # "_OFFSET")>;
  }

  // As above, but each addressing mode is emitted twice: the plain form and
  // the _LDS form.  Both members of a pair carry an MUBUFLdsTable entry with
  // the same name string (the plain real instruction's name) and a first
  // argument of 0 for the plain form, 1 for the LDS form.
  multiclass MUBUF_Real_AllAddr_Lds_gfx10<bits<8> op> {
    // Plain forms.
    def _OFFSET_gfx10 : MUBUF_Real_gfx10<op, !cast<MUBUF_Pseudo>(NAME # "_OFFSET")>,
                        MUBUFLdsTable<0, NAME # "_OFFSET_gfx10">;
    def _OFFEN_gfx10  : MUBUF_Real_gfx10<op, !cast<MUBUF_Pseudo>(NAME # "_OFFEN")>,
                        MUBUFLdsTable<0, NAME # "_OFFEN_gfx10">;
    def _IDXEN_gfx10  : MUBUF_Real_gfx10<op, !cast<MUBUF_Pseudo>(NAME # "_IDXEN")>,
                        MUBUFLdsTable<0, NAME # "_IDXEN_gfx10">;
    def _BOTHEN_gfx10 : MUBUF_Real_gfx10<op, !cast<MUBUF_Pseudo>(NAME # "_BOTHEN")>,
                        MUBUFLdsTable<0, NAME # "_BOTHEN_gfx10">;

    // LDS forms.
    def _LDS_OFFSET_gfx10 : MUBUF_Real_gfx10<op, !cast<MUBUF_Pseudo>(NAME # "_LDS_OFFSET")>,
                            MUBUFLdsTable<1, NAME # "_OFFSET_gfx10">;
    def _LDS_OFFEN_gfx10  : MUBUF_Real_gfx10<op, !cast<MUBUF_Pseudo>(NAME # "_LDS_OFFEN")>,
                            MUBUFLdsTable<1, NAME # "_OFFEN_gfx10">;
    def _LDS_IDXEN_gfx10  : MUBUF_Real_gfx10<op, !cast<MUBUF_Pseudo>(NAME # "_LDS_IDXEN")>,
                            MUBUFLdsTable<1, NAME # "_IDXEN_gfx10">;
    def _LDS_BOTHEN_gfx10 : MUBUF_Real_gfx10<op, !cast<MUBUF_Pseudo>(NAME # "_LDS_BOTHEN")>,
                            MUBUFLdsTable<1, NAME # "_BOTHEN_gfx10">;
  }

  // Atomics: the four addressing modes inherited from
  // MUBUF_Real_AllAddr_gfx10 plus the matching _RTN pseudos, all with the
  // same opcode.
  multiclass MUBUF_Real_Atomics_gfx10<bits<8> op> :
      MUBUF_Real_AllAddr_gfx10<op> {
    def _BOTHEN_RTN_gfx10 :
      MUBUF_Real_gfx10<op, !cast<MUBUF_Pseudo>(NAME # "_BOTHEN_RTN")>;
    def _IDXEN_RTN_gfx10 :
      MUBUF_Real_gfx10<op, !cast<MUBUF_Pseudo>(NAME # "_IDXEN_RTN")>;
    def _OFFEN_RTN_gfx10 :
      MUBUF_Real_gfx10<op, !cast<MUBUF_Pseudo>(NAME # "_OFFEN_RTN")>;
    def _OFFSET_RTN_gfx10 :
      MUBUF_Real_gfx10<op, !cast<MUBUF_Pseudo>(NAME # "_OFFSET_RTN")>;
  }
} // End AssemblerPredicate = isGFX10Plus, DecoderNamespace = "GFX10"
|
|
|
|
|
|
|
|
// Real encodings instantiated for GFX10 only (via MUBUF_Real_AllAddr_gfx10).

// D16 byte/short stores and loads.
defm BUFFER_STORE_BYTE_D16_HI     : MUBUF_Real_AllAddr_gfx10<0x019>;
defm BUFFER_STORE_SHORT_D16_HI    : MUBUF_Real_AllAddr_gfx10<0x01b>;
defm BUFFER_LOAD_UBYTE_D16        : MUBUF_Real_AllAddr_gfx10<0x020>;
defm BUFFER_LOAD_UBYTE_D16_HI     : MUBUF_Real_AllAddr_gfx10<0x021>;
defm BUFFER_LOAD_SBYTE_D16        : MUBUF_Real_AllAddr_gfx10<0x022>;
defm BUFFER_LOAD_SBYTE_D16_HI     : MUBUF_Real_AllAddr_gfx10<0x023>;
defm BUFFER_LOAD_SHORT_D16        : MUBUF_Real_AllAddr_gfx10<0x024>;
defm BUFFER_LOAD_SHORT_D16_HI     : MUBUF_Real_AllAddr_gfx10<0x025>;

// FIXME-GFX10: Add the following instructions:
//defm BUFFER_LOAD_FORMAT_D16_HI_X : MUBUF_Real_AllAddr_gfx10<0x026>;
//defm BUFFER_STORE_FORMAT_D16_HI_X : MUBUF_Real_AllAddr_gfx10<0x027>;

// D16 format loads and stores.
defm BUFFER_LOAD_FORMAT_D16_X     : MUBUF_Real_AllAddr_gfx10<0x080>;
defm BUFFER_LOAD_FORMAT_D16_XY    : MUBUF_Real_AllAddr_gfx10<0x081>;
defm BUFFER_LOAD_FORMAT_D16_XYZ   : MUBUF_Real_AllAddr_gfx10<0x082>;
defm BUFFER_LOAD_FORMAT_D16_XYZW  : MUBUF_Real_AllAddr_gfx10<0x083>;
defm BUFFER_STORE_FORMAT_D16_X    : MUBUF_Real_AllAddr_gfx10<0x084>;
defm BUFFER_STORE_FORMAT_D16_XY   : MUBUF_Real_AllAddr_gfx10<0x085>;
defm BUFFER_STORE_FORMAT_D16_XYZ  : MUBUF_Real_AllAddr_gfx10<0x086>;
defm BUFFER_STORE_FORMAT_D16_XYZW : MUBUF_Real_AllAddr_gfx10<0x087>;
|
|
|
|
|
|
|
|
// GFX10 real encodings of the BUFFER_GL0_INV and BUFFER_GL1_INV pseudos.
// These are defined directly rather than through an addressing-mode
// multiclass.
def BUFFER_GL0_INV_gfx10 : MUBUF_Real_gfx10<0x071, BUFFER_GL0_INV>;
def BUFFER_GL1_INV_gfx10 : MUBUF_Real_gfx10<0x072, BUFFER_GL1_INV>;
|
|
|
|
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
// MUBUF - GFX6, GFX7, GFX10.
|
|
|
|
//===----------------------------------------------------------------------===//
|
2016-09-24 05:21:21 +08:00
|
|
|
|
2019-05-01 06:08:23 +08:00
|
|
|
// GFX6-only real instruction: emits <NAME>_gfx6 encoding the pseudo whose
// name is exactly NAME (no addressing-mode suffix is appended).
let AssemblerPredicate = isGFX6, DecoderNamespace = "GFX6" in {
  multiclass MUBUF_Real_gfx6<bits<8> op> {
    def _gfx6 :
      MUBUF_Real_gfx6_gfx7<op, !cast<MUBUF_Pseudo>(NAME)>;
  }
} // End AssemblerPredicate = isGFX6, DecoderNamespace = "GFX6"
|
|
|
|
|
|
|
|
// GFX7-only real instruction: emits <NAME>_gfx7 encoding the pseudo whose
// name is exactly NAME (no addressing-mode suffix is appended).
let AssemblerPredicate = isGFX7Only, DecoderNamespace = "GFX7" in {
  multiclass MUBUF_Real_gfx7<bits<8> op> {
    def _gfx7 :
      MUBUF_Real_gfx6_gfx7<op, !cast<MUBUF_Pseudo>(NAME)>;
  }
} // End AssemblerPredicate = isGFX7Only, DecoderNamespace = "GFX7"
|
|
|
|
|
|
|
|
// Multiclasses producing the real MUBUF instructions shared by GFX6 and
// GFX7.  Everything defined through them gets the isGFX6GFX7 assembler
// predicate and the "GFX6GFX7" decoder namespace.  Unlike the GFX10
// multiclasses, each of these also covers the ADDR64 addressing mode.
let AssemblerPredicate = isGFX6GFX7, DecoderNamespace = "GFX6GFX7" in {
  // One real instruction per addressing-mode pseudo of NAME
  // (ADDR64, BOTHEN, IDXEN, OFFEN, OFFSET), all sharing opcode `op`.
  multiclass MUBUF_Real_AllAddr_gfx6_gfx7<bits<8> op> {
    def _ADDR64_gfx6_gfx7 :
      MUBUF_Real_gfx6_gfx7<op, !cast<MUBUF_Pseudo>(NAME # "_ADDR64")>;
    def _BOTHEN_gfx6_gfx7 :
      MUBUF_Real_gfx6_gfx7<op, !cast<MUBUF_Pseudo>(NAME # "_BOTHEN")>;
    def _IDXEN_gfx6_gfx7 :
      MUBUF_Real_gfx6_gfx7<op, !cast<MUBUF_Pseudo>(NAME # "_IDXEN")>;
    def _OFFEN_gfx6_gfx7 :
      MUBUF_Real_gfx6_gfx7<op, !cast<MUBUF_Pseudo>(NAME # "_OFFEN")>;
    def _OFFSET_gfx6_gfx7 :
      MUBUF_Real_gfx6_gfx7<op, !cast<MUBUF_Pseudo>(NAME # "_OFFSET")>;
  }

  // As above, but each addressing mode is emitted twice: the plain form and
  // the _LDS form.  Both members of a pair carry an MUBUFLdsTable entry with
  // the same name string (the plain real instruction's name) and a first
  // argument of 0 for the plain form, 1 for the LDS form.
  multiclass MUBUF_Real_AllAddr_Lds_gfx6_gfx7<bits<8> op> {
    // Plain forms.
    def _OFFSET_gfx6_gfx7 : MUBUF_Real_gfx6_gfx7<op, !cast<MUBUF_Pseudo>(NAME # "_OFFSET")>,
                            MUBUFLdsTable<0, NAME # "_OFFSET_gfx6_gfx7">;
    def _ADDR64_gfx6_gfx7 : MUBUF_Real_gfx6_gfx7<op, !cast<MUBUF_Pseudo>(NAME # "_ADDR64")>,
                            MUBUFLdsTable<0, NAME # "_ADDR64_gfx6_gfx7">;
    def _OFFEN_gfx6_gfx7  : MUBUF_Real_gfx6_gfx7<op, !cast<MUBUF_Pseudo>(NAME # "_OFFEN")>,
                            MUBUFLdsTable<0, NAME # "_OFFEN_gfx6_gfx7">;
    def _IDXEN_gfx6_gfx7  : MUBUF_Real_gfx6_gfx7<op, !cast<MUBUF_Pseudo>(NAME # "_IDXEN")>,
                            MUBUFLdsTable<0, NAME # "_IDXEN_gfx6_gfx7">;
    def _BOTHEN_gfx6_gfx7 : MUBUF_Real_gfx6_gfx7<op, !cast<MUBUF_Pseudo>(NAME # "_BOTHEN")>,
                            MUBUFLdsTable<0, NAME # "_BOTHEN_gfx6_gfx7">;

    // LDS forms.
    def _LDS_OFFSET_gfx6_gfx7 : MUBUF_Real_gfx6_gfx7<op, !cast<MUBUF_Pseudo>(NAME # "_LDS_OFFSET")>,
                                MUBUFLdsTable<1, NAME # "_OFFSET_gfx6_gfx7">;
    def _LDS_ADDR64_gfx6_gfx7 : MUBUF_Real_gfx6_gfx7<op, !cast<MUBUF_Pseudo>(NAME # "_LDS_ADDR64")>,
                                MUBUFLdsTable<1, NAME # "_ADDR64_gfx6_gfx7">;
    def _LDS_OFFEN_gfx6_gfx7  : MUBUF_Real_gfx6_gfx7<op, !cast<MUBUF_Pseudo>(NAME # "_LDS_OFFEN")>,
                                MUBUFLdsTable<1, NAME # "_OFFEN_gfx6_gfx7">;
    def _LDS_IDXEN_gfx6_gfx7  : MUBUF_Real_gfx6_gfx7<op, !cast<MUBUF_Pseudo>(NAME # "_LDS_IDXEN")>,
                                MUBUFLdsTable<1, NAME # "_IDXEN_gfx6_gfx7">;
    def _LDS_BOTHEN_gfx6_gfx7 : MUBUF_Real_gfx6_gfx7<op, !cast<MUBUF_Pseudo>(NAME # "_LDS_BOTHEN")>,
                                MUBUFLdsTable<1, NAME # "_BOTHEN_gfx6_gfx7">;
  }

  // Atomics: the five addressing modes inherited from
  // MUBUF_Real_AllAddr_gfx6_gfx7 plus the matching _RTN pseudos, all with
  // the same opcode.
  multiclass MUBUF_Real_Atomics_gfx6_gfx7<bits<8> op> :
      MUBUF_Real_AllAddr_gfx6_gfx7<op> {
    def _ADDR64_RTN_gfx6_gfx7 :
      MUBUF_Real_gfx6_gfx7<op, !cast<MUBUF_Pseudo>(NAME # "_ADDR64_RTN")>;
    def _BOTHEN_RTN_gfx6_gfx7 :
      MUBUF_Real_gfx6_gfx7<op, !cast<MUBUF_Pseudo>(NAME # "_BOTHEN_RTN")>;
    def _IDXEN_RTN_gfx6_gfx7 :
      MUBUF_Real_gfx6_gfx7<op, !cast<MUBUF_Pseudo>(NAME # "_IDXEN_RTN")>;
    def _OFFEN_RTN_gfx6_gfx7 :
      MUBUF_Real_gfx6_gfx7<op, !cast<MUBUF_Pseudo>(NAME # "_OFFEN_RTN")>;
    def _OFFSET_RTN_gfx6_gfx7 :
      MUBUF_Real_gfx6_gfx7<op, !cast<MUBUF_Pseudo>(NAME # "_OFFSET_RTN")>;
  }
} // End AssemblerPredicate = isGFX6GFX7, DecoderNamespace = "GFX6GFX7"
|
|
|
|
|
|
|
|
// Convenience multiclass: instantiates both the GFX6/GFX7 and the GFX10
// "all addressing modes" real records with one shared opcode `op`.
multiclass MUBUF_Real_AllAddr_gfx6_gfx7_gfx10<bits<8> op> :
  MUBUF_Real_AllAddr_gfx6_gfx7<op>, MUBUF_Real_AllAddr_gfx10<op>;
|
|
|
|
|
|
|
|
// Convenience multiclass: instantiates the LDS-aware GFX6/GFX7 and GFX10
// real records with one shared opcode `op`.
multiclass MUBUF_Real_AllAddr_Lds_gfx6_gfx7_gfx10<bits<8> op> :
  MUBUF_Real_AllAddr_Lds_gfx6_gfx7<op>, MUBUF_Real_AllAddr_Lds_gfx10<op>;
|
|
|
|
|
|
|
|
// Convenience multiclass: instantiates the GFX6/GFX7 and GFX10 atomic real
// records with one shared opcode `op`.
multiclass MUBUF_Real_Atomics_gfx6_gfx7_gfx10<bits<8> op> :
  MUBUF_Real_Atomics_gfx6_gfx7<op>, MUBUF_Real_Atomics_gfx10<op>;
|
|
|
|
|
|
|
|
// FIXME-GFX6: Following instructions are available only on GFX6.
|
|
|
|
//defm BUFFER_ATOMIC_RSUB : MUBUF_Real_Atomics_gfx6 <0x034>;
|
|
|
|
//defm BUFFER_ATOMIC_RSUB_X2 : MUBUF_Real_Atomics_gfx6 <0x054>;
|
|
|
|
|
|
|
|
// GFX6/GFX7/GFX10 real encodings for the MUBUF loads and stores. The template
// argument is the opcode. Entries that use the *_Lds_* multiclass also get
// real records for the corresponding _LDS_* pseudos (visible for the
// GFX6/GFX7 half; the GFX10 half is declared outside this excerpt).
// Note that in this numbering DWORDX4 (0x00e / 0x01e) comes before
// DWORDX3 (0x00f / 0x01f).
defm BUFFER_LOAD_FORMAT_X     : MUBUF_Real_AllAddr_Lds_gfx6_gfx7_gfx10<0x000>;
defm BUFFER_LOAD_FORMAT_XY    : MUBUF_Real_AllAddr_gfx6_gfx7_gfx10<0x001>;
defm BUFFER_LOAD_FORMAT_XYZ   : MUBUF_Real_AllAddr_gfx6_gfx7_gfx10<0x002>;
defm BUFFER_LOAD_FORMAT_XYZW  : MUBUF_Real_AllAddr_gfx6_gfx7_gfx10<0x003>;
defm BUFFER_STORE_FORMAT_X    : MUBUF_Real_AllAddr_gfx6_gfx7_gfx10<0x004>;
defm BUFFER_STORE_FORMAT_XY   : MUBUF_Real_AllAddr_gfx6_gfx7_gfx10<0x005>;
defm BUFFER_STORE_FORMAT_XYZ  : MUBUF_Real_AllAddr_gfx6_gfx7_gfx10<0x006>;
defm BUFFER_STORE_FORMAT_XYZW : MUBUF_Real_AllAddr_gfx6_gfx7_gfx10<0x007>;
defm BUFFER_LOAD_UBYTE        : MUBUF_Real_AllAddr_Lds_gfx6_gfx7_gfx10<0x008>;
defm BUFFER_LOAD_SBYTE        : MUBUF_Real_AllAddr_Lds_gfx6_gfx7_gfx10<0x009>;
defm BUFFER_LOAD_USHORT       : MUBUF_Real_AllAddr_Lds_gfx6_gfx7_gfx10<0x00a>;
defm BUFFER_LOAD_SSHORT       : MUBUF_Real_AllAddr_Lds_gfx6_gfx7_gfx10<0x00b>;
defm BUFFER_LOAD_DWORD        : MUBUF_Real_AllAddr_Lds_gfx6_gfx7_gfx10<0x00c>;
defm BUFFER_LOAD_DWORDX2      : MUBUF_Real_AllAddr_gfx6_gfx7_gfx10<0x00d>;
defm BUFFER_LOAD_DWORDX4      : MUBUF_Real_AllAddr_gfx6_gfx7_gfx10<0x00e>;
defm BUFFER_LOAD_DWORDX3      : MUBUF_Real_AllAddr_gfx6_gfx7_gfx10<0x00f>;
defm BUFFER_STORE_BYTE        : MUBUF_Real_AllAddr_gfx6_gfx7_gfx10<0x018>;
defm BUFFER_STORE_SHORT       : MUBUF_Real_AllAddr_gfx6_gfx7_gfx10<0x01a>;
defm BUFFER_STORE_DWORD       : MUBUF_Real_AllAddr_gfx6_gfx7_gfx10<0x01c>;
defm BUFFER_STORE_DWORDX2     : MUBUF_Real_AllAddr_gfx6_gfx7_gfx10<0x01d>;
defm BUFFER_STORE_DWORDX4     : MUBUF_Real_AllAddr_gfx6_gfx7_gfx10<0x01e>;
defm BUFFER_STORE_DWORDX3     : MUBUF_Real_AllAddr_gfx6_gfx7_gfx10<0x01f>;
|
|
|
|
|
|
|
|
// GFX6/GFX7/GFX10 real encodings for the 32-bit MUBUF atomics. Each defm
// yields both the no-return and the _RTN records (see
// MUBUF_Real_Atomics_gfx6_gfx7). Opcode 0x034 is not instantiated in this
// list.
defm BUFFER_ATOMIC_SWAP       : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x030>;
defm BUFFER_ATOMIC_CMPSWAP    : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x031>;
defm BUFFER_ATOMIC_ADD        : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x032>;
defm BUFFER_ATOMIC_SUB        : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x033>;
defm BUFFER_ATOMIC_SMIN       : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x035>;
defm BUFFER_ATOMIC_UMIN       : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x036>;
defm BUFFER_ATOMIC_SMAX       : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x037>;
defm BUFFER_ATOMIC_UMAX       : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x038>;
defm BUFFER_ATOMIC_AND        : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x039>;
defm BUFFER_ATOMIC_OR         : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x03a>;
defm BUFFER_ATOMIC_XOR        : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x03b>;
defm BUFFER_ATOMIC_INC        : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x03c>;
defm BUFFER_ATOMIC_DEC        : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x03d>;
// FIXME-GFX6-GFX7-GFX10: Add following instructions:
//defm BUFFER_ATOMIC_FCMPSWAP : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x03e>;
//defm BUFFER_ATOMIC_FMIN : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x03f>;
//defm BUFFER_ATOMIC_FMAX : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x040>;

// 64-bit (_X2) atomics: same layout as above, starting at opcode 0x050.
// Opcode 0x054 is not instantiated in this list.
defm BUFFER_ATOMIC_SWAP_X2    : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x050>;
defm BUFFER_ATOMIC_CMPSWAP_X2 : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x051>;
defm BUFFER_ATOMIC_ADD_X2     : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x052>;
defm BUFFER_ATOMIC_SUB_X2     : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x053>;
defm BUFFER_ATOMIC_SMIN_X2    : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x055>;
defm BUFFER_ATOMIC_UMIN_X2    : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x056>;
defm BUFFER_ATOMIC_SMAX_X2    : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x057>;
defm BUFFER_ATOMIC_UMAX_X2    : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x058>;
defm BUFFER_ATOMIC_AND_X2     : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x059>;
defm BUFFER_ATOMIC_OR_X2      : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x05a>;
defm BUFFER_ATOMIC_XOR_X2     : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x05b>;
defm BUFFER_ATOMIC_INC_X2     : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x05c>;
defm BUFFER_ATOMIC_DEC_X2     : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x05d>;
// FIXME-GFX7: Need to handle hazard for BUFFER_ATOMIC_FCMPSWAP_X2 on GFX7.
// FIXME-GFX6-GFX7-GFX10: Add following instructions:
//defm BUFFER_ATOMIC_FCMPSWAP_X2 : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x05e>;
//defm BUFFER_ATOMIC_FMIN_X2 : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x05f>;
//defm BUFFER_ATOMIC_FMAX_X2 : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x060>;
|
|
|
|
|
|
|
|
// Real records for the BUFFER_WBINVL1* instructions.
// _SC and _VOL share opcode 0x070 but go through different multiclasses
// (MUBUF_Real_gfx6 vs. MUBUF_Real_gfx7, both declared outside this excerpt;
// presumably GFX6-only and GFX7-only respectively - confirm at their
// definitions). BUFFER_WBINVL1 itself is defined directly from its pseudo
// with opcode 0x071 rather than through a multiclass.
defm BUFFER_WBINVL1_SC        : MUBUF_Real_gfx6<0x070>;
defm BUFFER_WBINVL1_VOL       : MUBUF_Real_gfx7<0x070>;
def  BUFFER_WBINVL1_gfx6_gfx7 : MUBUF_Real_gfx6_gfx7<0x071, BUFFER_WBINVL1>;
|
|
|
|
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
// Base ENC_MTBUF for GFX6, GFX7, GFX10.
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
|
|
|
|
// Common 64-bit MTBUF encoding shared by the GFX6/GFX7 and GFX10 real
// instruction classes.
//
//   op - 3-bit opcode field, placed in Inst{18-16}.
//   ps - the pseudo instruction this real instruction encodes.
//   ef - encoding family forwarded to SIMCInstr.
//
// Fields guarded by a `ps.has_*` flag are left unset (`?`) when the pseudo
// does not have that operand. Inst{15}, Inst{25-19} and Inst{53} are not
// assigned here; the derived classes MTBUF_Real_gfx10 and
// MTBUF_Real_gfx6_gfx7 fill them in.
class Base_MTBUF_Real_gfx6_gfx7_gfx10<bits<3> op, MTBUF_Pseudo ps, int ef> :
    MTBUF_Real<ps>, Enc64, SIMCInstr<ps.PseudoInstr, ef> {
  let Inst{11-0}  = !if(ps.has_offset, offset, ?);
  let Inst{12}    = ps.offen;
  let Inst{13}    = ps.idxen;
  // Use the glc operand when present, otherwise the pseudo's fixed value.
  let Inst{14}    = !if(ps.has_glc, glc, ps.glc_value);
  let Inst{18-16} = op;
  let Inst{31-26} = 0x3a; //encoding
  let Inst{39-32} = !if(ps.has_vaddr, vaddr, ?);
  let Inst{47-40} = !if(ps.has_vdata, vdata, ?);
  // Only bits 6-2 of srsrc are encoded (5-bit field).
  let Inst{52-48} = !if(ps.has_srsrc, srsrc{6-2}, ?);
  let Inst{54}    = !if(ps.has_slc, slc, ?);
  let Inst{55}    = !if(ps.has_tfe, tfe, ?);
  let Inst{63-56} = !if(ps.has_soffset, soffset, ?);
}
|
|
|
|
|
2019-05-01 06:08:23 +08:00
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
// MTBUF - GFX10.
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
|
|
|
|
// GFX10 MTBUF real instruction.
//
// The opcode is 4 bits wide here: the low three bits are handed to the base
// class (which stores them in Inst{18-16}) and the top bit op{3} is stored
// separately in Inst{53}. Inst{15} carries dlc (or the pseudo's fixed
// dlc_value when there is no dlc operand) and Inst{25-19} carries the 7-bit
// `format` field.
class MTBUF_Real_gfx10<bits<4> op, MTBUF_Pseudo ps> :
    Base_MTBUF_Real_gfx6_gfx7_gfx10<op{2-0}, ps, SIEncodingFamily.GFX10> {
  let Inst{15}    = !if(ps.has_dlc, dlc, ps.dlc_value);
  let Inst{25-19} = format;
  let Inst{53}    = op{3};
}
|
|
|
|
|
|
|
|
// GFX10 MTBUF real records for the four addressing suffixes (_BOTHEN,
// _IDXEN, _OFFEN, _OFFSET). Each record is named NAME#<suffix>_gfx10 and is
// built from the pseudo NAME#<suffix>. All records created through this
// multiclass get AssemblerPredicate = isGFX10Plus and
// DecoderNamespace = "GFX10" from the enclosing `let`.
let AssemblerPredicate = isGFX10Plus, DecoderNamespace = "GFX10" in {
  multiclass MTBUF_Real_AllAddr_gfx10<bits<4> op> {
    def _BOTHEN_gfx10 :
      MTBUF_Real_gfx10<op, !cast<MTBUF_Pseudo>(NAME#"_BOTHEN")>;
    def _IDXEN_gfx10 :
      MTBUF_Real_gfx10<op, !cast<MTBUF_Pseudo>(NAME#"_IDXEN")>;
    def _OFFEN_gfx10 :
      MTBUF_Real_gfx10<op, !cast<MTBUF_Pseudo>(NAME#"_OFFEN")>;
    def _OFFSET_gfx10 :
      MTBUF_Real_gfx10<op, !cast<MTBUF_Pseudo>(NAME#"_OFFSET")>;
  }
} // End AssemblerPredicate = isGFX10Plus, DecoderNamespace = "GFX10"
|
2016-09-10 21:09:16 +08:00
|
|
|
|
2019-05-01 06:08:23 +08:00
|
|
|
// GFX10 real encodings for the D16 typed-buffer loads and stores. These are
// instantiated through the GFX10-only multiclass. Their opcodes (0x008-0x00f)
// have bit 3 set, which MTBUF_Real_gfx10 stores in Inst{53}.
defm TBUFFER_LOAD_FORMAT_D16_X     : MTBUF_Real_AllAddr_gfx10<0x008>;
defm TBUFFER_LOAD_FORMAT_D16_XY    : MTBUF_Real_AllAddr_gfx10<0x009>;
defm TBUFFER_LOAD_FORMAT_D16_XYZ   : MTBUF_Real_AllAddr_gfx10<0x00a>;
defm TBUFFER_LOAD_FORMAT_D16_XYZW  : MTBUF_Real_AllAddr_gfx10<0x00b>;
defm TBUFFER_STORE_FORMAT_D16_X    : MTBUF_Real_AllAddr_gfx10<0x00c>;
defm TBUFFER_STORE_FORMAT_D16_XY   : MTBUF_Real_AllAddr_gfx10<0x00d>;
defm TBUFFER_STORE_FORMAT_D16_XYZ  : MTBUF_Real_AllAddr_gfx10<0x00e>;
defm TBUFFER_STORE_FORMAT_D16_XYZW : MTBUF_Real_AllAddr_gfx10<0x00f>;
|
2016-09-10 21:09:16 +08:00
|
|
|
|
|
|
|
//===----------------------------------------------------------------------===//
|
2019-05-01 06:08:23 +08:00
|
|
|
// MTBUF - GFX6, GFX7, GFX10.
|
2016-09-10 21:09:16 +08:00
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
|
2019-05-01 06:08:23 +08:00
|
|
|
// GFX6/GFX7 MTBUF real instruction (encoding family SIEncodingFamily.SI).
//
// `op` is declared 4 bits wide so it can share callers with the GFX10 class,
// but only op{2-0} is forwarded to the base class; op{3} is not encoded
// anywhere in this class. Inst{15} holds the pseudo's addr64 flag, and the
// format is split into dfmt (Inst{22-19}) and nfmt (Inst{25-23}).
class MTBUF_Real_gfx6_gfx7<bits<4> op, MTBUF_Pseudo ps> :
    Base_MTBUF_Real_gfx6_gfx7_gfx10<op{2-0}, ps, SIEncodingFamily.SI> {
  let Inst{15}    = ps.addr64;
  let Inst{22-19} = dfmt;
  let Inst{25-23} = nfmt;
}
|
|
|
|
|
2019-05-01 06:08:23 +08:00
|
|
|
// GFX6/GFX7 MTBUF real records for the five addressing suffixes (_ADDR64,
// _BOTHEN, _IDXEN, _OFFEN, _OFFSET). Each record is named
// NAME#<suffix>_gfx6_gfx7 and is built from the pseudo NAME#<suffix>. All
// records created through this multiclass get
// AssemblerPredicate = isGFX6GFX7 and DecoderNamespace = "GFX6GFX7" from the
// enclosing `let`.
let AssemblerPredicate = isGFX6GFX7, DecoderNamespace = "GFX6GFX7" in {
  multiclass MTBUF_Real_AllAddr_gfx6_gfx7<bits<4> op> {
    def _ADDR64_gfx6_gfx7 :
      MTBUF_Real_gfx6_gfx7<op, !cast<MTBUF_Pseudo>(NAME#"_ADDR64")>;
    def _BOTHEN_gfx6_gfx7 :
      MTBUF_Real_gfx6_gfx7<op, !cast<MTBUF_Pseudo>(NAME#"_BOTHEN")>;
    def _IDXEN_gfx6_gfx7 :
      MTBUF_Real_gfx6_gfx7<op, !cast<MTBUF_Pseudo>(NAME#"_IDXEN")>;
    def _OFFEN_gfx6_gfx7 :
      MTBUF_Real_gfx6_gfx7<op, !cast<MTBUF_Pseudo>(NAME#"_OFFEN")>;
    def _OFFSET_gfx6_gfx7 :
      MTBUF_Real_gfx6_gfx7<op, !cast<MTBUF_Pseudo>(NAME#"_OFFSET")>;
  }
} // End AssemblerPredicate = isGFX6GFX7, DecoderNamespace = "GFX6GFX7"
|
|
|
|
|
|
|
|
// Convenience multiclass: instantiates both the GFX6/GFX7 and the GFX10
// MTBUF real records with one shared 4-bit opcode `op`.
multiclass MTBUF_Real_AllAddr_gfx6_gfx7_gfx10<bits<4> op> :
  MTBUF_Real_AllAddr_gfx6_gfx7<op>, MTBUF_Real_AllAddr_gfx10<op>;
|
2016-09-10 21:09:16 +08:00
|
|
|
|
2019-05-01 06:08:23 +08:00
|
|
|
// GFX6/GFX7/GFX10 real encodings for the typed-buffer format loads and
// stores. All opcodes here fit in three bits (0x000-0x007), so the fourth
// opcode bit is zero for every entry.
defm TBUFFER_LOAD_FORMAT_X     : MTBUF_Real_AllAddr_gfx6_gfx7_gfx10<0x000>;
defm TBUFFER_LOAD_FORMAT_XY    : MTBUF_Real_AllAddr_gfx6_gfx7_gfx10<0x001>;
defm TBUFFER_LOAD_FORMAT_XYZ   : MTBUF_Real_AllAddr_gfx6_gfx7_gfx10<0x002>;
defm TBUFFER_LOAD_FORMAT_XYZW  : MTBUF_Real_AllAddr_gfx6_gfx7_gfx10<0x003>;
defm TBUFFER_STORE_FORMAT_X    : MTBUF_Real_AllAddr_gfx6_gfx7_gfx10<0x004>;
defm TBUFFER_STORE_FORMAT_XY   : MTBUF_Real_AllAddr_gfx6_gfx7_gfx10<0x005>;
defm TBUFFER_STORE_FORMAT_XYZ  : MTBUF_Real_AllAddr_gfx6_gfx7_gfx10<0x006>;
defm TBUFFER_STORE_FORMAT_XYZW : MTBUF_Real_AllAddr_gfx6_gfx7_gfx10<0x007>;
|
2016-09-10 21:09:16 +08:00
|
|
|
|
|
|
|
//===----------------------------------------------------------------------===//
|
2019-05-01 06:08:23 +08:00
|
|
|
// GFX8, GFX9 (VI).
|
2016-09-10 21:09:16 +08:00
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
|
|
|
|
// MUBUF real instruction for the VI encoding family
// (SIEncodingFamily.VI), assembled under isGFX8GFX9 and decoded in the
// "GFX8" namespace.
//
//   op - 7-bit opcode, placed in Inst{24-18}.
//   ps - the pseudo instruction this real instruction encodes.
//
// Fields guarded by a `ps.has_*` flag are left unset (`?`) when the pseudo
// does not have that operand. Inst{15}, Inst{25} and Inst{54-53} are not
// assigned in this class.
class MUBUF_Real_vi <bits<7> op, MUBUF_Pseudo ps> :
  MUBUF_Real<ps>,
  Enc64,
  SIMCInstr<ps.PseudoInstr, SIEncodingFamily.VI> {
  let AssemblerPredicate = isGFX8GFX9;
  let DecoderNamespace = "GFX8";

  let Inst{11-0}  = !if(ps.has_offset, offset, ?);
  let Inst{12}    = ps.offen;
  let Inst{13}    = ps.idxen;
  // Use the glc operand when present, otherwise the pseudo's fixed value.
  let Inst{14}    = !if(ps.has_glc, glc, ps.glc_value);
  // LDS bit: set for pseudos whose `lds` flag is true.
  let Inst{16}    = !if(ps.lds, 1, 0);
  let Inst{17}    = !if(ps.has_slc, slc, ?);
  let Inst{24-18} = op;
  let Inst{31-26} = 0x38; //encoding
  let Inst{39-32} = !if(ps.has_vaddr, vaddr, ?);
  let Inst{47-40} = !if(ps.has_vdata, vdata, ?);
  // Only bits 6-2 of srsrc are encoded (5-bit field).
  let Inst{52-48} = !if(ps.has_srsrc, srsrc{6-2}, ?);
  let Inst{55}    = !if(ps.has_tfe, tfe, ?);
  let Inst{63-56} = !if(ps.has_soffset, soffset, ?);
}
|
|
|
|
|
|
|
|
// VI MUBUF real records for the four addressing suffixes (_OFFSET, _OFFEN,
// _IDXEN, _BOTHEN). Each record is named NAME#<suffix>_vi and is built from
// the pseudo NAME#<suffix>; there is no _ADDR64 record in this multiclass.
multiclass MUBUF_Real_AllAddr_vi<bits<7> op> {
  def _OFFSET_vi : MUBUF_Real_vi <op, !cast<MUBUF_Pseudo>(NAME#"_OFFSET")>;
  def _OFFEN_vi  : MUBUF_Real_vi <op, !cast<MUBUF_Pseudo>(NAME#"_OFFEN")>;
  def _IDXEN_vi  : MUBUF_Real_vi <op, !cast<MUBUF_Pseudo>(NAME#"_IDXEN")>;
  def _BOTHEN_vi : MUBUF_Real_vi <op, !cast<MUBUF_Pseudo>(NAME#"_BOTHEN")>;
}
|
|
|
|
|
2018-02-21 21:13:48 +08:00
|
|
|
// VI MUBUF real records for an instruction that has both a plain and an LDS
// pseudo form.
//
// For each addressing suffix (_OFFSET, _OFFEN, _IDXEN, _BOTHEN) two records
// are emitted, all sharing the same opcode `op`:
//   NAME#<suffix>_vi       built from pseudo NAME#<suffix>
//   NAME#_LDS<suffix>_vi   built from pseudo NAME#_LDS<suffix>
//
// Every record also mixes in MUBUFLdsTable (declared outside this excerpt).
// The first argument is 0 for the plain record and 1 for the _LDS record; the
// second argument is the name of the *plain* real record in both cases, so a
// plain/LDS pair always carries the same name string.
multiclass MUBUF_Real_AllAddr_Lds_vi<bits<7> op> {

  // Plain (non-LDS) variants.
  def _OFFSET_vi : MUBUF_Real_vi <op, !cast<MUBUF_Pseudo>(NAME#"_OFFSET")>,
                   MUBUFLdsTable<0, NAME # "_OFFSET_vi">;
  def _OFFEN_vi  : MUBUF_Real_vi <op, !cast<MUBUF_Pseudo>(NAME#"_OFFEN")>,
                   MUBUFLdsTable<0, NAME # "_OFFEN_vi">;
  def _IDXEN_vi  : MUBUF_Real_vi <op, !cast<MUBUF_Pseudo>(NAME#"_IDXEN")>,
                   MUBUFLdsTable<0, NAME # "_IDXEN_vi">;
  def _BOTHEN_vi : MUBUF_Real_vi <op, !cast<MUBUF_Pseudo>(NAME#"_BOTHEN")>,
                   MUBUFLdsTable<0, NAME # "_BOTHEN_vi">;

  // LDS variants; the table key deliberately omits "_LDS" so it matches the
  // key of the corresponding plain record above.
  def _LDS_OFFSET_vi : MUBUF_Real_vi <op, !cast<MUBUF_Pseudo>(NAME#"_LDS_OFFSET")>,
                       MUBUFLdsTable<1, NAME # "_OFFSET_vi">;
  def _LDS_OFFEN_vi  : MUBUF_Real_vi <op, !cast<MUBUF_Pseudo>(NAME#"_LDS_OFFEN")>,
                       MUBUFLdsTable<1, NAME # "_OFFEN_vi">;
  def _LDS_IDXEN_vi  : MUBUF_Real_vi <op, !cast<MUBUF_Pseudo>(NAME#"_LDS_IDXEN")>,
                       MUBUFLdsTable<1, NAME # "_IDXEN_vi">;
  def _LDS_BOTHEN_vi : MUBUF_Real_vi <op, !cast<MUBUF_Pseudo>(NAME#"_LDS_BOTHEN")>,
                       MUBUFLdsTable<1, NAME # "_BOTHEN_vi">;
}
|
|
|
|
|
2018-01-13 05:12:19 +08:00
|
|
|
// MUBUF real instruction for the SIEncodingFamily.GFX80 encoding family,
// assembled under HasUnpackedD16VMem and decoded in the "GFX80_UNPACKED"
// namespace.
//
//   op - 7-bit opcode, placed in Inst{24-18}.
//   ps - the pseudo instruction this real instruction encodes.
//
// The bit assignments below are the same as those in MUBUF_Real_vi; only the
// encoding family, assembler predicate and decoder namespace differ.
// Fields guarded by a `ps.has_*` flag are left unset (`?`) when the pseudo
// does not have that operand.
class MUBUF_Real_gfx80 <bits<7> op, MUBUF_Pseudo ps> :
  MUBUF_Real<ps>,
  Enc64,
  SIMCInstr<ps.PseudoInstr, SIEncodingFamily.GFX80> {
  let AssemblerPredicate=HasUnpackedD16VMem;
  let DecoderNamespace="GFX80_UNPACKED";

  let Inst{11-0}  = !if(ps.has_offset, offset, ?);
  let Inst{12}    = ps.offen;
  let Inst{13}    = ps.idxen;
  // Use the glc operand when present, otherwise the pseudo's fixed value.
  let Inst{14}    = !if(ps.has_glc, glc, ps.glc_value);
  // LDS bit: set for pseudos whose `lds` flag is true.
  let Inst{16}    = !if(ps.lds, 1, 0);
  let Inst{17}    = !if(ps.has_slc, slc, ?);
  let Inst{24-18} = op;
  let Inst{31-26} = 0x38; //encoding
  let Inst{39-32} = !if(ps.has_vaddr, vaddr, ?);
  let Inst{47-40} = !if(ps.has_vdata, vdata, ?);
  // Only bits 6-2 of srsrc are encoded (5-bit field).
  let Inst{52-48} = !if(ps.has_srsrc, srsrc{6-2}, ?);
  let Inst{55}    = !if(ps.has_tfe, tfe, ?);
  let Inst{63-56} = !if(ps.has_soffset, soffset, ?);
}
|
|
|
|
|
|
|
|
// GFX80 MUBUF real records for the four addressing suffixes (_OFFSET,
// _OFFEN, _IDXEN, _BOTHEN). Each record is named NAME#<suffix>_gfx80 and is
// built from the pseudo NAME#<suffix>, where NAME is the name given to the
// instantiating defm.
multiclass MUBUF_Real_AllAddr_gfx80<bits<7> op> {
  def _OFFSET_gfx80 : MUBUF_Real_gfx80 <op, !cast<MUBUF_Pseudo>(NAME#"_OFFSET")>;
  def _OFFEN_gfx80  : MUBUF_Real_gfx80 <op, !cast<MUBUF_Pseudo>(NAME#"_OFFEN")>;
  def _IDXEN_gfx80  : MUBUF_Real_gfx80 <op, !cast<MUBUF_Pseudo>(NAME#"_IDXEN")>;
  def _BOTHEN_gfx80 : MUBUF_Real_gfx80 <op, !cast<MUBUF_Pseudo>(NAME#"_BOTHEN")>;
}
|
|
|
|
|
2016-09-10 21:09:16 +08:00
|
|
|
// VI real records for a MUBUF atomic.
//
// Inherits the four records produced by MUBUF_Real_AllAddr_vi<op> and adds
// one record per addressing suffix for the "_RTN" pseudos
// (NAME#<suffix>_RTN). All records use the same opcode `op`.
multiclass MUBUF_Real_Atomic_vi<bits<7> op> :
  MUBUF_Real_AllAddr_vi<op> {
  def _OFFSET_RTN_vi : MUBUF_Real_vi <op, !cast<MUBUF_Pseudo>(NAME#"_OFFSET_RTN")>;
  def _OFFEN_RTN_vi  : MUBUF_Real_vi <op, !cast<MUBUF_Pseudo>(NAME#"_OFFEN_RTN")>;
  def _IDXEN_RTN_vi  : MUBUF_Real_vi <op, !cast<MUBUF_Pseudo>(NAME#"_IDXEN_RTN")>;
  def _BOTHEN_RTN_vi : MUBUF_Real_vi <op, !cast<MUBUF_Pseudo>(NAME#"_BOTHEN_RTN")>;
}
|
|
|
|
|
2018-02-21 21:13:48 +08:00
|
|
|
// VI real encodings for the MUBUF format loads and stores. The template
// argument is the 7-bit opcode. Only BUFFER_LOAD_FORMAT_X uses the LDS-aware
// multiclass and therefore also gets _LDS_* real records.
defm BUFFER_LOAD_FORMAT_X       : MUBUF_Real_AllAddr_Lds_vi <0x00>;
defm BUFFER_LOAD_FORMAT_XY      : MUBUF_Real_AllAddr_vi <0x01>;
defm BUFFER_LOAD_FORMAT_XYZ     : MUBUF_Real_AllAddr_vi <0x02>;
defm BUFFER_LOAD_FORMAT_XYZW    : MUBUF_Real_AllAddr_vi <0x03>;
defm BUFFER_STORE_FORMAT_X      : MUBUF_Real_AllAddr_vi <0x04>;
defm BUFFER_STORE_FORMAT_XY     : MUBUF_Real_AllAddr_vi <0x05>;
defm BUFFER_STORE_FORMAT_XYZ    : MUBUF_Real_AllAddr_vi <0x06>;
defm BUFFER_STORE_FORMAT_XYZW   : MUBUF_Real_AllAddr_vi <0x07>;
|
2018-01-13 05:12:19 +08:00
|
|
|
// D16 format loads/stores for subtargets with HasUnpackedD16VMem
// (opcodes 0x08-0x0f). The defm names already end in "_gfx80", so inside
// MUBUF_Real_AllAddr_gfx80 NAME is e.g. "BUFFER_LOAD_FORMAT_D16_X_gfx80":
// the pseudo looked up is BUFFER_LOAD_FORMAT_D16_X_gfx80_OFFSET and the
// resulting real record is BUFFER_LOAD_FORMAT_D16_X_gfx80_OFFSET_gfx80.
let SubtargetPredicate = HasUnpackedD16VMem in {
  defm BUFFER_LOAD_FORMAT_D16_X_gfx80       : MUBUF_Real_AllAddr_gfx80 <0x08>;
  defm BUFFER_LOAD_FORMAT_D16_XY_gfx80      : MUBUF_Real_AllAddr_gfx80 <0x09>;
  defm BUFFER_LOAD_FORMAT_D16_XYZ_gfx80     : MUBUF_Real_AllAddr_gfx80 <0x0a>;
  defm BUFFER_LOAD_FORMAT_D16_XYZW_gfx80    : MUBUF_Real_AllAddr_gfx80 <0x0b>;
  defm BUFFER_STORE_FORMAT_D16_X_gfx80      : MUBUF_Real_AllAddr_gfx80 <0x0c>;
  defm BUFFER_STORE_FORMAT_D16_XY_gfx80     : MUBUF_Real_AllAddr_gfx80 <0x0d>;
  defm BUFFER_STORE_FORMAT_D16_XYZ_gfx80    : MUBUF_Real_AllAddr_gfx80 <0x0e>;
  defm BUFFER_STORE_FORMAT_D16_XYZW_gfx80   : MUBUF_Real_AllAddr_gfx80 <0x0f>;
} // End HasUnpackedD16VMem.
|
|
|
|
// MUBUF D16 format loads/stores for subtargets with HasPackedD16VMem.
// Instantiated through MUBUF_Real_AllAddr_vi with opcodes 0x08-0x0f, i.e. the
// same opcode values as the HasUnpackedD16VMem ("_gfx80") region above; the
// two regions are distinguished by SubtargetPredicate and real-encoding class.
let SubtargetPredicate = HasPackedD16VMem in {
|
|
|
|
defm BUFFER_LOAD_FORMAT_D16_X : MUBUF_Real_AllAddr_vi <0x08>;
|
|
|
|
defm BUFFER_LOAD_FORMAT_D16_XY : MUBUF_Real_AllAddr_vi <0x09>;
|
|
|
|
defm BUFFER_LOAD_FORMAT_D16_XYZ : MUBUF_Real_AllAddr_vi <0x0a>;
|
|
|
|
defm BUFFER_LOAD_FORMAT_D16_XYZW : MUBUF_Real_AllAddr_vi <0x0b>;
|
|
|
|
defm BUFFER_STORE_FORMAT_D16_X : MUBUF_Real_AllAddr_vi <0x0c>;
|
|
|
|
defm BUFFER_STORE_FORMAT_D16_XY : MUBUF_Real_AllAddr_vi <0x0d>;
|
|
|
|
defm BUFFER_STORE_FORMAT_D16_XYZ : MUBUF_Real_AllAddr_vi <0x0e>;
|
|
|
|
defm BUFFER_STORE_FORMAT_D16_XYZW : MUBUF_Real_AllAddr_vi <0x0f>;
|
|
|
|
} // End HasPackedD16VMem.
|
2018-02-21 21:13:48 +08:00
|
|
|
defm BUFFER_LOAD_UBYTE : MUBUF_Real_AllAddr_Lds_vi <0x10>;
|
|
|
|
defm BUFFER_LOAD_SBYTE : MUBUF_Real_AllAddr_Lds_vi <0x11>;
|
|
|
|
defm BUFFER_LOAD_USHORT : MUBUF_Real_AllAddr_Lds_vi <0x12>;
|
|
|
|
defm BUFFER_LOAD_SSHORT : MUBUF_Real_AllAddr_Lds_vi <0x13>;
|
|
|
|
defm BUFFER_LOAD_DWORD : MUBUF_Real_AllAddr_Lds_vi <0x14>;
|
2018-06-13 23:32:46 +08:00
|
|
|
defm BUFFER_LOAD_DWORDX2 : MUBUF_Real_AllAddr_Lds_vi <0x15>;
|
|
|
|
defm BUFFER_LOAD_DWORDX3 : MUBUF_Real_AllAddr_Lds_vi <0x16>;
|
|
|
|
defm BUFFER_LOAD_DWORDX4 : MUBUF_Real_AllAddr_Lds_vi <0x17>;
|
2016-09-10 21:09:16 +08:00
|
|
|
defm BUFFER_STORE_BYTE : MUBUF_Real_AllAddr_vi <0x18>;
|
2017-09-02 02:36:06 +08:00
|
|
|
defm BUFFER_STORE_BYTE_D16_HI : MUBUF_Real_AllAddr_vi <0x19>;
|
2016-09-10 21:09:16 +08:00
|
|
|
defm BUFFER_STORE_SHORT : MUBUF_Real_AllAddr_vi <0x1a>;
|
2017-09-02 02:36:06 +08:00
|
|
|
defm BUFFER_STORE_SHORT_D16_HI : MUBUF_Real_AllAddr_vi <0x1b>;
|
2016-09-10 21:09:16 +08:00
|
|
|
defm BUFFER_STORE_DWORD : MUBUF_Real_AllAddr_vi <0x1c>;
|
|
|
|
defm BUFFER_STORE_DWORDX2 : MUBUF_Real_AllAddr_vi <0x1d>;
|
2016-10-07 23:53:16 +08:00
|
|
|
defm BUFFER_STORE_DWORDX3 : MUBUF_Real_AllAddr_vi <0x1e>;
|
2016-09-10 21:09:16 +08:00
|
|
|
defm BUFFER_STORE_DWORDX4 : MUBUF_Real_AllAddr_vi <0x1f>;
|
|
|
|
|
2017-09-02 02:36:06 +08:00
|
|
|
defm BUFFER_LOAD_UBYTE_D16 : MUBUF_Real_AllAddr_vi <0x20>;
|
|
|
|
defm BUFFER_LOAD_UBYTE_D16_HI : MUBUF_Real_AllAddr_vi <0x21>;
|
|
|
|
defm BUFFER_LOAD_SBYTE_D16 : MUBUF_Real_AllAddr_vi <0x22>;
|
|
|
|
defm BUFFER_LOAD_SBYTE_D16_HI : MUBUF_Real_AllAddr_vi <0x23>;
|
|
|
|
defm BUFFER_LOAD_SHORT_D16 : MUBUF_Real_AllAddr_vi <0x24>;
|
|
|
|
defm BUFFER_LOAD_SHORT_D16_HI : MUBUF_Real_AllAddr_vi <0x25>;
|
|
|
|
|
2018-03-28 22:53:13 +08:00
|
|
|
defm BUFFER_LOAD_FORMAT_D16_HI_X : MUBUF_Real_AllAddr_vi <0x26>;
|
|
|
|
defm BUFFER_STORE_FORMAT_D16_HI_X : MUBUF_Real_AllAddr_vi <0x27>;
|
|
|
|
|
2016-09-10 21:09:16 +08:00
|
|
|
defm BUFFER_ATOMIC_SWAP : MUBUF_Real_Atomic_vi <0x40>;
|
|
|
|
defm BUFFER_ATOMIC_CMPSWAP : MUBUF_Real_Atomic_vi <0x41>;
|
|
|
|
defm BUFFER_ATOMIC_ADD : MUBUF_Real_Atomic_vi <0x42>;
|
|
|
|
defm BUFFER_ATOMIC_SUB : MUBUF_Real_Atomic_vi <0x43>;
|
|
|
|
defm BUFFER_ATOMIC_SMIN : MUBUF_Real_Atomic_vi <0x44>;
|
|
|
|
defm BUFFER_ATOMIC_UMIN : MUBUF_Real_Atomic_vi <0x45>;
|
|
|
|
defm BUFFER_ATOMIC_SMAX : MUBUF_Real_Atomic_vi <0x46>;
|
|
|
|
defm BUFFER_ATOMIC_UMAX : MUBUF_Real_Atomic_vi <0x47>;
|
|
|
|
defm BUFFER_ATOMIC_AND : MUBUF_Real_Atomic_vi <0x48>;
|
|
|
|
defm BUFFER_ATOMIC_OR : MUBUF_Real_Atomic_vi <0x49>;
|
|
|
|
defm BUFFER_ATOMIC_XOR : MUBUF_Real_Atomic_vi <0x4a>;
|
|
|
|
defm BUFFER_ATOMIC_INC : MUBUF_Real_Atomic_vi <0x4b>;
|
|
|
|
defm BUFFER_ATOMIC_DEC : MUBUF_Real_Atomic_vi <0x4c>;
|
|
|
|
|
|
|
|
defm BUFFER_ATOMIC_SWAP_X2 : MUBUF_Real_Atomic_vi <0x60>;
|
|
|
|
defm BUFFER_ATOMIC_CMPSWAP_X2 : MUBUF_Real_Atomic_vi <0x61>;
|
|
|
|
defm BUFFER_ATOMIC_ADD_X2 : MUBUF_Real_Atomic_vi <0x62>;
|
|
|
|
defm BUFFER_ATOMIC_SUB_X2 : MUBUF_Real_Atomic_vi <0x63>;
|
|
|
|
defm BUFFER_ATOMIC_SMIN_X2 : MUBUF_Real_Atomic_vi <0x64>;
|
|
|
|
defm BUFFER_ATOMIC_UMIN_X2 : MUBUF_Real_Atomic_vi <0x65>;
|
|
|
|
defm BUFFER_ATOMIC_SMAX_X2 : MUBUF_Real_Atomic_vi <0x66>;
|
|
|
|
defm BUFFER_ATOMIC_UMAX_X2 : MUBUF_Real_Atomic_vi <0x67>;
|
|
|
|
defm BUFFER_ATOMIC_AND_X2 : MUBUF_Real_Atomic_vi <0x68>;
|
|
|
|
defm BUFFER_ATOMIC_OR_X2 : MUBUF_Real_Atomic_vi <0x69>;
|
|
|
|
defm BUFFER_ATOMIC_XOR_X2 : MUBUF_Real_Atomic_vi <0x6a>;
|
|
|
|
defm BUFFER_ATOMIC_INC_X2 : MUBUF_Real_Atomic_vi <0x6b>;
|
|
|
|
defm BUFFER_ATOMIC_DEC_X2 : MUBUF_Real_Atomic_vi <0x6c>;
|
|
|
|
|
2018-03-13 01:29:24 +08:00
|
|
|
def BUFFER_STORE_LDS_DWORD_vi : MUBUF_Real_vi <0x3d, BUFFER_STORE_LDS_DWORD>;
|
|
|
|
|
2016-09-10 21:09:16 +08:00
|
|
|
def BUFFER_WBINVL1_vi : MUBUF_Real_vi <0x3e, BUFFER_WBINVL1>;
|
|
|
|
def BUFFER_WBINVL1_VOL_vi : MUBUF_Real_vi <0x3f, BUFFER_WBINVL1_VOL>;
|
|
|
|
|
2019-07-11 08:10:17 +08:00
|
|
|
// Real VI encodings for BUFFER_ATOMIC_ADD_F32 (0x4d) and
// BUFFER_ATOMIC_PK_ADD_F16 (0x4e), gated on HasAtomicFaddInsts.
// These use MUBUF_Real_AllAddr_vi rather than MUBUF_Real_Atomic_vi, so no
// "_RTN" reals are instantiated for them here.
let SubtargetPredicate = HasAtomicFaddInsts in {
|
|
|
|
|
|
|
|
|
|
defm BUFFER_ATOMIC_ADD_F32 : MUBUF_Real_AllAddr_vi <0x4d>;
|
|
|
|
defm BUFFER_ATOMIC_PK_ADD_F16 : MUBUF_Real_AllAddr_vi <0x4e>;
|
|
|
|
|
|
|
|
} // End SubtargetPredicate = HasAtomicFaddInsts
|
|
|
|
|
2016-09-24 05:21:21 +08:00
|
|
|
// Real (encoded) form of the MTBUF pseudo `ps` for SIEncodingFamily.VI, using
// the 64-bit Enc64 layout. Assembled under isGFX8GFX9 and decoded in the
// "GFX8" namespace. `op` is the 4-bit MTBUF opcode.
// Bit layout as assigned below:
//   [11:0]  offset      [12]    offen       [13]    idxen
//   [14]    glc         [18:15] op          [22:19] dfmt
//   [25:23] nfmt        [31:26] 0x3a (encoding)
//   [39:32] vaddr       [47:40] vdata       [52:48] srsrc{6-2}
//   [54]    slc         [55]    tfe         [63:56] soffset
// Bit 53 is not assigned by this class. Fields guarded by a ps.has_* flag are
// left unset (?) when the pseudo lacks that operand; glc instead falls back
// to the constant ps.glc_value.
class MTBUF_Real_vi <bits<4> op, MTBUF_Pseudo ps> :
|
|
|
|
MTBUF_Real<ps>,
|
2017-06-23 00:29:22 +08:00
|
|
|
Enc64,
|
2016-09-10 21:09:16 +08:00
|
|
|
SIMCInstr<ps.PseudoInstr, SIEncodingFamily.VI> {
|
2019-04-06 17:20:48 +08:00
|
|
|
let AssemblerPredicate = isGFX8GFX9;
|
|
|
|
let DecoderNamespace = "GFX8";
|
2016-09-24 05:21:21 +08:00
|
|
|
|
|
2017-06-23 00:29:22 +08:00
|
|
|
let Inst{11-0} = !if(ps.has_offset, offset, ?);
|
|
|
|
let Inst{12} = ps.offen;
|
|
|
|
let Inst{13} = ps.idxen;
|
|
|
|
let Inst{14} = !if(ps.has_glc, glc, ps.glc_value);
|
2016-09-24 05:21:21 +08:00
|
|
|
let Inst{18-15} = op;
|
[AMDGPU] New tbuffer intrinsics
Summary:
This commit adds new intrinsics
llvm.amdgcn.raw.tbuffer.load
llvm.amdgcn.struct.tbuffer.load
llvm.amdgcn.raw.tbuffer.store
llvm.amdgcn.struct.tbuffer.store
with the following changes from the llvm.amdgcn.tbuffer.* intrinsics:
* there are separate raw and struct versions: raw does not have an index
arg and sets idxen=0 in the instruction, and struct always sets
idxen=1 in the instruction even if the index is 0, to allow for the
fact that gfx9 does bounds checking differently depending on whether
idxen is set;
* there is a combined format arg (dfmt+nfmt)
* there is a combined cachepolicy arg (glc+slc)
* there are now only two offset args: one for the offset that is
included in bounds checking and swizzling, to be split between the
instruction's voffset and immoffset fields, and one for the offset
that is excluded from bounds checking and swizzling, to go into the
instruction's soffset field.
The AMDISD::TBUFFER_* SD nodes always have an index operand, all three
offset operands, combined format operand, combined cachepolicy operand,
and an extra idxen operand.
The tbuffer pseudo- and real instructions now also have a combined
format operand.
The obsolescent llvm.amdgcn.tbuffer.* and llvm.SI.tbuffer.store
intrinsics continue to work.
V2: Separate raw and struct intrinsics.
V3: Moved extract_glc and extract_slc defs to a more sensible place.
V4: Rebased on D49995.
V5: Only two separate offset args instead of three.
V6: Pseudo- and real instructions have joint format operand.
V7: Restored optionality of dfmt and nfmt in assembler.
V8: Addressed minor review comments.
Subscribers: arsenm, kzhuravl, wdng, nhaehnle, yaxunl, dstuttard, t-tye, llvm-commits
Differential Revision: https://reviews.llvm.org/D49026
Change-Id: If22ad77e349fac3a5d2f72dda53c010377d470d4
llvm-svn: 340268
2018-08-21 19:06:05 +08:00
|
|
|
let Inst{22-19} = dfmt;
|
|
|
|
let Inst{25-23} = nfmt;
|
2017-06-23 00:29:22 +08:00
|
|
|
let Inst{31-26} = 0x3a; //encoding
|
|
|
|
let Inst{39-32} = !if(ps.has_vaddr, vaddr, ?);
|
|
|
|
let Inst{47-40} = !if(ps.has_vdata, vdata, ?);
|
|
|
|
let Inst{52-48} = !if(ps.has_srsrc, srsrc{6-2}, ?);
|
|
|
|
let Inst{54} = !if(ps.has_slc, slc, ?);
|
|
|
|
let Inst{55} = !if(ps.has_tfe, tfe, ?);
|
|
|
|
let Inst{63-56} = !if(ps.has_soffset, soffset, ?);
|
2016-09-10 21:09:16 +08:00
|
|
|
}
|
|
|
|
|
2017-06-23 00:29:22 +08:00
|
|
|
// Emits the four VI real encodings of one MTBUF instruction, one per
// addressing mode: _OFFSET_vi, _OFFEN_vi, _IDXEN_vi and _BOTHEN_vi.
// Each def resolves its pseudo by name (NAME#"_<mode>") with !cast and passes
// the shared 4-bit opcode `op` to MTBUF_Real_vi.
multiclass MTBUF_Real_AllAddr_vi<bits<4> op> {
|
|
|
|
def _OFFSET_vi : MTBUF_Real_vi <op, !cast<MTBUF_Pseudo>(NAME#"_OFFSET")>;
|
|
|
|
def _OFFEN_vi : MTBUF_Real_vi <op, !cast<MTBUF_Pseudo>(NAME#"_OFFEN")>;
|
|
|
|
def _IDXEN_vi : MTBUF_Real_vi <op, !cast<MTBUF_Pseudo>(NAME#"_IDXEN")>;
|
|
|
|
def _BOTHEN_vi : MTBUF_Real_vi <op, !cast<MTBUF_Pseudo>(NAME#"_BOTHEN")>;
|
|
|
|
}
|
2016-09-10 21:09:16 +08:00
|
|
|
|
2018-01-13 05:12:19 +08:00
|
|
|
// Real (encoded) form of the MTBUF pseudo `ps` for SIEncodingFamily.GFX80,
// using the 64-bit Enc64 layout. Assembled only under HasUnpackedD16VMem and
// decoded in the "GFX80_UNPACKED" namespace. `op` is the 4-bit MTBUF opcode.
// The Inst{...} assignments below are field-for-field the same as those in
// MTBUF_Real_vi:
//   [11:0]  offset      [12]    offen       [13]    idxen
//   [14]    glc         [18:15] op          [22:19] dfmt
//   [25:23] nfmt        [31:26] 0x3a (encoding)
//   [39:32] vaddr       [47:40] vdata       [52:48] srsrc{6-2}
//   [54]    slc         [55]    tfe         [63:56] soffset
// Bit 53 is not assigned by this class. Fields guarded by a ps.has_* flag are
// left unset (?) when the pseudo lacks that operand; glc instead falls back
// to the constant ps.glc_value.
class MTBUF_Real_gfx80 <bits<4> op, MTBUF_Pseudo ps> :
|
|
|
|
MTBUF_Real<ps>,
|
|
|
|
Enc64,
|
|
|
|
SIMCInstr<ps.PseudoInstr, SIEncodingFamily.GFX80> {
|
|
|
|
let AssemblerPredicate=HasUnpackedD16VMem;
|
|
|
|
let DecoderNamespace="GFX80_UNPACKED";
|
|
|
|
|
|
|
|
let Inst{11-0} = !if(ps.has_offset, offset, ?);
|
|
|
|
let Inst{12} = ps.offen;
|
|
|
|
let Inst{13} = ps.idxen;
|
|
|
|
let Inst{14} = !if(ps.has_glc, glc, ps.glc_value);
|
|
|
|
let Inst{18-15} = op;
|
[AMDGPU] New tbuffer intrinsics
Summary:
This commit adds new intrinsics
llvm.amdgcn.raw.tbuffer.load
llvm.amdgcn.struct.tbuffer.load
llvm.amdgcn.raw.tbuffer.store
llvm.amdgcn.struct.tbuffer.store
with the following changes from the llvm.amdgcn.tbuffer.* intrinsics:
* there are separate raw and struct versions: raw does not have an index
arg and sets idxen=0 in the instruction, and struct always sets
idxen=1 in the instruction even if the index is 0, to allow for the
fact that gfx9 does bounds checking differently depending on whether
idxen is set;
* there is a combined format arg (dfmt+nfmt)
* there is a combined cachepolicy arg (glc+slc)
* there are now only two offset args: one for the offset that is
included in bounds checking and swizzling, to be split between the
instruction's voffset and immoffset fields, and one for the offset
that is excluded from bounds checking and swizzling, to go into the
instruction's soffset field.
The AMDISD::TBUFFER_* SD nodes always have an index operand, all three
offset operands, combined format operand, combined cachepolicy operand,
and an extra idxen operand.
The tbuffer pseudo- and real instructions now also have a combined
format operand.
The obsolescent llvm.amdgcn.tbuffer.* and llvm.SI.tbuffer.store
intrinsics continue to work.
V2: Separate raw and struct intrinsics.
V3: Moved extract_glc and extract_slc defs to a more sensible place.
V4: Rebased on D49995.
V5: Only two separate offset args instead of three.
V6: Pseudo- and real instructions have joint format operand.
V7: Restored optionality of dfmt and nfmt in assembler.
V8: Addressed minor review comments.
Subscribers: arsenm, kzhuravl, wdng, nhaehnle, yaxunl, dstuttard, t-tye, llvm-commits
Differential Revision: https://reviews.llvm.org/D49026
Change-Id: If22ad77e349fac3a5d2f72dda53c010377d470d4
llvm-svn: 340268
2018-08-21 19:06:05 +08:00
|
|
|
let Inst{22-19} = dfmt;
|
|
|
|
let Inst{25-23} = nfmt;
|
2018-01-13 05:12:19 +08:00
|
|
|
let Inst{31-26} = 0x3a; //encoding
|
|
|
|
let Inst{39-32} = !if(ps.has_vaddr, vaddr, ?);
|
|
|
|
let Inst{47-40} = !if(ps.has_vdata, vdata, ?);
|
|
|
|
let Inst{52-48} = !if(ps.has_srsrc, srsrc{6-2}, ?);
|
|
|
|
let Inst{54} = !if(ps.has_slc, slc, ?);
|
|
|
|
let Inst{55} = !if(ps.has_tfe, tfe, ?);
|
|
|
|
let Inst{63-56} = !if(ps.has_soffset, soffset, ?);
|
|
|
|
}
|
|
|
|
|
|
|
|
// Emits the four GFX80 real encodings of one MTBUF instruction, one per
// addressing mode: _OFFSET_gfx80, _OFFEN_gfx80, _IDXEN_gfx80, _BOTHEN_gfx80.
// Each def resolves its pseudo by name (NAME#"_<mode>") with !cast and passes
// the shared 4-bit opcode `op` to MTBUF_Real_gfx80.
multiclass MTBUF_Real_AllAddr_gfx80<bits<4> op> {
|
|
|
|
def _OFFSET_gfx80 : MTBUF_Real_gfx80 <op, !cast<MTBUF_Pseudo>(NAME#"_OFFSET")>;
|
|
|
|
def _OFFEN_gfx80 : MTBUF_Real_gfx80 <op, !cast<MTBUF_Pseudo>(NAME#"_OFFEN")>;
|
|
|
|
def _IDXEN_gfx80 : MTBUF_Real_gfx80 <op, !cast<MTBUF_Pseudo>(NAME#"_IDXEN")>;
|
|
|
|
def _BOTHEN_gfx80 : MTBUF_Real_gfx80 <op, !cast<MTBUF_Pseudo>(NAME#"_BOTHEN")>;
|
|
|
|
}
|
|
|
|
|
|
|
|
defm TBUFFER_LOAD_FORMAT_X : MTBUF_Real_AllAddr_vi <0x00>;
|
|
|
|
defm TBUFFER_LOAD_FORMAT_XY : MTBUF_Real_AllAddr_vi <0x01>;
|
|
|
|
defm TBUFFER_LOAD_FORMAT_XYZ : MTBUF_Real_AllAddr_vi <0x02>;
|
|
|
|
defm TBUFFER_LOAD_FORMAT_XYZW : MTBUF_Real_AllAddr_vi <0x03>;
|
|
|
|
defm TBUFFER_STORE_FORMAT_X : MTBUF_Real_AllAddr_vi <0x04>;
|
|
|
|
defm TBUFFER_STORE_FORMAT_XY : MTBUF_Real_AllAddr_vi <0x05>;
|
|
|
|
defm TBUFFER_STORE_FORMAT_XYZ : MTBUF_Real_AllAddr_vi <0x06>;
|
|
|
|
defm TBUFFER_STORE_FORMAT_XYZW : MTBUF_Real_AllAddr_vi <0x07>;
|
|
|
|
// MTBUF D16 format loads/stores for subtargets with HasUnpackedD16VMem.
// These instantiate the "*_gfx80" pseudos through MTBUF_Real_AllAddr_gfx80 and
// use opcodes 0x08-0x0f -- the same opcode values as the HasPackedD16VMem
// region that follows, which goes through MTBUF_Real_AllAddr_vi instead.
let SubtargetPredicate = HasUnpackedD16VMem in {
|
|
|
|
defm TBUFFER_LOAD_FORMAT_D16_X_gfx80 : MTBUF_Real_AllAddr_gfx80 <0x08>;
|
|
|
|
defm TBUFFER_LOAD_FORMAT_D16_XY_gfx80 : MTBUF_Real_AllAddr_gfx80 <0x09>;
|
|
|
|
defm TBUFFER_LOAD_FORMAT_D16_XYZ_gfx80 : MTBUF_Real_AllAddr_gfx80 <0x0a>;
|
|
|
|
defm TBUFFER_LOAD_FORMAT_D16_XYZW_gfx80 : MTBUF_Real_AllAddr_gfx80 <0x0b>;
|
|
|
|
defm TBUFFER_STORE_FORMAT_D16_X_gfx80 : MTBUF_Real_AllAddr_gfx80 <0x0c>;
|
|
|
|
defm TBUFFER_STORE_FORMAT_D16_XY_gfx80 : MTBUF_Real_AllAddr_gfx80 <0x0d>;
|
|
|
|
defm TBUFFER_STORE_FORMAT_D16_XYZ_gfx80 : MTBUF_Real_AllAddr_gfx80 <0x0e>;
|
|
|
|
defm TBUFFER_STORE_FORMAT_D16_XYZW_gfx80 : MTBUF_Real_AllAddr_gfx80 <0x0f>;
|
|
|
|
} // End HasUnpackedD16VMem.
|
|
|
|
let SubtargetPredicate = HasPackedD16VMem in {
|
|
|
|
defm TBUFFER_LOAD_FORMAT_D16_X : MTBUF_Real_AllAddr_vi <0x08>;
|
|
|
|
defm TBUFFER_LOAD_FORMAT_D16_XY : MTBUF_Real_AllAddr_vi <0x09>;
|
|
|
|
defm TBUFFER_LOAD_FORMAT_D16_XYZ : MTBUF_Real_AllAddr_vi <0x0a>;
|
|
|
|
defm TBUFFER_LOAD_FORMAT_D16_XYZW : MTBUF_Real_AllAddr_vi <0x0b>;
|
|
|
|
defm TBUFFER_STORE_FORMAT_D16_X : MTBUF_Real_AllAddr_vi <0x0c>;
|
|
|
|
defm TBUFFER_STORE_FORMAT_D16_XY : MTBUF_Real_AllAddr_vi <0x0d>;
|
|
|
|
defm TBUFFER_STORE_FORMAT_D16_XYZ : MTBUF_Real_AllAddr_vi <0x0e>;
|
|
|
|
defm TBUFFER_STORE_FORMAT_D16_XYZW : MTBUF_Real_AllAddr_vi <0x0f>;
|
|
|
|
} // End HasPackedD16VMem.
|
2018-12-13 00:15:21 +08:00
|
|
|
|
|
|
|
// Searchable table with one row per record of class MUBUF_Pseudo
// (FilterClass); rows are emitted as the C++ type MUBUFInfo and carry the
// columns listed in Fields. The primary key is Opcode, and the generated
// primary-key lookup function is named getMUBUFOpcodeHelper.
def MUBUFInfoTable : GenericTable {
|
|
|
|
let FilterClass = "MUBUF_Pseudo";
|
|
|
|
let CppTypeName = "MUBUFInfo";
|
2019-08-18 08:20:43 +08:00
|
|
|
let Fields = ["Opcode", "BaseOpcode", "elements", "has_vaddr", "has_srsrc", "has_soffset"];
|
2018-12-13 00:15:21 +08:00
|
|
|
|
|
|
|
let PrimaryKey = ["Opcode"];
|
|
|
|
let PrimaryKeyName = "getMUBUFOpcodeHelper";
|
|
|
|
}
|
|
|
|
|
|
|
|
// Secondary lookup over MUBUFInfoTable keyed on Opcode. The def name is the
// name of the generated lookup function. It uses the same key column as the
// table's PrimaryKey (whose generated function is getMUBUFOpcodeHelper).
def getMUBUFInfoFromOpcode : SearchIndex {
|
|
|
|
let Table = MUBUFInfoTable;
|
|
|
|
let Key = ["Opcode"];
|
|
|
|
}
|
|
|
|
|
2019-08-18 08:20:43 +08:00
|
|
|
// Secondary lookup over MUBUFInfoTable keyed on the pair
// (BaseOpcode, elements). The def name is the name of the generated lookup
// function.
def getMUBUFInfoFromBaseOpcodeAndElements : SearchIndex {
|
2018-12-13 00:15:21 +08:00
|
|
|
let Table = MUBUFInfoTable;
|
2019-08-18 08:20:43 +08:00
|
|
|
let Key = ["BaseOpcode", "elements"];
|
2018-12-13 00:15:21 +08:00
|
|
|
}
|