2019-06-11 20:04:32 +08:00
|
|
|
//===-- ARMInstrMVE.td - MVE support for ARM ---------------*- tablegen -*-===//
|
|
|
|
//
|
|
|
|
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
|
|
|
// See https://llvm.org/LICENSE.txt for license information.
|
|
|
|
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
|
|
|
//
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
//
|
|
|
|
// This file describes the ARM MVE instruction set.
|
|
|
|
//
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
|
[ARM] Add MVE vector bit-operations (register inputs).
This includes all the obvious bitwise operations (AND, OR, BIC, ORN,
MVN) in register-to-register forms, and the immediate forms of
AND/OR/BIC/ORN; byte-order reverse instructions; and the VMOVs that
access a single lane of a vector.
Some of those VMOVs (specifically, the ones that access a 32-bit lane)
share an encoding with existing instructions that were disassembled as
accessing half of a d-register (e.g. `vmov.32 r0, d1[0]`), but in
8.1-M they're now written as accessing a quarter of a q-register (e.g.
`vmov.32 r0, q0[2]`). The older syntax is still accepted by the
assembler.
Reviewers: dmgreen, samparker, SjoerdMeijer, t.p.northover
Subscribers: javed.absar, kristof.beyls, hiraditya, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D62673
llvm-svn: 363838
2019-06-20 00:43:53 +08:00
|
|
|
// Assembler match class for an "expanded immediate" operand. The `shift`
// string is spliced into the class name and into the template argument of
// the predicate method, so every shift value gets its own match class.
// Operands are rendered with the generic immediate renderer.
class ExpandImmAsmOp<string shift> : AsmOperandClass {
  let RenderMethod = "addImmOperands";
  let PredicateMethod = "isExpImm<"#shift#">";
  let Name = "ExpandImm"#shift;
}
|
|
|
|
// Assembler match class for the inverted form of an expanded immediate.
// Both `shift` and `size` are pasted into the class name (separated by an
// underscore) and passed, in that order, as template arguments of the
// predicate method. Rendering uses the generic immediate renderer.
class InvertedExpandImmAsmOp<string shift, string size> : AsmOperandClass {
  let RenderMethod = "addImmOperands";
  let PredicateMethod = "isInvertedExpImm<"#shift#","#size#">";
  let Name = "InvertedExpandImm"#shift#"_"#size;
}
|
|
|
|
|
|
|
|
// i32 operand carrying an expanded immediate for a given `shift`.
// Parsing is delegated to ExpandImmAsmOp<shift>; the encoder and decoder
// hooks are both specialised on the same shift, and the encoder's second
// template argument is fixed to `false` for this (non-inverted) flavour.
class ExpandImm<string shift> : Operand<i32> {
  let PrintMethod = "printExpandedImmOperand";
  let DecoderMethod = "DecodeExpandedImmOperand<"#shift#">";
  let EncoderMethod = "getExpandedImmOpValue<"#shift#",false>";
  let ParserMatchClass = ExpandImmAsmOp<shift>;
}
|
|
|
|
// i32 operand carrying the inverted form of an expanded immediate.
// Parsing is delegated to InvertedExpandImmAsmOp<shift, size>; the encoder
// is the same hook ExpandImm uses, but with its second template argument
// set to `true`.
//
// There is deliberately no DecoderMethod: this operand type only appears in
// aliases (VAND and VORN), so it never has to be decoded.
class InvertedExpandImm<string shift, string size> : Operand<i32> {
  let PrintMethod = "printExpandedImmOperand";
  let EncoderMethod = "getExpandedImmOpValue<"#shift#",true>";
  let ParserMatchClass = InvertedExpandImmAsmOp<shift, size>;
}
|
|
|
|
|
|
|
|
// Concrete expanded-immediate operands, one per shift amount. The two-digit
// suffix of each name is the zero-padded shift string handed to ExpandImm.
def expzero00 : ExpandImm<"0">;
def expzero08 : ExpandImm<"8">;
def expzero16 : ExpandImm<"16">;
def expzero24 : ExpandImm<"24">;
|
|
|
|
|
|
|
|
// Inverted expanded-immediate operands with size "16": the name encodes the
// zero-padded shift followed by `inv` and the size argument.
def expzero00inv16 : InvertedExpandImm<"0", "16">;
def expzero08inv16 : InvertedExpandImm<"8", "16">;
|
|
|
|
|
|
|
|
// Inverted expanded-immediate operands with size "32", one per shift amount
// (same naming scheme as the size-"16" variants).
def expzero00inv32 : InvertedExpandImm<"0", "32">;
def expzero08inv32 : InvertedExpandImm<"8", "32">;
def expzero16inv32 : InvertedExpandImm<"16", "32">;
def expzero24inv32 : InvertedExpandImm<"24", "32">;
|
|
|
|
|
[ARM] Set up infrastructure for MVE vector instructions.
This commit prepares the way to start adding the main collection of
MVE instructions, which operate on the 128-bit vector registers.
The most obvious thing that's needed, and the simplest, is to add the
MQPR register class, which is like the existing QPR except that it has
fewer registers in it.
The more complicated part: MVE defines a system of vector predication,
in which instructions operating on 128-bit vector registers can be
constrained to operate on only a subset of the lanes, using a system
of prefix instructions similar to the existing Thumb IT, in that you
have one prefix instruction which designates up to 4 following
instructions as subject to predication, and within that sequence, the
predicate can be inverted by means of T/E suffixes ('Then' / 'Else').
To support instructions of this type, we've added two new Tablegen
classes `vpred_n` and `vpred_r` for standard clusters of MC operands
to add to a predicated instruction. Both include a flag indicating whether
and how the instruction is predicated (options are T, E and 'not
predicated'), and an input register field for the register controlling
the set of active lanes. They differ from each other in that `vpred_r`
also includes an input operand for the previous value of the output
register, for instructions that leave inactive lanes unchanged.
`vpred_n` lacks that extra operand; it will be used for instructions
that don't preserve inactive lanes in their output register (either
because inactive lanes are zeroed, as the MVE load instructions do, or
because the output register isn't a vector at all).
This commit also adds the family of prefix instructions themselves
(VPT / VPST), and all the machinery needed to work with them in
assembly and disassembly (e.g. generating the 't' and 'e' mnemonic
suffixes on disassembled instructions within a predicated block).
I've added a couple of demo instructions that derive from the new
Tablegen base classes and use those two operand clusters. The bulk of
the vector instructions will come in followup commits small enough to
be manageable. (One exception is that I've added the full version of
`isMnemonicVPTPredicable` in the AsmParser, because it seemed
pointless to carefully split it up.)
Reviewers: dmgreen, samparker, SjoerdMeijer, t.p.northover
Subscribers: javed.absar, kristof.beyls, hiraditya, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D62669
llvm-svn: 363258
2019-06-13 21:11:13 +08:00
|
|
|
// Operand holding the condition mask of a VPT block. It is an i32 operand
// that reuses the existing it_mask_asmoperand match class for parsing, but
// has its own VPT-specific encoder, decoder and printer hooks.
def vpt_mask : Operand<i32> {
  let ParserMatchClass = it_mask_asmoperand;
  let EncoderMethod = "getVPTMaskOpValue";
  let DecoderMethod = "DecodeVPTMaskOperand";
  let PrintMethod = "printVPTMask";
}
|
|
|
|
|
|
|
|
// Restricted VPT/VCMP predicate, sign-invariant integer flavour.
// The operand is parsed and rendered with the IT condition-code helpers;
// the predicate method narrows the accepted condition codes, and the
// diagnostic tells the user which ones are allowed (EQ or NE).
def pred_restricted_i_asmoperand : AsmOperandClass {
  let Name = "CondCodeRestrictedI";
  let ParserMethod = "parseITCondCode";
  let PredicateMethod = "isITCondCodeRestrictedI";
  let RenderMethod = "addITCondCodeOperands";
  let DiagnosticString = "condition code for sign-independent integer "#
                         "comparison must be EQ or NE";
}
|
|
|
|
|
|
|
|
// Restricted VPT/VCMP predicate, signed integer flavour.
// The operand is parsed and rendered with the IT condition-code helpers;
// the predicate method narrows the accepted condition codes, and the
// diagnostic lists the permitted ones (EQ, NE, LT, GT, LE, GE).
def pred_restricted_s_asmoperand : AsmOperandClass {
  let Name = "CondCodeRestrictedS";
  let ParserMethod = "parseITCondCode";
  let PredicateMethod = "isITCondCodeRestrictedS";
  let RenderMethod = "addITCondCodeOperands";
  let DiagnosticString = "condition code for signed integer "#
                         "comparison must be EQ, NE, LT, GT, LE or GE";
}
|
|
|
|
|
|
|
|
// Restricted VPT/VCMP predicate, unsigned integer flavour.
// The operand is parsed and rendered with the IT condition-code helpers;
// the predicate method narrows the accepted condition codes, and the
// diagnostic lists the permitted ones (EQ, NE, HS, HI).
def pred_restricted_u_asmoperand : AsmOperandClass {
  let Name = "CondCodeRestrictedU";
  let ParserMethod = "parseITCondCode";
  let PredicateMethod = "isITCondCodeRestrictedU";
  let RenderMethod = "addITCondCodeOperands";
  let DiagnosticString = "condition code for unsigned integer "#
                         "comparison must be EQ, NE, HS or HI";
}
|
|
|
|
|
|
|
|
// Restricted VPT/VCMP predicate, floating-point flavour.
// The operand is parsed and rendered with the IT condition-code helpers;
// the predicate method narrows the accepted condition codes, and the
// diagnostic lists the permitted ones (EQ, NE, LT, GT, LE, GE).
def pred_restricted_fp_asmoperand : AsmOperandClass {
  let Name = "CondCodeRestrictedFP";
  let ParserMethod = "parseITCondCode";
  let PredicateMethod = "isITCondCodeRestrictedFP";
  let RenderMethod = "addITCondCodeOperands";
  let DiagnosticString = "condition code for floating-point "#
                         "comparison must be EQ, NE, LT, GT, LE or GE";
}
|
|
|
|
|
2019-06-21 19:14:51 +08:00
|
|
|
// Common base for the pred_basic_* operands: an i32 operand that adds no
// fields of its own and only gives those operands a shared parent class.
class VCMPPredicateOperand : Operand<i32> {}
|
|
|
|
|
|
|
|
// Predicate operand paired with the sign-invariant restricted match class
// (pred_restricted_i_asmoperand). Printing and encoding use the hooks shared
// by all pred_basic_* operands; only the decoder is flavour-specific.
def pred_basic_i : VCMPPredicateOperand {
  let ParserMatchClass = pred_restricted_i_asmoperand;
  let EncoderMethod = "getRestrictedCondCodeOpValue";
  let DecoderMethod = "DecodeRestrictedIPredicateOperand";
  let PrintMethod = "printMandatoryRestrictedPredicateOperand";
}
|
|
|
|
|
2019-06-21 19:14:51 +08:00
|
|
|
// Predicate operand paired with the unsigned restricted match class
// (pred_restricted_u_asmoperand). Printing and encoding use the hooks shared
// by all pred_basic_* operands; only the decoder is flavour-specific.
def pred_basic_u : VCMPPredicateOperand {
  let ParserMatchClass = pred_restricted_u_asmoperand;
  let EncoderMethod = "getRestrictedCondCodeOpValue";
  let DecoderMethod = "DecodeRestrictedUPredicateOperand";
  let PrintMethod = "printMandatoryRestrictedPredicateOperand";
}
|
|
|
|
|
2019-06-21 19:14:51 +08:00
|
|
|
// Predicate operand paired with the signed restricted match class
// (pred_restricted_s_asmoperand). Printing and encoding use the hooks shared
// by all pred_basic_* operands; only the decoder is flavour-specific.
def pred_basic_s : VCMPPredicateOperand {
  let ParserMatchClass = pred_restricted_s_asmoperand;
  let EncoderMethod = "getRestrictedCondCodeOpValue";
  let DecoderMethod = "DecodeRestrictedSPredicateOperand";
  let PrintMethod = "printMandatoryRestrictedPredicateOperand";
}
|
|
|
|
|
2019-06-21 19:14:51 +08:00
|
|
|
// Predicate operand paired with the floating-point restricted match class
// (pred_restricted_fp_asmoperand). Printing and encoding use the hooks
// shared by all pred_basic_* operands; only the decoder is flavour-specific.
def pred_basic_fp : VCMPPredicateOperand {
  let ParserMatchClass = pred_restricted_fp_asmoperand;
  let EncoderMethod = "getRestrictedCondCodeOpValue";
  let DecoderMethod = "DecodeRestrictedFPPredicateOperand";
  let PrintMethod = "printMandatoryRestrictedPredicateOperand";
}
|
|
|
|
|
2019-06-24 18:00:39 +08:00
|
|
|
// Register list operands for interleaving load/stores.
//
// Match class for a list of two q-registers: parsed by the shared vector
// list parser and rendered by the MVE-specific list renderer. The diagnostic
// states the constraint (two consecutive q-registers within q0..q7).
def VecList2QAsmOperand : AsmOperandClass {
  let Name = "VecListTwoMQ";
  let RenderMethod = "addMVEVecListOperands";
  let ParserMethod = "parseVectorList";
  let DiagnosticString = "operand must be a list of two consecutive "#
                         "q-registers in range [q0,q7]";
}
|
|
|
|
|
|
|
|
// Register operand over QQPR that represents a two-q-register list, matched
// in the assembler through VecList2QAsmOperand.
// NOTE(review): a print-method name is passed as the second RegisterOperand
// template argument, and PrintMethod is then assigned again in the body;
// presumably the assignment in the body is the one that takes effect --
// confirm against the definition of RegisterOperand.
def VecList2Q : RegisterOperand<QQPR, "printMVEVectorListTwoQ"> {
  let PrintMethod = "printMVEVectorList<2>";
  let ParserMatchClass = VecList2QAsmOperand;
}
|
|
|
|
|
|
|
|
// Match class for a list of four q-registers: parsed by the shared vector
// list parser and rendered by the MVE-specific list renderer. The diagnostic
// states the constraint (four consecutive q-registers within q0..q7).
def VecList4QAsmOperand : AsmOperandClass {
  let Name = "VecListFourMQ";
  let RenderMethod = "addMVEVecListOperands";
  let ParserMethod = "parseVectorList";
  let DiagnosticString = "operand must be a list of four consecutive "#
                         "q-registers in range [q0,q7]";
}
|
|
|
|
|
|
|
|
// Register operand over QQQQPR that represents a four-q-register list,
// matched in the assembler through VecList4QAsmOperand.
// NOTE(review): a print-method name is passed as the second RegisterOperand
// template argument, and PrintMethod is then assigned again in the body;
// presumably the assignment in the body is the one that takes effect --
// confirm against the definition of RegisterOperand.
def VecList4Q : RegisterOperand<QQQQPR, "printMVEVectorListFourQ"> {
  let PrintMethod = "printMVEVectorList<4>";
  let ParserMatchClass = VecList4QAsmOperand;
}
|
|
|
|
|
[ARM] Add MVE vector load/store instructions.
This adds the rest of the vector memory access instructions. It
includes contiguous loads/stores, with an ordinary addressing mode
such as [r0,#offset] (plus writeback variants); gather loads and
scatter stores with a scalar base address register and a vector of
offsets from it (written [r0,q1] or similar); and gather/scatters with
a vector of base addresses (written [q0,#offset], again with
writeback). Additionally, some of the loads can widen each loaded
value into a larger vector lane, and the corresponding stores narrow
them again.
To implement these, we also have to add the addressing modes they
need. Also, in AsmParser, the `isMem` query function now has
subqueries `isGPRMem` and `isMVEMem`, according to which kind of base
register is used by a given memory access operand.
I've also had to add an extra check in `checkTargetMatchPredicate` in
the AsmParser, without which our last-minute check of `rGPR` register
operands against SP and PC was failing an assertion because Tablegen
had inserted an immediate 0 in place of one of a pair of tied register
operands. (This matches the way the corresponding check for `MCK_rGPR`
in `validateTargetOperandClass` is guarded.) Apparently the MVE load
instructions were the first to have ever triggered this assertion, but
I think only because they were the first to have a combination of the
usual Arm pre/post writeback system and the `rGPR` class in particular.
Reviewers: dmgreen, samparker, SjoerdMeijer, t.p.northover
Subscribers: javed.absar, kristof.beyls, hiraditya, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D62680
llvm-svn: 364291
2019-06-25 19:24:18 +08:00
|
|
|
// taddrmode_imm7 := reg[r0-r7] +/- (imm7 << shift)
//
// Asm-parser operand class for the addressing mode above. The shift amount
// is baked into both the class name and the predicate's template arguments.
class TMemImm7ShiftOffsetAsmOperand<int shift> : AsmOperandClass {
  let Name = "TMemImm7Shift" # shift # "Offset";
  let RenderMethod = "addMemImmOffsetOperands";
  let PredicateMethod =
    "isMemImm7ShiftedOffset<" # shift # ",ARM::tGPRRegClassID>";
}
|
|
|
|
|
2019-09-17 23:32:28 +08:00
|
|
|
// Memory operand made of a tGPR base register plus an i32 immediate offset,
// parameterised on the shift applied to the immediate.
class taddrmode_imm7<int shift>
    : MemOperand,
      ComplexPattern<i32, 2, "SelectTAddrModeImm7<" # shift # ">", []> {
  let MIOperandInfo = (ops tGPR:$base, i32imm:$offsimm);
  let ParserMatchClass = TMemImm7ShiftOffsetAsmOperand<shift>;

  // Printing is shared with the T2 imm8 addressing mode.
  let PrintMethod = "printT2AddrModeImm8Operand<false>";

  // Encoding is likewise shared with the T2 variant; decoding has its own
  // helper.
  let EncoderMethod = "getT2AddrModeImmOpValue<7," # shift # ">";
  let DecoderMethod = "DecodeTAddrModeImm7<" # shift # ">";
}
|
|
|
|
|
|
|
|
// t2addrmode_imm7 := reg +/- (imm7)
//
// Asm-parser operand class for the non-writeback form. The predicate is
// instantiated with the shift amount and the GPRnopc register class ID.
class MemImm7ShiftOffsetAsmOperand<int shift> : AsmOperandClass {
  let Name = "MemImm7Shift" # shift # "Offset";
  let RenderMethod = "addMemImmOffsetOperands";
  let PredicateMethod =
    "isMemImm7ShiftedOffset<" # shift # ",ARM::GPRnopcRegClassID>";
}

// One record per supported shift; T2AddrMode_Imm7 looks these up by name
// through !cast, so the spelling of each def name matters.
def MemImm7Shift0OffsetAsmOperand : MemImm7ShiftOffsetAsmOperand<0>;
def MemImm7Shift1OffsetAsmOperand : MemImm7ShiftOffsetAsmOperand<1>;
def MemImm7Shift2OffsetAsmOperand : MemImm7ShiftOffsetAsmOperand<2>;
|
|
|
|
// Common base for the t2addrmode_imm7 operand classes: a GPRnopc base
// register plus an i32 immediate offset. Subclasses supply the print method
// and may override the parser class, decoder and operand info.
class T2AddrMode_Imm7<int shift>
    : MemOperand,
      ComplexPattern<i32, 2, "SelectT2AddrModeImm7<" # shift # ">", []> {
  let MIOperandInfo = (ops GPRnopc:$base, i32imm:$offsimm);

  // Resolved by name: expects a def called MemImm7Shift<shift>OffsetAsmOperand.
  let ParserMatchClass =
    !cast<AsmOperandClass>("MemImm7Shift" # shift # "OffsetAsmOperand");

  let EncoderMethod = "getT2AddrModeImmOpValue<7," # shift # ">";
  // NOTE(review): the second decoder template argument (0 here, 1 in
  // t2addrmode_imm7_pre) presumably selects the writeback form -- confirm
  // against the disassembler.
  let DecoderMethod = "DecodeT2AddrModeImm7<" # shift # ", 0>";
}
|
|
|
|
|
|
|
|
// Plain (non-writeback) imm7 addressing mode. Everything except the printer
// is inherited from T2AddrMode_Imm7.
class t2addrmode_imm7<int shift> : T2AddrMode_Imm7<shift> {
  // Printing is shared with the imm8 addressing mode.
  let PrintMethod = "printT2AddrModeImm8Operand<false>";
}
|
|
|
|
|
|
|
|
// Asm-parser operand class for the writeback ("WB") imm7 form. It differs
// from MemImm7ShiftOffsetAsmOperand in its name suffix and in passing the
// rGPR register class ID to the predicate.
class MemImm7ShiftOffsetWBAsmOperand<int shift> : AsmOperandClass {
  let Name = "MemImm7Shift" # shift # "OffsetWB";
  let RenderMethod = "addMemImmOffsetOperands";
  let PredicateMethod =
    "isMemImm7ShiftedOffset<" # shift # ",ARM::rGPRRegClassID>";
}

// One record per supported shift; t2addrmode_imm7_pre looks these up by
// name through !cast.
def MemImm7Shift0OffsetWBAsmOperand : MemImm7ShiftOffsetWBAsmOperand<0>;
def MemImm7Shift1OffsetWBAsmOperand : MemImm7ShiftOffsetWBAsmOperand<1>;
def MemImm7Shift2OffsetWBAsmOperand : MemImm7ShiftOffsetWBAsmOperand<2>;
|
|
|
|
|
|
|
|
// imm7 addressing mode variant that overrides the parser class, decoder and
// operand info inherited from T2AddrMode_Imm7 (rGPR base, "WB" parser
// class).
class t2addrmode_imm7_pre<int shift> : T2AddrMode_Imm7<shift> {
  // NOTE(review): the offset sub-operand is spelled `$offsim` here but
  // `$offsimm` in T2AddrMode_Imm7 and taddrmode_imm7. Kept as-is because the
  // name is externally visible; confirm whether the difference is intended.
  let MIOperandInfo = (ops rGPR:$base, i32imm:$offsim);

  // Resolved by name: expects a def called
  // MemImm7Shift<shift>OffsetWBAsmOperand.
  let ParserMatchClass =
    !cast<AsmOperandClass>("MemImm7Shift" # shift # "OffsetWBAsmOperand");

  let DecoderMethod = "DecodeT2AddrModeImm7<" # shift # ", 1>";

  // Printing is shared with the imm8 addressing mode.
  let PrintMethod = "printT2AddrModeImm8Operand<true>";
}
|
|
|
|
|
|
|
|
// Asm-parser operand class for a stand-alone imm7 offset operand; only the
// class name is customised.
class t2am_imm7shiftOffsetAsmOperand<int shift> : AsmOperandClass {
  let Name = "Imm7Shift" # shift;
}

// One record per supported shift; t2am_imm7_offset looks these up by name
// through !cast.
def t2am_imm7shift0OffsetAsmOperand : t2am_imm7shiftOffsetAsmOperand<0>;
def t2am_imm7shift1OffsetAsmOperand : t2am_imm7shiftOffsetAsmOperand<1>;
def t2am_imm7shift2OffsetAsmOperand : t2am_imm7shiftOffsetAsmOperand<2>;
|
|
|
|
|
2019-08-08 23:27:58 +08:00
|
|
|
// Stand-alone imm7 offset operand, parameterised on the shift amount.
class t2am_imm7_offset<int shift>
    : MemOperand,
      ComplexPattern<i32, 1, "SelectT2AddrModeImm7Offset<" # shift # ">",
                     [], [SDNPWantRoot]> {
  // Resolved by name: expects a def called
  // t2am_imm7shift<shift>OffsetAsmOperand.
  let ParserMatchClass =
    !cast<AsmOperandClass>("t2am_imm7shift" # shift # "OffsetAsmOperand");

  // Printing is shared with the imm8 offset operand.
  let PrintMethod = "printT2AddrModeImm8OffsetOperand";

  let EncoderMethod = "getT2ScaledImmOpValue<7," # shift # ">";
  let DecoderMethod = "DecodeT2Imm7<" # shift # ">";
}
|
|
|
|
|
|
|
|
// Operands for gather/scatter loads of the form [Rbase, Qoffsets]
//
// Asm-parser operand class; the shift amount is part of the class name and
// is passed to the predicate as a template argument.
class MemRegRQOffsetAsmOperand<int shift> : AsmOperandClass {
  let Name = "MemRegRQS" # shift # "Offset";
  let RenderMethod = "addMemRegRQOffsetOperands";
  let PredicateMethod = "isMemRegRQOffset<" # shift # ">";
}

// One record per supported shift; mve_addr_rq_shift looks these up by name
// through !cast.
def MemRegRQS0OffsetAsmOperand : MemRegRQOffsetAsmOperand<0>;
def MemRegRQS1OffsetAsmOperand : MemRegRQOffsetAsmOperand<1>;
def MemRegRQS2OffsetAsmOperand : MemRegRQOffsetAsmOperand<2>;
def MemRegRQS3OffsetAsmOperand : MemRegRQOffsetAsmOperand<3>;
|
|
|
|
|
|
|
|
// mve_addr_rq_shift := reg + vreg{ << UXTW #shift}
//
// Memory operand with a GPRnopc base register and an MQPR offset register.
// Only the printer and the parser class depend on the shift; the encoder
// and decoder method names are the same for every shift.
class mve_addr_rq_shift<int shift> : MemOperand {
  let MIOperandInfo = (ops GPRnopc:$base, MQPR:$offsreg);

  // Resolved by name: expects a def called MemRegRQS<shift>OffsetAsmOperand.
  let ParserMatchClass =
    !cast<AsmOperandClass>("MemRegRQS" # shift # "OffsetAsmOperand");

  let PrintMethod = "printMveAddrModeRQOperand<" # shift # ">";
  let EncoderMethod = "getMveAddrModeRQOpValue";
  let DecoderMethod = "DecodeMveAddrModeRQ";
}
|
|
|
|
|
|
|
|
// Asm-parser operand class used by mve_addr_q_shift. It is rendered with
// the same method as the reg + immediate memory operand classes in this
// file.
class MemRegQOffsetAsmOperand<int shift> : AsmOperandClass {
  let Name = "MemRegQS" # shift # "Offset";
  let RenderMethod = "addMemImmOffsetOperands";
  let PredicateMethod = "isMemRegQOffset<" # shift # ">";
}

// Only shifts 2 and 3 are instantiated; mve_addr_q_shift looks these up by
// name through !cast.
def MemRegQS2OffsetAsmOperand : MemRegQOffsetAsmOperand<2>;
def MemRegQS3OffsetAsmOperand : MemRegQOffsetAsmOperand<3>;
|
|
|
|
|
|
|
|
// mve_addr_q_shift := vreg {+ #imm7s2/4}
//
// Memory operand with an MQPR base register and an i32 immediate.
class mve_addr_q_shift<int shift> : MemOperand {
  let MIOperandInfo = (ops MQPR:$base, i32imm:$imm);

  // Resolved by name: expects a def called MemRegQS<shift>OffsetAsmOperand.
  let ParserMatchClass =
    !cast<AsmOperandClass>("MemRegQS" # shift # "OffsetAsmOperand");

  // The generic reg + imm printer is sufficient here.
  let PrintMethod = "printT2AddrModeImm8Operand<false>";

  let EncoderMethod = "getMveAddrModeQOpValue<" # shift # ">";
  let DecoderMethod = "DecodeMveAddrModeQ<" # shift # ">";
}
|
|
|
|
|
[ARM] Begin adding IR intrinsics for MVE instructions.
This commit, together with the next few, will add a representative
sample of the kind of IR intrinsics that we'll need in order to
implement the user-facing ACLE intrinsics for MVE. Supporting all of
them will take more work; the intention of this initial series of
commits is to implement an intrinsic or two from lots of different
categories, as examples and proofs of concept.
This initial commit introduces a small number of IR intrinsics for
instructions simple enough that they can use Tablegen ISel patterns:
the predicated versions of the VADD and VSUB instructions (both
integer and FP), VMIN and VMAX, and the float->half VCVT instruction
(predicated and unpredicated).
When using VPT-predicated instructions in automatic code generation,
it will be convenient to specify the predicate value as a vector of
the appropriate number of i1. To make it easy to specify all sizes of
an instruction in one go and give each one the matching predicate
vector type, I've added a system of Tablegen informational records
describing MVE's vector types: each one gives the underlying LLVM IR
ValueType (which may not be the same if the MVE vector is of
explicitly signed or unsigned integers) and an appropriate vNi1 to use
as the predicate vector.
(Also, those info records include the usual encoding for the types, so
that as we add associations between each instruction encoding and one
of the new `MVEVectorVTInfo` records, we can remove some of the
existing template parameters and replace them with references to the
vector type info's fields.)
The user-facing ACLE intrinsics will receive a predicate mask as a
16-bit integer, so I've also provided a pair of intrinsics i2v and
v2i, to convert between an integer and a vector of i1 by just changing
the register class.
Reviewers: dmgreen, miyuki, ostannard
Subscribers: javed.absar, kristof.beyls, hiraditya, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D67158
2019-10-08 00:00:51 +08:00
|
|
|
// Informational records describing the vector types used by MVE. Each
// record bundles the value types and assembly suffixes that belong to one
// vector type.
class MVEVectorVTInfo<ValueType vec, ValueType pred, bits<2> size,
                      string suffixletter, bit unsigned> {
  // LLVM ValueType of the vector itself, for use in ISel patterns.
  ValueType Vec = vec;

  // LLVM ValueType of the matching vector of predicate bits, for ISel
  // patterns that handle an IR intrinsic describing the predicated form of
  // an instruction.
  //
  // For a vector of N elements this is normally vNi1. Vectors of 2 elements
  // are the exception and use v4i1 rather than v2i1: MVE codegen doesn't
  // support all the auxiliary operations on v2i1 (vector shuffles etc), and
  // no MVE compare instruction _generates_ a v2i1 directly.
  ValueType Pred = pred;

  // Element size in the form most MVE instruction encodings use: a 2-bit
  // value V meaning an (8<<V)-bit vector element.
  bits<2> Size = size;

  // Signedness, for vectors that explicitly mention it: 0 for signed, 1 for
  // unsigned. Undefined for anything else.
  bit Unsigned = unsigned;

  // Number of bits in one vector element, as an integer.
  int LaneBits = !shl(8, Size);

  // Assembly suffix for instructions that only care about the lane width.
  string BitsSuffix = !cast<string>(LaneBits);

  // Assembly suffix for instructions that mention the whole type: the
  // suffix letter followed by the lane width.
  string Suffix = !strconcat(suffixletter, BitsSuffix);
}
|
|
|
|
|
|
|
|
// Integer vector types that don't treat signed and unsigned differently.
// Their Unsigned field is therefore left unset (`?`).
def MVE_v16i8 : MVEVectorVTInfo<v16i8, v16i1, 0b00, "i", ?>;
def MVE_v8i16 : MVEVectorVTInfo<v8i16, v8i1,  0b01, "i", ?>;
def MVE_v4i32 : MVEVectorVTInfo<v4i32, v4i1,  0b10, "i", ?>;
def MVE_v2i64 : MVEVectorVTInfo<v2i64, v4i1,  0b11, "i", ?>;
|
[ARM] Begin adding IR intrinsics for MVE instructions.
This commit, together with the next few, will add a representative
sample of the kind of IR intrinsics that we'll need in order to
implement the user-facing ACLE intrinsics for MVE. Supporting all of
them will take more work; the intention of this initial series of
commits is to implement an intrinsic or two from lots of different
categories, as examples and proofs of concept.
This initial commit introduces a small number of IR intrinsics for
instructions simple enough that they can use Tablegen ISel patterns:
the predicated versions of the VADD and VSUB instructions (both
integer and FP), VMIN and VMAX, and the float->half VCVT instruction
(predicated and unpredicated).
When using VPT-predicated instructions in automatic code generation,
it will be convenient to specify the predicate value as a vector of
the appropriate number of i1. To make it easy to specify all sizes of
an instruction in one go and give each one the matching predicate
vector type, I've added a system of Tablegen informational records
describing MVE's vector types: each one gives the underlying LLVM IR
ValueType (which may not be the same if the MVE vector is of
explicitly signed or unsigned integers) and an appropriate vNi1 to use
as the predicate vector.
(Also, those info records include the usual encoding for the types, so
that as we add associations between each instruction encoding and one
of the new `MVEVectorVTInfo` records, we can remove some of the
existing template parameters and replace them with references to the
vector type info's fields.)
The user-facing ACLE intrinsics will receive a predicate mask as a
16-bit integer, so I've also provided a pair of intrinsics i2v and
v2i, to convert between an integer and a vector of i1 by just changing
the register class.
Reviewers: dmgreen, miyuki, ostannard
Subscribers: javed.absar, kristof.beyls, hiraditya, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D67158
2019-10-08 00:00:51 +08:00
|
|
|
|
|
|
|
// Explicitly signed and unsigned integer vectors. They map to the
|
|
|
|
// same set of LLVM ValueTypes as above, but are represented
|
|
|
|
// differently in assembly and instruction encodings.
|
[ARM,MVE] Add intrinsics for gather/scatter load/stores.
This patch adds two new families of intrinsics, both of which are
memory accesses taking a vector of locations to load from / store to.
The vldrq_gather_base / vstrq_scatter_base intrinsics take a vector of
base addresses, and an immediate offset to be added consistently to
each one. vldrq_gather_offset / vstrq_scatter_offset take a scalar
base address, and a vector of offsets to add to it. The
'shifted_offset' variants also multiply each offset by the element
size type, so that the vector is effectively of array indices.
At the IR level, these operations are represented by a single set of
four IR intrinsics: {gather,scatter} × {base,offset}. The other
details (signed/unsigned, shift, and memory element size as opposed to
vector element size) are all specified by IR intrinsic polymorphism
and immediate operands, because that made the selection job easier
than making a huge family of similarly named intrinsics.
I considered using the standard IR representations such as
llvm.masked.gather, but they're not a good fit. In order to use
llvm.masked.gather to represent a gather_offset load with element size
smaller than a pointer, you'd have to expand the <8 x i16> vector of
offsets into an <8 x i16*> vector of pointers, which would be split up
during legalization, so you'd spend most of your time undoing the mess
it had made. Also, ISel support for llvm.masked.gather would be easy
enough in a trivial way (you can expand it into a gather-base load
with a zero immediate offset), but instruction-selecting lots of
fiddly idioms back into all the _other_ MVE load instructions would be
much more work. So I think dedicated IR intrinsics are the more
sensible approach, at least for the moment.
On the clang tablegen side, I've added two new features to the
Tablegen source accepted by MveEmitter: a 'CopyKind' type node for
defining a type that varies with the parameter type (it lets you ask
for an unsigned integer type of the same width as the parameter), and
an 'unsignedflag' value node for passing an immediate IR operand which
is 0 for a signed integer type or 1 for an unsigned one. That lets me
write each kind of intrinsic just once and get all its subtypes and
immediate arguments generated automatically.
Also I've tweaked the handling of pointer-typed values in the code
generation part of MveEmitter: they're generated as Address rather
than Value (i.e. including an alignment) so that they can be given to
the ordinary IR load and store operations, but I'd omitted the code to
convert them back to Value when they're going to be used as an
argument to an IR intrinsic.
On the MC side, I've enhanced MVEVectorVTInfo so that it can tell you
not only the full assembly-language suffix for a given vector type
(like 's32' or 'u16') but also the numeric-only one used by store
instructions (just '32' or '16').
Reviewers: dmgreen
Subscribers: kristof.beyls, hiraditya, cfe-commits, llvm-commits
Tags: #clang, #llvm
Differential Revision: https://reviews.llvm.org/D69791
2019-11-01 01:02:07 +08:00
|
|
|
// Integer vector type records with explicit signedness. Each signed ("s")
// record passes the same value type, predicate type and 2-bit size code as
// the matching unsigned ("u") record; only the suffix letter and the final
// argument (0b0 for the signed records, 0b1 for the unsigned ones) differ.
// NOTE(review): the 64-bit records pass v4i1 as the predicate type rather
// than a two-lane type -- confirm against the MVEVectorVTInfo definition.
def MVE_v16s8 : MVEVectorVTInfo<v16i8, v16i1, 0b00, "s", 0b0>;
|
|
|
|
def MVE_v8s16 : MVEVectorVTInfo<v8i16, v8i1, 0b01, "s", 0b0>;
|
|
|
|
def MVE_v4s32 : MVEVectorVTInfo<v4i32, v4i1, 0b10, "s", 0b0>;
|
|
|
|
def MVE_v2s64 : MVEVectorVTInfo<v2i64, v4i1, 0b11, "s", 0b0>;
|
|
|
|
def MVE_v16u8 : MVEVectorVTInfo<v16i8, v16i1, 0b00, "u", 0b1>;
|
|
|
|
def MVE_v8u16 : MVEVectorVTInfo<v8i16, v8i1, 0b01, "u", 0b1>;
|
|
|
|
def MVE_v4u32 : MVEVectorVTInfo<v4i32, v4i1, 0b10, "u", 0b1>;
|
|
|
|
def MVE_v2u64 : MVEVectorVTInfo<v2i64, v4i1, 0b11, "u", 0b1>;
|
[ARM] Begin adding IR intrinsics for MVE instructions.
This commit, together with the next few, will add a representative
sample of the kind of IR intrinsics that we'll need in order to
implement the user-facing ACLE intrinsics for MVE. Supporting all of
them will take more work; the intention of this initial series of
commits is to implement an intrinsic or two from lots of different
categories, as examples and proofs of concept.
This initial commit introduces a small number of IR intrinsics for
instructions simple enough that they can use Tablegen ISel patterns:
the predicated versions of the VADD and VSUB instructions (both
integer and FP), VMIN and VMAX, and the float->half VCVT instruction
(predicated and unpredicated).
When using VPT-predicated instructions in automatic code generation,
it will be convenient to specify the predicate value as a vector of
the appropriate number of i1. To make it easy to specify all sizes of
an instruction in one go and give each one the matching predicate
vector type, I've added a system of Tablegen informational records
describing MVE's vector types: each one gives the underlying LLVM IR
ValueType (which may not be the same if the MVE vector is of
explicitly signed or unsigned integers) and an appropriate vNi1 to use
as the predicate vector.
(Also, those info records include the usual encoding for the types, so
that as we add associations between each instruction encoding and one
of the new `MVEVectorVTInfo` records, we can remove some of the
existing template parameters and replace them with references to the
vector type info's fields.)
The user-facing ACLE intrinsics will receive a predicate mask as a
16-bit integer, so I've also provided a pair of intrinsics i2v and
v2i, to convert between an integer and a vector of i1 by just changing
the register class.
Reviewers: dmgreen, miyuki, ostannard
Subscribers: javed.absar, kristof.beyls, hiraditya, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D67158
2019-10-08 00:00:51 +08:00
|
|
|
|
|
|
|
// FP vector types.
|
[ARM,MVE] Add intrinsics for gather/scatter load/stores.
This patch adds two new families of intrinsics, both of which are
memory accesses taking a vector of locations to load from / store to.
The vldrq_gather_base / vstrq_scatter_base intrinsics take a vector of
base addresses, and an immediate offset to be added consistently to
each one. vldrq_gather_offset / vstrq_scatter_offset take a scalar
base address, and a vector of offsets to add to it. The
'shifted_offset' variants also multiply each offset by the element
size type, so that the vector is effectively of array indices.
At the IR level, these operations are represented by a single set of
four IR intrinsics: {gather,scatter} × {base,offset}. The other
details (signed/unsigned, shift, and memory element size as opposed to
vector element size) are all specified by IR intrinsic polymorphism
and immediate operands, because that made the selection job easier
than making a huge family of similarly named intrinsics.
I considered using the standard IR representations such as
llvm.masked.gather, but they're not a good fit. In order to use
llvm.masked.gather to represent a gather_offset load with element size
smaller than a pointer, you'd have to expand the <8 x i16> vector of
offsets into an <8 x i16*> vector of pointers, which would be split up
during legalization, so you'd spend most of your time undoing the mess
it had made. Also, ISel support for llvm.masked.gather would be easy
enough in a trivial way (you can expand it into a gather-base load
with a zero immediate offset), but instruction-selecting lots of
fiddly idioms back into all the _other_ MVE load instructions would be
much more work. So I think dedicated IR intrinsics are the more
sensible approach, at least for the moment.
On the clang tablegen side, I've added two new features to the
Tablegen source accepted by MveEmitter: a 'CopyKind' type node for
defining a type that varies with the parameter type (it lets you ask
for an unsigned integer type of the same width as the parameter), and
an 'unsignedflag' value node for passing an immediate IR operand which
is 0 for a signed integer type or 1 for an unsigned one. That lets me
write each kind of intrinsic just once and get all its subtypes and
immediate arguments generated automatically.
Also I've tweaked the handling of pointer-typed values in the code
generation part of MveEmitter: they're generated as Address rather
than Value (i.e. including an alignment) so that they can be given to
the ordinary IR load and store operations, but I'd omitted the code to
convert them back to Value when they're going to be used as an
argument to an IR intrinsic.
On the MC side, I've enhanced MVEVectorVTInfo so that it can tell you
not only the full assembly-language suffix for a given vector type
(like 's32' or 'u16') but also the numeric-only one used by store
instructions (just '32' or '16').
Reviewers: dmgreen
Subscribers: kristof.beyls, hiraditya, cfe-commits, llvm-commits
Tags: #clang, #llvm
Differential Revision: https://reviews.llvm.org/D69791
2019-11-01 01:02:07 +08:00
|
|
|
// Floating-point vector type records: suffix letter "f", with the final
// argument left unset (?). The size codes 0b01/0b10/0b11 match the ones used
// by the 16-, 32- and 64-bit integer records.
// NOTE(review): MVE_v2f64 passes v4i1 as its predicate type, like the 64-bit
// integer records -- confirm this is intentional.
def MVE_v8f16 : MVEVectorVTInfo<v8f16, v8i1, 0b01, "f", ?>;
|
|
|
|
def MVE_v4f32 : MVEVectorVTInfo<v4f32, v4i1, 0b10, "f", ?>;
|
|
|
|
def MVE_v2f64 : MVEVectorVTInfo<v2f64, v4i1, 0b11, "f", ?>;
|
[ARM] Begin adding IR intrinsics for MVE instructions.
This commit, together with the next few, will add a representative
sample of the kind of IR intrinsics that we'll need in order to
implement the user-facing ACLE intrinsics for MVE. Supporting all of
them will take more work; the intention of this initial series of
commits is to implement an intrinsic or two from lots of different
categories, as examples and proofs of concept.
This initial commit introduces a small number of IR intrinsics for
instructions simple enough that they can use Tablegen ISel patterns:
the predicated versions of the VADD and VSUB instructions (both
integer and FP), VMIN and VMAX, and the float->half VCVT instruction
(predicated and unpredicated).
When using VPT-predicated instructions in automatic code generation,
it will be convenient to specify the predicate value as a vector of
the appropriate number of i1. To make it easy to specify all sizes of
an instruction in one go and give each one the matching predicate
vector type, I've added a system of Tablegen informational records
describing MVE's vector types: each one gives the underlying LLVM IR
ValueType (which may not be the same if the MVE vector is of
explicitly signed or unsigned integers) and an appropriate vNi1 to use
as the predicate vector.
(Also, those info records include the usual encoding for the types, so
that as we add associations between each instruction encoding and one
of the new `MVEVectorVTInfo` records, we can remove some of the
existing template parameters and replace them with references to the
vector type info's fields.)
The user-facing ACLE intrinsics will receive a predicate mask as a
16-bit integer, so I've also provided a pair of intrinsics i2v and
v2i, to convert between an integer and a vector of i1 by just changing
the register class.
Reviewers: dmgreen, miyuki, ostannard
Subscribers: javed.absar, kristof.beyls, hiraditya, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D67158
2019-10-08 00:00:51 +08:00
|
|
|
|
[ARM] Add MVE vector load/store instructions.
This adds the rest of the vector memory access instructions. It
includes contiguous loads/stores, with an ordinary addressing mode
such as [r0,#offset] (plus writeback variants); gather loads and
scatter stores with a scalar base address register and a vector of
offsets from it (written [r0,q1] or similar); and gather/scatters with
a vector of base addresses (written [q0,#offset], again with
writeback). Additionally, some of the loads can widen each loaded
value into a larger vector lane, and the corresponding stores narrow
them again.
To implement these, we also have to add the addressing modes they
need. Also, in AsmParser, the `isMem` query function now has
subqueries `isGPRMem` and `isMVEMem`, according to which kind of base
register is used by a given memory access operand.
I've also had to add an extra check in `checkTargetMatchPredicate` in
the AsmParser, without which our last-minute check of `rGPR` register
operands against SP and PC was failing an assertion because Tablegen
had inserted an immediate 0 in place of one of a pair of tied register
operands. (This matches the way the corresponding check for `MCK_rGPR`
in `validateTargetOperandClass` is guarded.) Apparently the MVE load
instructions were the first to have ever triggered this assertion, but
I think only because they were the first to have a combination of the
usual Arm pre/post writeback system and the `rGPR` class in particular.
Reviewers: dmgreen, samparker, SjoerdMeijer, t.p.northover
Subscribers: javed.absar, kristof.beyls, hiraditya, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D62680
llvm-svn: 364291
2019-06-25 19:24:18 +08:00
|
|
|
// --------- Start of base classes for the instructions themselves
|
|
|
|
|
[ARM] Set up infrastructure for MVE vector instructions.
This commit prepares the way to start adding the main collection of
MVE instructions, which operate on the 128-bit vector registers.
The most obvious thing that's needed, and the simplest, is to add the
MQPR register class, which is like the existing QPR except that it has
fewer registers in it.
The more complicated part: MVE defines a system of vector predication,
in which instructions operating on 128-bit vector registers can be
constrained to operate on only a subset of the lanes, using a system
of prefix instructions similar to the existing Thumb IT, in that you
have one prefix instruction which designates up to 4 following
instructions as subject to predication, and within that sequence, the
predicate can be inverted by means of T/E suffixes ('Then' / 'Else').
To support instructions of this type, we've added two new Tablegen
classes `vpred_n` and `vpred_r` for standard clusters of MC operands
to add to a predicated instruction. Both include a flag indicating whether
and how the instruction is predicated (options are T, E and 'not
predicated'), and an input register field for the register controlling
the set of active lanes. They differ from each other in that `vpred_r`
also includes an input operand for the previous value of the output
register, for instructions that leave inactive lanes unchanged.
`vpred_n` lacks that extra operand; it will be used for instructions
that don't preserve inactive lanes in their output register (either
because inactive lanes are zeroed, as the MVE load instructions do, or
because the output register isn't a vector at all).
This commit also adds the family of prefix instructions themselves
(VPT / VPST), and all the machinery needed to work with them in
assembly and disassembly (e.g. generating the 't' and 'e' mnemonic
suffixes on disassembled instructions within a predicated block).
I've added a couple of demo instructions that derive from the new
Tablegen base classes and use those two operand clusters. The bulk of
the vector instructions will come in followup commits small enough to
be manageable. (One exception is that I've added the full version of
`isMnemonicVPTPredicable` in the AsmParser, because it seemed
pointless to carefully split it up.)
Reviewers: dmgreen, samparker, SjoerdMeijer, t.p.northover
Subscribers: javed.absar, kristof.beyls, hiraditya, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D62669
llvm-svn: 363258
2019-06-13 21:11:13 +08:00
|
|
|
// MVE_MI is the base class for MVE instructions built on Thumb2XI. It passes
// AddrModeNone and the literal 4 (presumably the instruction size in bytes --
// confirm against Thumb2XI), and forms the assembly string as asm, a tab,
// then ops. Derived instructions require HasMVEInt, belong to MVEDomain and
// are decoded in the "MVE" decoder namespace.
class MVE_MI<dag oops, dag iops, InstrItinClass itin, string asm,
|
|
|
|
string ops, string cstr, list<dag> pattern>
|
|
|
|
: Thumb2XI<oops, iops, AddrModeNone, 4, itin, !strconcat(asm, "\t", ops), cstr,
|
|
|
|
pattern>,
|
|
|
|
Requires<[HasMVEInt]> {
|
|
|
|
let D = MVEDomain;
|
|
|
|
let DecoderNamespace = "MVE";
|
|
|
|
}
|
|
|
|
|
|
|
|
// MVE_p is used for most predicated instructions, to add the cluster
|
|
|
|
// of input operands that provides the VPT suffix (none, T or E) and
|
|
|
|
// the input predicate register.
|
|
|
|
// The vpred cluster is appended to the caller's input operands as $vp, and
// an empty suffix produces a mnemonic with no trailing ".suffix" part.
class MVE_p<dag oops, dag iops, InstrItinClass itin, string iname,
|
|
|
|
string suffix, string ops, vpred_ops vpred, string cstr,
|
|
|
|
list<dag> pattern=[]>
|
|
|
|
: MVE_MI<oops, !con(iops, (ins vpred:$vp)), itin,
|
|
|
|
// If the instruction has a suffix, like vadd.f32, then the
|
|
|
|
// VPT predication suffix goes before the dot, so the full
|
|
|
|
// name has to be "vadd${vp}.f32".
|
|
|
|
!strconcat(iname, "${vp}",
|
|
|
|
!if(!eq(suffix, ""), "", !strconcat(".", suffix))),
|
|
|
|
// The operand cluster's own constraint string is appended to the
// constraints supplied by the caller.
ops, !strconcat(cstr, vpred.vpred_constraint), pattern> {
|
|
|
|
// Encoding bits fixed for every instruction derived from this class.
let Inst{31-29} = 0b111;
|
|
|
|
let Inst{27-26} = 0b11;
|
|
|
|
}
|
|
|
|
|
[ARM] Add a batch of MVE floating-point instructions.
Summary:
This includes floating-point basic arithmetic (add/sub/multiply),
complex add/multiply, unary negation and absolute value, rounding to
integer value, and conversion to/from integer formats.
Reviewers: dmgreen, samparker, SjoerdMeijer, t.p.northover
Subscribers: javed.absar, kristof.beyls, hiraditya, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D62675
llvm-svn: 364013
2019-06-21 17:35:07 +08:00
|
|
|
// MVE_f forwards every template argument unchanged to MVE_p and only
// overrides the Predicates list, replacing it with [HasMVEFloat].
class MVE_f<dag oops, dag iops, InstrItinClass itin, string iname,
|
|
|
|
string suffix, string ops, vpred_ops vpred, string cstr,
|
|
|
|
list<dag> pattern=[]>
|
|
|
|
: MVE_p<oops, iops, itin, iname, suffix, ops, vpred, cstr, pattern> {
|
|
|
|
let Predicates = [HasMVEFloat];
|
|
|
|
}
|
|
|
|
|
2019-06-11 20:04:32 +08:00
|
|
|
// MVE_MI_with_pred differs from MVE_MI in that it derives from Thumb2I
// rather than Thumb2XI: the mnemonic (asm) and the tab-prefixed operand
// string are passed as two separate arguments instead of one concatenated
// string. It requires both HasV8_1MMainline and HasMVEInt.
// NOTE(review): presumably Thumb2I supplies the ordinary condition-code
// predicate operand, hence the "_with_pred" name -- confirm.
class MVE_MI_with_pred<dag oops, dag iops, InstrItinClass itin, string asm,
|
|
|
|
string ops, string cstr, list<dag> pattern>
|
|
|
|
: Thumb2I<oops, iops, AddrModeNone, 4, itin, asm, !strconcat("\t", ops), cstr,
|
|
|
|
pattern>,
|
|
|
|
Requires<[HasV8_1MMainline, HasMVEInt]> {
|
|
|
|
let D = MVEDomain;
|
|
|
|
let DecoderNamespace = "MVE";
|
|
|
|
}
|
|
|
|
|
[ARM] Add MVE vector bit-operations (register inputs).
This includes all the obvious bitwise operations (AND, OR, BIC, ORN,
MVN) in register-to-register forms, and the immediate forms of
AND/OR/BIC/ORN; byte-order reverse instructions; and the VMOVs that
access a single lane of a vector.
Some of those VMOVs (specifically, the ones that access a 32-bit lane)
share an encoding with existing instructions that were disassembled as
accessing half of a d-register (e.g. `vmov.32 r0, d1[0]`), but in
8.1-M they're now written as accessing a quarter of a q-register (e.g.
`vmov.32 r0, q0[2]`). The older syntax is still accepted by the
assembler.
Reviewers: dmgreen, samparker, SjoerdMeijer, t.p.northover
Subscribers: javed.absar, kristof.beyls, hiraditya, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D62673
llvm-svn: 363838
2019-06-20 00:43:53 +08:00
|
|
|
// Base class for the lane-access VMOV instructions. It derives from Thumb2I
// with the same requirements, domain and decoder namespace as
// MVE_MI_with_pred, but takes an extra suffix string.
class MVE_VMOV_lane_base<dag oops, dag iops, InstrItinClass itin, string asm,
|
|
|
|
string suffix, string ops, string cstr,
|
|
|
|
list<dag> pattern>
|
|
|
|
: Thumb2I<oops, iops, AddrModeNone, 4, itin, asm,
|
|
|
|
// A non-empty suffix is emitted as ".suffix" ahead of the tab and operands.
!if(!eq(suffix, ""), "", "." # suffix) # "\t" # ops,
|
|
|
|
cstr, pattern>,
|
|
|
|
Requires<[HasV8_1MMainline, HasMVEInt]> {
|
|
|
|
let D = MVEDomain;
|
|
|
|
let DecoderNamespace = "MVE";
|
|
|
|
}
|
|
|
|
|
2019-06-18 23:05:42 +08:00
|
|
|
// Common base of the scalar shift instructions. Note the argument mapping
// onto MVE_MI_with_pred: iname is forwarded as the mnemonic, and the
// parameter called asm here is forwarded as the operand string. No itinerary
// is attached (NoItinerary).
class MVE_ScalarShift<string iname, dag oops, dag iops, string asm, string cstr,
|
2019-06-11 20:04:32 +08:00
|
|
|
list<dag> pattern=[]>
|
|
|
|
: MVE_MI_with_pred<oops, iops, NoItinerary, iname, asm, cstr, pattern> {
|
|
|
|
// Encoding bits fixed for every scalar shift.
let Inst{31-20} = 0b111010100101;
|
|
|
|
let Inst{8} = 0b1;
|
|
|
|
|
|
|
|
}
|
|
|
|
|
2019-06-18 23:05:42 +08:00
|
|
|
// Scalar shifts producing a single rGPR result, $RdaDest, whose 4-bit
// register number is encoded in Inst{19-16}. Input operands, operand string,
// constraints and pattern are supplied by the subclass.
class MVE_ScalarShiftSingleReg<string iname, dag iops, string asm, string cstr,
|
2019-06-11 20:04:32 +08:00
|
|
|
list<dag> pattern=[]>
|
2019-06-18 23:05:42 +08:00
|
|
|
: MVE_ScalarShift<iname, (outs rGPR:$RdaDest), iops, asm, cstr, pattern> {
|
2019-06-11 20:04:32 +08:00
|
|
|
bits<4> RdaDest;
|
|
|
|
|
|
|
|
let Inst{19-16} = RdaDest{3-0};
|
|
|
|
}
|
|
|
|
|
[ARM,MVE] Add intrinsics for scalar shifts.
This fills in the small family of MVE intrinsics that have nothing to
do with vectors: they implement bit-shift operations on 32- or 64-bit
values held in one or two general-purpose registers. Most of these
shift operations saturate if shifting left, and round to nearest if
shifting right, although LSLL and ASRL behave like ordinary shifts.
When these instructions take a variable shift count in a register,
they pay attention to its sign, so that (for example) LSLL or UQRSHLL
will shift left if given a positive number but right if given a
negative one. That makes even LSLL and ASRL different enough from
standard LLVM IR shift semantics that I couldn't see any better
alternative than to simply model the whole family as a set of
MVE-specific IR intrinsics.
(The //immediate// forms of LSLL and ASRL, on the other hand, do
behave exactly like a standard IR shift of a 64-bit value. In fact,
those forms don't have ACLE intrinsics defined at all, because you can
just write an ordinary C shift operation if you want one of those.)
The 64-bit shifts have to be instruction-selected in C++, because they
deliver two output values. But the 32-bit ones are simple enough that
I could write a DAG isel pattern directly into each Instruction
record.
Reviewers: ostannard, MarkMurrayARM, dmgreen
Reviewed By: dmgreen
Subscribers: kristof.beyls, hiraditya, cfe-commits, llvm-commits
Tags: #clang, #llvm
Differential Revision: https://reviews.llvm.org/D70319
2019-11-19 22:47:07 +08:00
|
|
|
// Single-register scalar shift by an immediate. The destination is tied to
// $RdaSrc, and the selection pattern matches the intrinsic record named
// "int_arm_mve_" followed by iname. op5_4 selects the operation in
// Inst{5-4}; the 5-bit immediate is split across Inst{14-12} (imm{4-2}) and
// Inst{7-6} (imm{1-0}).
class MVE_ScalarShiftSRegImm<string iname, bits<2> op5_4>
|
2019-06-18 23:05:42 +08:00
|
|
|
: MVE_ScalarShiftSingleReg<iname, (ins rGPR:$RdaSrc, long_shift:$imm),
|
[ARM,MVE] Add intrinsics for scalar shifts.
This fills in the small family of MVE intrinsics that have nothing to
do with vectors: they implement bit-shift operations on 32- or 64-bit
values held in one or two general-purpose registers. Most of these
shift operations saturate if shifting left, and round to nearest if
shifting right, although LSLL and ASRL behave like ordinary shifts.
When these instructions take a variable shift count in a register,
they pay attention to its sign, so that (for example) LSLL or UQRSHLL
will shift left if given a positive number but right if given a
negative one. That makes even LSLL and ASRL different enough from
standard LLVM IR shift semantics that I couldn't see any better
alternative than to simply model the whole family as a set of
MVE-specific IR intrinsics.
(The //immediate// forms of LSLL and ASRL, on the other hand, do
behave exactly like a standard IR shift of a 64-bit value. In fact,
those forms don't have ACLE intrinsics defined at all, because you can
just write an ordinary C shift operation if you want one of those.)
The 64-bit shifts have to be instruction-selected in C++, because they
deliver two output values. But the 32-bit ones are simple enough that
I could write a DAG isel pattern directly into each Instruction
record.
Reviewers: ostannard, MarkMurrayARM, dmgreen
Reviewed By: dmgreen
Subscribers: kristof.beyls, hiraditya, cfe-commits, llvm-commits
Tags: #clang, #llvm
Differential Revision: https://reviews.llvm.org/D70319
2019-11-19 22:47:07 +08:00
|
|
|
"$RdaSrc, $imm", "$RdaDest = $RdaSrc",
|
|
|
|
[(set rGPR:$RdaDest,
|
|
|
|
(i32 (!cast<Intrinsic>("int_arm_mve_" # iname)
|
|
|
|
(i32 rGPR:$RdaSrc), (i32 imm:$imm))))]> {
|
2019-06-11 20:04:32 +08:00
|
|
|
bits<5> imm;
|
|
|
|
|
|
|
|
let Inst{15} = 0b0;
|
|
|
|
let Inst{14-12} = imm{4-2};
|
|
|
|
let Inst{11-8} = 0b1111;
|
|
|
|
let Inst{7-6} = imm{1-0};
|
|
|
|
let Inst{5-4} = op5_4{1-0};
|
|
|
|
let Inst{3-0} = 0b1111;
|
|
|
|
}
|
|
|
|
|
2019-06-18 23:05:42 +08:00
|
|
|
// Immediate-shift instructions on a single register. The second argument is
// the op5_4 value that MVE_ScalarShiftSRegImm places in Inst{5-4}; the first
// is both the mnemonic and the suffix of the matched int_arm_mve_* intrinsic.
def MVE_SQSHL : MVE_ScalarShiftSRegImm<"sqshl", 0b11>;
|
|
|
|
def MVE_SRSHR : MVE_ScalarShiftSRegImm<"srshr", 0b10>;
|
|
|
|
def MVE_UQSHL : MVE_ScalarShiftSRegImm<"uqshl", 0b00>;
|
|
|
|
def MVE_URSHR : MVE_ScalarShiftSRegImm<"urshr", 0b01>;
|
2019-06-11 20:04:32 +08:00
|
|
|
|
[ARM,MVE] Add intrinsics for scalar shifts.
This fills in the small family of MVE intrinsics that have nothing to
do with vectors: they implement bit-shift operations on 32- or 64-bit
values held in one or two general-purpose registers. Most of these
shift operations saturate if shifting left, and round to nearest if
shifting right, although LSLL and ASRL behave like ordinary shifts.
When these instructions take a variable shift count in a register,
they pay attention to its sign, so that (for example) LSLL or UQRSHLL
will shift left if given a positive number but right if given a
negative one. That makes even LSLL and ASRL different enough from
standard LLVM IR shift semantics that I couldn't see any better
alternative than to simply model the whole family as a set of
MVE-specific IR intrinsics.
(The //immediate// forms of LSLL and ASRL, on the other hand, do
behave exactly like a standard IR shift of a 64-bit value. In fact,
those forms don't have ACLE intrinsics defined at all, because you can
just write an ordinary C shift operation if you want one of those.)
The 64-bit shifts have to be instruction-selected in C++, because they
deliver two output values. But the 32-bit ones are simple enough that
I could write a DAG isel pattern directly into each Instruction
record.
Reviewers: ostannard, MarkMurrayARM, dmgreen
Reviewed By: dmgreen
Subscribers: kristof.beyls, hiraditya, cfe-commits, llvm-commits
Tags: #clang, #llvm
Differential Revision: https://reviews.llvm.org/D70319
2019-11-19 22:47:07 +08:00
|
|
|
// Single-register scalar shift by a register amount ($Rm, encoded in
// Inst{15-12}). As in the immediate form, the destination is tied to
// $RdaSrc and the pattern matches the intrinsic named "int_arm_mve_"
// followed by iname. The low nibble is 0b1101 here, versus 0b1111 in
// MVE_ScalarShiftSRegImm.
class MVE_ScalarShiftSRegReg<string iname, bits<2> op5_4>
|
2019-06-18 23:05:42 +08:00
|
|
|
: MVE_ScalarShiftSingleReg<iname, (ins rGPR:$RdaSrc, rGPR:$Rm),
|
[ARM,MVE] Add intrinsics for scalar shifts.
This fills in the small family of MVE intrinsics that have nothing to
do with vectors: they implement bit-shift operations on 32- or 64-bit
values held in one or two general-purpose registers. Most of these
shift operations saturate if shifting left, and round to nearest if
shifting right, although LSLL and ASRL behave like ordinary shifts.
When these instructions take a variable shift count in a register,
they pay attention to its sign, so that (for example) LSLL or UQRSHLL
will shift left if given a positive number but right if given a
negative one. That makes even LSLL and ASRL different enough from
standard LLVM IR shift semantics that I couldn't see any better
alternative than to simply model the whole family as a set of
MVE-specific IR intrinsics.
(The //immediate// forms of LSLL and ASRL, on the other hand, do
behave exactly like a standard IR shift of a 64-bit value. In fact,
those forms don't have ACLE intrinsics defined at all, because you can
just write an ordinary C shift operation if you want one of those.)
The 64-bit shifts have to be instruction-selected in C++, because they
deliver two output values. But the 32-bit ones are simple enough that
I could write a DAG isel pattern directly into each Instruction
record.
Reviewers: ostannard, MarkMurrayARM, dmgreen
Reviewed By: dmgreen
Subscribers: kristof.beyls, hiraditya, cfe-commits, llvm-commits
Tags: #clang, #llvm
Differential Revision: https://reviews.llvm.org/D70319
2019-11-19 22:47:07 +08:00
|
|
|
"$RdaSrc, $Rm", "$RdaDest = $RdaSrc",
|
|
|
|
[(set rGPR:$RdaDest,
|
|
|
|
(i32 (!cast<Intrinsic>("int_arm_mve_" # iname)
|
|
|
|
(i32 rGPR:$RdaSrc), (i32 rGPR:$Rm))))]> {
|
2019-06-11 20:04:32 +08:00
|
|
|
bits<4> Rm;
|
|
|
|
|
|
|
|
let Inst{15-12} = Rm{3-0};
|
|
|
|
let Inst{11-8} = 0b1111;
|
|
|
|
let Inst{7-6} = 0b00;
|
|
|
|
let Inst{5-4} = op5_4{1-0};
|
|
|
|
let Inst{3-0} = 0b1101;
|
2019-09-09 16:50:28 +08:00
|
|
|
|
|
|
|
|
// NOTE(review): sets bits 8-6 of the Unpredictable mask; presumably this
// lets the disassembler soft-fail instead of rejecting encodings where
// those bits differ from the values above -- confirm.
let Unpredictable{8-6} = 0b111;
|
2019-06-11 20:04:32 +08:00
|
|
|
|
|
}
|
|
|
|
|
2019-06-18 23:05:42 +08:00
|
|
|
// Register-shift instructions on a single register; the second argument is
// the op5_4 value that MVE_ScalarShiftSRegReg places in Inst{5-4}.
def MVE_SQRSHR : MVE_ScalarShiftSRegReg<"sqrshr", 0b10>;
|
|
|
|
def MVE_UQRSHL : MVE_ScalarShiftSRegReg<"uqrshl", 0b00>;
|
2019-06-11 20:04:32 +08:00
|
|
|
|
2019-06-18 23:05:42 +08:00
|
|
|
// Scalar shifts producing a two-register result: $RdaLo from the tGPREven
// class and $RdaHi from the tGPROdd class. Only bits 3-1 of each register
// number are encoded (Inst{19-17} and Inst{11-9}); presumably bit 0 is
// implied by the even/odd register class -- confirm.
class MVE_ScalarShiftDoubleReg<string iname, dag iops, string asm,
|
|
|
|
string cstr, list<dag> pattern=[]>
|
|
|
|
: MVE_ScalarShift<iname, (outs tGPREven:$RdaLo, tGPROdd:$RdaHi),
|
|
|
|
iops, asm, cstr, pattern> {
|
2019-06-11 20:04:32 +08:00
|
|
|
bits<4> RdaLo;
|
|
|
|
bits<4> RdaHi;
|
|
|
|
|
|
|
|
let Inst{19-17} = RdaLo{3-1};
|
|
|
|
let Inst{11-9} = RdaHi{3-1};
|
|
|
|
}
|
|
|
|
|
2019-06-18 23:05:42 +08:00
|
|
|
// Two-register scalar shift by an immediate. Both outputs are tied to the
// corresponding *_src inputs. op16 goes to Inst{16} and op5_4 to Inst{5-4};
// the 5-bit immediate is split across Inst{14-12} (imm{4-2}) and Inst{7-6}
// (imm{1-0}), as in MVE_ScalarShiftSRegImm.
class MVE_ScalarShiftDRegImm<string iname, bits<2> op5_4, bit op16,
|
|
|
|
list<dag> pattern=[]>
|
|
|
|
: MVE_ScalarShiftDoubleReg<
|
|
|
|
iname, (ins tGPREven:$RdaLo_src, tGPROdd:$RdaHi_src, long_shift:$imm),
|
|
|
|
"$RdaLo, $RdaHi, $imm", "$RdaLo = $RdaLo_src,$RdaHi = $RdaHi_src",
|
|
|
|
pattern> {
|
2019-06-11 20:04:32 +08:00
|
|
|
bits<5> imm;
|
|
|
|
|
|
|
|
let Inst{16} = op16;
|
|
|
|
let Inst{15} = 0b0;
|
|
|
|
let Inst{14-12} = imm{4-2};
|
|
|
|
let Inst{7-6} = imm{1-0};
|
|
|
|
let Inst{5-4} = op5_4{1-0};
|
|
|
|
let Inst{3-0} = 0b1111;
|
|
|
|
}
|
|
|
|
|
[ARM] Add <saturate> operand to SQRSHRL and UQRSHLL
Summary:
According to the new Armv8-M specification
https://static.docs.arm.com/ddi0553/bh/DDI0553B_h_armv8m_arm.pdf the
instructions SQRSHRL and UQRSHLL now have an additional immediate
operand <saturate>. The new assembly syntax is:
SQRSHRL<c> RdaLo, RdaHi, #<saturate>, Rm
UQRSHLL<c> RdaLo, RdaHi, #<saturate>, Rm
where <saturate> can be either 64 (the existing behavior) or 48, in
which case the result is saturated to 48 bits.
The new operand is encoded as follows:
#64 Encoded as sat = 0
#48 Encoded as sat = 1
sat is bit 7 of the instruction bit pattern.
This patch adds a new assembler operand class MveSaturateOperand which
implements parsing and encoding. Decoding is implemented in
DecodeMVEOverlappingLongShift.
Reviewers: ostannard, simon_tatham, t.p.northover, samparker, dmgreen, SjoerdMeijer
Reviewed By: simon_tatham
Subscribers: javed.absar, kristof.beyls, hiraditya, pbarrio, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D64810
llvm-svn: 366555
2019-07-19 17:46:28 +08:00
|
|
|
// Shared base of the two-register scalar shifts that take the shift amount
// in a register ($Rm, Inst{15-12}). The input operands and operand string
// come from the subclass. The constraint string marks both outputs as
// early-clobber and ties them to $RdaLo_src / $RdaHi_src. Inst{7} is not
// assigned here; each subclass sets it.
class MVE_ScalarShiftDRegRegBase<string iname, dag iops, string asm,
|
|
|
|
bit op5, bit op16, list<dag> pattern=[]>
|
2019-06-18 23:05:42 +08:00
|
|
|
: MVE_ScalarShiftDoubleReg<
|
[ARM] Add <saturate> operand to SQRSHRL and UQRSHLL
Summary:
According to the new Armv8-M specification
https://static.docs.arm.com/ddi0553/bh/DDI0553B_h_armv8m_arm.pdf the
instructions SQRSHRL and UQRSHLL now have an additional immediate
operand <saturate>. The new assembly syntax is:
SQRSHRL<c> RdaLo, RdaHi, #<saturate>, Rm
UQRSHLL<c> RdaLo, RdaHi, #<saturate>, Rm
where <saturate> can be either 64 (the existing behavior) or 48, in
that case the result is saturated to 48 bits.
The new operand is encoded as follows:
#64 Encoded as sat = 0
#48 Encoded as sat = 1
sat is bit 7 of the instruction bit pattern.
This patch adds a new assembler operand class MveSaturateOperand which
implements parsing and encoding. Decoding is implemented in
DecodeMVEOverlappingLongShift.
Reviewers: ostannard, simon_tatham, t.p.northover, samparker, dmgreen, SjoerdMeijer
Reviewed By: simon_tatham
Subscribers: javed.absar, kristof.beyls, hiraditya, pbarrio, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D64810
llvm-svn: 366555
2019-07-19 17:46:28 +08:00
|
|
|
iname, iops, asm, "@earlyclobber $RdaHi,@earlyclobber $RdaLo,"
|
|
|
|
"$RdaLo = $RdaLo_src,$RdaHi = $RdaHi_src",
|
2019-06-18 23:05:42 +08:00
|
|
|
pattern> {
|
2019-06-11 20:04:32 +08:00
|
|
|
bits<4> Rm;
|
|
|
|
|
|
|
|
let Inst{16} = op16;
|
|
|
|
let Inst{15-12} = Rm{3-0};
|
[ARM] Add <saturate> operand to SQRSHRL and UQRSHLL
Summary:
According to the new Armv8-M specification
https://static.docs.arm.com/ddi0553/bh/DDI0553B_h_armv8m_arm.pdf the
instructions SQRSHRL and UQRSHLL now have an additional immediate
operand <saturate>. The new assembly syntax is:
SQRSHRL<c> RdaLo, RdaHi, #<saturate>, Rm
UQRSHLL<c> RdaLo, RdaHi, #<saturate>, Rm
where <saturate> can be either 64 (the existing behavior) or 48, in
that case the result is saturated to 48 bits.
The new operand is encoded as follows:
#64 Encoded as sat = 0
#48 Encoded as sat = 1
sat is bit 7 of the instruction bit pattern.
This patch adds a new assembler operand class MveSaturateOperand which
implements parsing and encoding. Decoding is implemented in
DecodeMVEOverlappingLongShift.
Reviewers: ostannard, simon_tatham, t.p.northover, samparker, dmgreen, SjoerdMeijer
Reviewed By: simon_tatham
Subscribers: javed.absar, kristof.beyls, hiraditya, pbarrio, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D64810
llvm-svn: 366555
2019-07-19 17:46:28 +08:00
|
|
|
let Inst{6} = 0b0;
|
2019-06-11 20:04:32 +08:00
|
|
|
let Inst{5} = op5;
|
|
|
|
let Inst{4} = 0b0;
|
|
|
|
let Inst{3-0} = 0b1101;
|
|
|
|
|
|
|
|
// Custom decoder method because of the following overlapping encodings:
|
|
|
|
// ASRL and SQRSHR
|
|
|
|
// LSLL and UQRSHL
|
|
|
|
// SQRSHRL and SQRSHR
|
|
|
|
// UQRSHLL and UQRSHL
|
|
|
|
let DecoderMethod = "DecodeMVEOverlappingLongShift";
|
|
|
|
}
|
|
|
|
|
[ARM] Add <saturate> operand to SQRSHRL and UQRSHLL
Summary:
According to the new Armv8-M specification
https://static.docs.arm.com/ddi0553/bh/DDI0553B_h_armv8m_arm.pdf the
instructions SQRSHRL and UQRSHLL now have an additional immediate
operand <saturate>. The new assembly syntax is:
SQRSHRL<c> RdaLo, RdaHi, #<saturate>, Rm
UQRSHLL<c> RdaLo, RdaHi, #<saturate>, Rm
where <saturate> can be either 64 (the existing behavior) or 48, in
which case the result is saturated to 48 bits.
The new operand is encoded as follows:
#64 Encoded as sat = 0
#48 Encoded as sat = 1
sat is bit 7 of the instruction bit pattern.
This patch adds a new assembler operand class MveSaturateOperand which
implements parsing and encoding. Decoding is implemented in
DecodeMVEOverlappingLongShift.
Reviewers: ostannard, simon_tatham, t.p.northover, samparker, dmgreen, SjoerdMeijer
Reviewed By: simon_tatham
Subscribers: javed.absar, kristof.beyls, hiraditya, pbarrio, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D64810
llvm-svn: 366555
2019-07-19 17:46:28 +08:00
|
|
|
// Two-register shift by register without a saturate operand: passes
// op16 = 0b0 to the base class and fixes Inst{7} to 0.
class MVE_ScalarShiftDRegReg<string iname, bit op5, list<dag> pattern=[]>
|
|
|
|
: MVE_ScalarShiftDRegRegBase<
|
|
|
|
iname, (ins tGPREven:$RdaLo_src, tGPROdd:$RdaHi_src, rGPR:$Rm),
|
|
|
|
"$RdaLo, $RdaHi, $Rm", op5, 0b0, pattern> {
|
|
|
|
|
|
|
|
let Inst{7} = 0b0;
|
|
|
|
}
|
|
|
|
|
|
|
|
// Two-register shift by register with an extra saturateop operand, $sat.
// In the assembly string $sat is printed before $Rm, although it follows
// $Rm in the input operand list. Passes op16 = 0b1 to the base class and
// encodes sat in Inst{7}.
class MVE_ScalarShiftDRegRegWithSat<string iname, bit op5, list<dag> pattern=[]>
|
|
|
|
: MVE_ScalarShiftDRegRegBase<
|
|
|
|
iname, (ins tGPREven:$RdaLo_src, tGPROdd:$RdaHi_src, rGPR:$Rm, saturateop:$sat),
|
|
|
|
"$RdaLo, $RdaHi, $sat, $Rm", op5, 0b1, pattern> {
|
|
|
|
bit sat;
|
|
|
|
|
|
|
|
let Inst{7} = sat;
|
|
|
|
}
|
|
|
|
|
|
|
|
// ASRL in register-shift (r) and immediate-shift (i) forms. Both select the
// ARMasrl node, which yields the RdaLo/RdaHi pair. The immediate form leaves
// op16 unset (?).
def MVE_ASRLr : MVE_ScalarShiftDRegReg<"asrl", 0b1, [(set tGPREven:$RdaLo, tGPROdd:$RdaHi,
|
2019-06-28 23:43:31 +08:00
|
|
|
(ARMasrl tGPREven:$RdaLo_src,
|
|
|
|
tGPROdd:$RdaHi_src, rGPR:$Rm))]>;
|
|
|
|
def MVE_ASRLi : MVE_ScalarShiftDRegImm<"asrl", 0b10, ?, [(set tGPREven:$RdaLo, tGPROdd:$RdaHi,
|
|
|
|
(ARMasrl tGPREven:$RdaLo_src,
|
2019-09-25 18:16:48 +08:00
|
|
|
tGPROdd:$RdaHi_src, (i32 long_shift:$imm)))]>;
|
[ARM] Add <saturate> operand to SQRSHRL and UQRSHLL
Summary:
According to the new Armv8-M specification
https://static.docs.arm.com/ddi0553/bh/DDI0553B_h_armv8m_arm.pdf the
instructions SQRSHRL and UQRSHLL now have an additional immediate
operand <saturate>. The new assembly syntax is:
SQRSHRL<c> RdaLo, RdaHi, #<saturate>, Rm
UQRSHLL<c> RdaLo, RdaHi, #<saturate>, Rm
where <saturate> can be either 64 (the existing behavior) or 48, in
which case the result is saturated to 48 bits.
The new operand is encoded as follows:
#64 Encoded as sat = 0
#48 Encoded as sat = 1
sat is bit 7 of the instruction bit pattern.
This patch adds a new assembler operand class MveSaturateOperand which
implements parsing and encoding. Decoding is implemented in
DecodeMVEOverlappingLongShift.
Reviewers: ostannard, simon_tatham, t.p.northover, samparker, dmgreen, SjoerdMeijer
Reviewed By: simon_tatham
Subscribers: javed.absar, kristof.beyls, hiraditya, pbarrio, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D64810
llvm-svn: 366555
2019-07-19 17:46:28 +08:00
|
|
|
// LSLL (register and immediate forms) and LSRL (immediate form only), each
// with a selection pattern for the matching ARMlsll / ARMlsrl node.
def MVE_LSLLr : MVE_ScalarShiftDRegReg<
    "lsll", 0b0,
    [(set tGPREven:$RdaLo, tGPROdd:$RdaHi,
          (ARMlsll tGPREven:$RdaLo_src, tGPROdd:$RdaHi_src, rGPR:$Rm))]>;

def MVE_LSLLi : MVE_ScalarShiftDRegImm<
    "lsll", 0b00, ?,
    [(set tGPREven:$RdaLo, tGPROdd:$RdaHi,
          (ARMlsll tGPREven:$RdaLo_src, tGPROdd:$RdaHi_src,
                   (i32 long_shift:$imm)))]>;

def MVE_LSRL : MVE_ScalarShiftDRegImm<
    "lsrl", 0b01, ?,
    [(set tGPREven:$RdaLo, tGPROdd:$RdaHi,
          (ARMlsrl tGPREven:$RdaLo_src, tGPROdd:$RdaHi_src,
                   (i32 long_shift:$imm)))]>;
|
2019-06-11 20:04:32 +08:00
|
|
|
|
[ARM] Add <saturate> operand to SQRSHRL and UQRSHLL
Summary:
According to the new Armv8-M specification
https://static.docs.arm.com/ddi0553/bh/DDI0553B_h_armv8m_arm.pdf the
instructions SQRSHRL and UQRSHLL now have an additional immediate
operand <saturate>. The new assembly syntax is:
SQRSHRL<c> RdaLo, RdaHi, #<saturate>, Rm
UQRSHLL<c> RdaLo, RdaHi, #<saturate>, Rm
where <saturate> can be either 64 (the existing behavior) or 48, in
which case the result is saturated to 48 bits.
The new operand is encoded as follows:
#64 Encoded as sat = 0
#48 Encoded as sat = 1
sat is bit 7 of the instruction bit pattern.
This patch adds a new assembler operand class MveSaturateOperand which
implements parsing and encoding. Decoding is implemented in
DecodeMVEOverlappingLongShift.
Reviewers: ostannard, simon_tatham, t.p.northover, samparker, dmgreen, SjoerdMeijer
Reviewed By: simon_tatham
Subscribers: javed.absar, kristof.beyls, hiraditya, pbarrio, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D64810
llvm-svn: 366555
2019-07-19 17:46:28 +08:00
|
|
|
// Signed-named long shifts without selection patterns. SQRSHRL is the
// register form carrying the extra $sat operand; SQSHLL and SRSHRL are
// immediate forms.
def MVE_SQRSHRL : MVE_ScalarShiftDRegRegWithSat<"sqrshrl", 0b1>;
def MVE_SQSHLL  : MVE_ScalarShiftDRegImm<"sqshll", 0b11, 0b1>;
def MVE_SRSHRL  : MVE_ScalarShiftDRegImm<"srshrl", 0b10, 0b1>;
|
2019-06-11 20:04:32 +08:00
|
|
|
|
[ARM] Add <saturate> operand to SQRSHRL and UQRSHLL
Summary:
According to the new Armv8-M specification
https://static.docs.arm.com/ddi0553/bh/DDI0553B_h_armv8m_arm.pdf the
instructions SQRSHRL and UQRSHLL now have an additional immediate
operand <saturate>. The new assembly syntax is:
SQRSHRL<c> RdaLo, RdaHi, #<saturate>, Rm
UQRSHLL<c> RdaLo, RdaHi, #<saturate>, Rm
where <saturate> can be either 64 (the existing behavior) or 48, in
which case the result is saturated to 48 bits.
The new operand is encoded as follows:
#64 Encoded as sat = 0
#48 Encoded as sat = 1
sat is bit 7 of the instruction bit pattern.
This patch adds a new assembler operand class MveSaturateOperand which
implements parsing and encoding. Decoding is implemented in
DecodeMVEOverlappingLongShift.
Reviewers: ostannard, simon_tatham, t.p.northover, samparker, dmgreen, SjoerdMeijer
Reviewed By: simon_tatham
Subscribers: javed.absar, kristof.beyls, hiraditya, pbarrio, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D64810
llvm-svn: 366555
2019-07-19 17:46:28 +08:00
|
|
|
// Unsigned-named long shifts without selection patterns. UQRSHLL is the
// register form carrying the extra $sat operand; UQSHLL and URSHRL are
// immediate forms.
def MVE_UQRSHLL : MVE_ScalarShiftDRegRegWithSat<"uqrshll", 0b0>;
def MVE_UQSHLL  : MVE_ScalarShiftDRegImm<"uqshll", 0b00, 0b1>;
def MVE_URSHRL  : MVE_ScalarShiftDRegImm<"urshrl", 0b01, 0b1>;
|
[ARM] Set up infrastructure for MVE vector instructions.
This commit prepares the way to start adding the main collection of
MVE instructions, which operate on the 128-bit vector registers.
The most obvious thing that's needed, and the simplest, is to add the
MQPR register class, which is like the existing QPR except that it has
fewer registers in it.
The more complicated part: MVE defines a system of vector predication,
in which instructions operating on 128-bit vector registers can be
constrained to operate on only a subset of the lanes, using a system
of prefix instructions similar to the existing Thumb IT, in that you
have one prefix instruction which designates up to 4 following
instructions as subject to predication, and within that sequence, the
predicate can be inverted by means of T/E suffixes ('Then' / 'Else').
To support instructions of this type, we've added two new Tablegen
classes `vpred_n` and `vpred_r` for standard clusters of MC operands
to add to a predicated instruction. Both include a flag indicating how
the instruction is predicated at all (options are T, E and 'not
predicated'), and an input register field for the register controlling
the set of active lanes. They differ from each other in that `vpred_r`
also includes an input operand for the previous value of the output
register, for instructions that leave inactive lanes unchanged.
`vpred_n` lacks that extra operand; it will be used for instructions
that don't preserve inactive lanes in their output register (either
because inactive lanes are zeroed, as the MVE load instructions do, or
because the output register isn't a vector at all).
This commit also adds the family of prefix instructions themselves
(VPT / VPST), and all the machinery needed to work with them in
assembly and disassembly (e.g. generating the 't' and 'e' mnemonic
suffixes on disassembled instructions within a predicated block)
I've added a couple of demo instructions that derive from the new
Tablegen base classes and use those two operand clusters. The bulk of
the vector instructions will come in followup commits small enough to
be manageable. (One exception is that I've added the full version of
`isMnemonicVPTPredicable` in the AsmParser, because it seemed
pointless to carefully split it up.)
Reviewers: dmgreen, samparker, SjoerdMeijer, t.p.northover
Subscribers: javed.absar, kristof.beyls, hiraditya, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D62669
llvm-svn: 363258
2019-06-13 21:11:13 +08:00
|
|
|
|
|
|
|
// start of mve_rDest instructions
|
|
|
|
|
2019-06-18 23:05:42 +08:00
|
|
|
// Common base class for the instructions below whose output register is a
// GPR. All arguments are forwarded unchanged to MVE_p, with the predication
// operand cluster fixed to vpred_n.
//
// Always use vpred_n and not vpred_r: with the output register being
// a GPR and not a vector register, there can't be any question of
// what to put in its inactive lanes.
class MVE_rDest<dag oops, dag iops, InstrItinClass itin,
                string iname, string suffix,
                string ops, string cstr, list<dag> pattern=[]>
  : MVE_p<oops, iops, itin, iname, suffix, ops, vpred_n, cstr, pattern> {
  // Encoding bits fixed for every subclass.
  let Inst{4}     = 0b0;
  let Inst{11-9}  = 0b111;
  let Inst{25-23} = 0b101;
}
|
|
|
|
|
2019-06-18 23:05:42 +08:00
|
|
|
// VABAV: reads two vector registers ($Qn, $Qm) and a GPR accumulator that is
// tied to the destination ("$Rda = $Rda_src").
//   suffix  - type suffix string forwarded to MVE_rDest
//   U       - value of encoding bit 28
//   size    - value of encoding bits 21-20
//   pattern - optional selection patterns, forwarded to MVE_rDest
class MVE_VABAV<string suffix, bit U, bits<2> size, list<dag> pattern=[]>
  : MVE_rDest<(outs rGPR:$Rda), (ins rGPR:$Rda_src, MQPR:$Qn, MQPR:$Qm),
              NoItinerary, "vabav", suffix, "$Rda, $Qn, $Qm",
              "$Rda = $Rda_src", pattern> {
  bits<4> Rda;
  bits<4> Qn;
  bits<4> Qm;

  // Operand fields. Each 4-bit Q register number is split: the low three
  // bits and the top bit are placed in separate parts of the encoding.
  let Inst{15-12} = Rda;
  let Inst{19-17} = Qn{2-0};
  let Inst{7}     = Qn{3};
  let Inst{3-1}   = Qm{2-0};
  let Inst{5}     = Qm{3};

  // Per-variant selectors.
  let Inst{28}    = U;
  let Inst{21-20} = size;

  // Fixed bits.
  let Inst{22} = 0b0;
  let Inst{16} = 0b0;
  let Inst{8}  = 0b1;
  let Inst{6}  = 0b0;
  let Inst{0}  = 0b1;
}
|
|
|
|
|
2019-06-18 23:05:42 +08:00
|
|
|
// VABAV instances: U is 0 for the s* suffixes and 1 for the u* suffixes;
// size is 0b00 / 0b01 / 0b10 for 8 / 16 / 32-bit suffixes.
def MVE_VABAVs8  : MVE_VABAV<"s8",  0b0, 0b00>;
def MVE_VABAVs16 : MVE_VABAV<"s16", 0b0, 0b01>;
def MVE_VABAVs32 : MVE_VABAV<"s32", 0b0, 0b10>;
def MVE_VABAVu8  : MVE_VABAV<"u8",  0b1, 0b00>;
def MVE_VABAVu16 : MVE_VABAV<"u16", 0b1, 0b01>;
def MVE_VABAVu32 : MVE_VABAV<"u32", 0b1, 0b10>;
|
[ARM] Set up infrastructure for MVE vector instructions.
This commit prepares the way to start adding the main collection of
MVE instructions, which operate on the 128-bit vector registers.
The most obvious thing that's needed, and the simplest, is to add the
MQPR register class, which is like the existing QPR except that it has
fewer registers in it.
The more complicated part: MVE defines a system of vector predication,
in which instructions operating on 128-bit vector registers can be
constrained to operate on only a subset of the lanes, using a system
of prefix instructions similar to the existing Thumb IT, in that you
have one prefix instruction which designates up to 4 following
instructions as subject to predication, and within that sequence, the
predicate can be inverted by means of T/E suffixes ('Then' / 'Else').
To support instructions of this type, we've added two new Tablegen
classes `vpred_n` and `vpred_r` for standard clusters of MC operands
to add to a predicated instruction. Both include a flag indicating how
the instruction is predicated at all (options are T, E and 'not
predicated'), and an input register field for the register controlling
the set of active lanes. They differ from each other in that `vpred_r`
also includes an input operand for the previous value of the output
register, for instructions that leave inactive lanes unchanged.
`vpred_n` lacks that extra operand; it will be used for instructions
that don't preserve inactive lanes in their output register (either
because inactive lanes are zeroed, as the MVE load instructions do, or
because the output register isn't a vector at all).
This commit also adds the family of prefix instructions themselves
(VPT / VPST), and all the machinery needed to work with them in
assembly and disassembly (e.g. generating the 't' and 'e' mnemonic
suffixes on disassembled instructions within a predicated block)
I've added a couple of demo instructions that derive from the new
Tablegen base classes and use those two operand clusters. The bulk of
the vector instructions will come in followup commits small enough to
be manageable. (One exception is that I've added the full version of
`isMnemonicVPTPredicable` in the AsmParser, because it seemed
pointless to carefully split it up.)
Reviewers: dmgreen, samparker, SjoerdMeijer, t.p.northover
Subscribers: javed.absar, kristof.beyls, hiraditya, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D62669
llvm-svn: 363258
2019-06-13 21:11:13 +08:00
|
|
|
|
2019-06-18 23:05:42 +08:00
|
|
|
// Shared encoding for VADDV / VADDVA: one vector input $Qm and an even GPR
// destination $Rda. The caller supplies the input operand list and the
// constraint string, so the same class serves forms with and without a
// tied accumulator input.
//   A    - value of encoding bit 5
//   U    - value of encoding bit 28
//   size - value of encoding bits 19-18
class MVE_VADDV<string iname, string suffix, dag iops, string cstr,
                bit A, bit U, bits<2> size, list<dag> pattern=[]>
  : MVE_rDest<(outs tGPREven:$Rda), iops, NoItinerary,
              iname, suffix, "$Rda, $Qm", cstr, pattern> {
  bits<4> Rda;
  bits<3> Qm;

  // Operand fields. Only bits 3-1 of Rda are encoded; bit 12, where the low
  // bit would otherwise go, is fixed to zero.
  let Inst{15-13} = Rda{3-1};
  let Inst{12}    = 0b0;
  let Inst{3-1}   = Qm;

  // Per-variant selectors.
  let Inst{28}    = U;
  let Inst{19-18} = size;
  let Inst{5}     = A;

  // Fixed bits.
  let Inst{22-20} = 0b111;
  let Inst{17-16} = 0b01;
  let Inst{8-6}   = 0b100;
  let Inst{0}     = 0b0;
}
|
|
|
|
|
2019-06-18 23:05:42 +08:00
|
|
|
// Emits two records per type suffix:
//   <NAME>acc    - "vaddva", with $Rda_src as an extra input tied to $Rda, A = 1
//   <NAME>no_acc - "vaddv", vector input only, A = 0
multiclass MVE_VADDV_A<string suffix, bit U, bits<2> size,
                       list<dag> pattern=[]> {
  def acc    : MVE_VADDV<"vaddva", suffix,
                         (ins tGPREven:$Rda_src, MQPR:$Qm),
                         "$Rda = $Rda_src",
                         0b1, U, size, pattern>;
  def no_acc : MVE_VADDV<"vaddv", suffix,
                         (ins MQPR:$Qm),
                         "",
                         0b0, U, size, pattern>;
}
|
|
|
|
|
2019-06-18 23:05:42 +08:00
|
|
|
// VADDV / VADDVA instances: U is 0 for s* and 1 for u*; size is
// 0b00 / 0b01 / 0b10 for the 8 / 16 / 32-bit suffixes.
defm MVE_VADDVs8  : MVE_VADDV_A<"s8",  0b0, 0b00>;
defm MVE_VADDVs16 : MVE_VADDV_A<"s16", 0b0, 0b01>;
defm MVE_VADDVs32 : MVE_VADDV_A<"s32", 0b0, 0b10>;
defm MVE_VADDVu8  : MVE_VADDV_A<"u8",  0b1, 0b00>;
defm MVE_VADDVu16 : MVE_VADDV_A<"u16", 0b1, 0b01>;
defm MVE_VADDVu32 : MVE_VADDV_A<"u32", 0b1, 0b10>;
|
2019-06-14 22:31:13 +08:00
|
|
|
|
2019-08-19 17:38:28 +08:00
|
|
|
// Selection patterns for vecreduce_add under HasMVEInt.
let Predicates = [HasMVEInt] in {
  // Plain reduction: select the non-accumulating u<N> instruction.
  def : Pat<(i32 (vecreduce_add (v4i32 MQPR:$src))),
            (i32 (MVE_VADDVu32no_acc $src))>;
  def : Pat<(i32 (vecreduce_add (v8i16 MQPR:$src))),
            (i32 (MVE_VADDVu16no_acc $src))>;
  def : Pat<(i32 (vecreduce_add (v16i8 MQPR:$src))),
            (i32 (MVE_VADDVu8no_acc $src))>;

  // Reduction added to a scalar: select the accumulating form, passing the
  // scalar as the accumulator input.
  def : Pat<(i32 (add (i32 (vecreduce_add (v4i32 MQPR:$src1))),
                      (i32 tGPR:$src2))),
            (i32 (MVE_VADDVu32acc $src2, $src1))>;
  def : Pat<(i32 (add (i32 (vecreduce_add (v8i16 MQPR:$src1))),
                      (i32 tGPR:$src2))),
            (i32 (MVE_VADDVu16acc $src2, $src1))>;
  def : Pat<(i32 (add (i32 (vecreduce_add (v16i8 MQPR:$src1))),
                      (i32 tGPR:$src2))),
            (i32 (MVE_VADDVu8acc $src2, $src1))>;
}
|
|
|
|
|
2019-06-18 23:05:42 +08:00
|
|
|
// Shared encoding for VADDLV / VADDLVA: one vector input $Qm and a
// destination made of an even GPR ($RdaLo) and an odd GPR ($RdaHi). The
// caller supplies the input operand list and the constraint string.
//   A - value of encoding bit 5
//   U - value of encoding bit 28
class MVE_VADDLV<string iname, string suffix, dag iops, string cstr,
                 bit A, bit U, list<dag> pattern=[]>
  : MVE_rDest<(outs tGPREven:$RdaLo, tGPROdd:$RdaHi), iops, NoItinerary,
              iname, suffix, "$RdaLo, $RdaHi, $Qm", cstr, pattern> {
  bits<4> RdaLo;
  bits<4> RdaHi;
  bits<3> Qm;

  // Operand fields. Only bits 3-1 of each GPR number are encoded.
  let Inst{22-20} = RdaHi{3-1};
  let Inst{15-13} = RdaLo{3-1};
  let Inst{3-1}   = Qm;

  // Per-variant selectors.
  let Inst{28} = U;
  let Inst{5}  = A;

  // Fixed bits.
  let Inst{19-18} = 0b10;
  let Inst{17-16} = 0b01;
  let Inst{12}    = 0b0;
  let Inst{8-6}   = 0b100;
  let Inst{0}     = 0b0;
}
|
|
|
|
|
2019-06-18 23:05:42 +08:00
|
|
|
// Emits two records per type suffix:
//   <NAME>acc    - "vaddlva", with $RdaLo_src/$RdaHi_src as extra inputs tied
//                  to the two outputs, A = 1
//   <NAME>no_acc - "vaddlv", vector input only, A = 0
multiclass MVE_VADDLV_A<string suffix, bit U, list<dag> pattern=[]> {
  def acc    : MVE_VADDLV<"vaddlva", suffix,
                          (ins tGPREven:$RdaLo_src, tGPROdd:$RdaHi_src,
                               MQPR:$Qm),
                          "$RdaLo = $RdaLo_src,$RdaHi = $RdaHi_src",
                          0b1, U, pattern>;
  def no_acc : MVE_VADDLV<"vaddlv", suffix,
                          (ins MQPR:$Qm),
                          "",
                          0b0, U, pattern>;
}
|
|
|
|
|
|
|
|
|
2019-06-18 23:05:42 +08:00
|
|
|
// VADDLV / VADDLVA instances; only 32-bit suffixes are defined here.
defm MVE_VADDLVs32 : MVE_VADDLV_A<"s32", 0b0>;
defm MVE_VADDLVu32 : MVE_VADDLV_A<"u32", 0b1>;
|
2019-06-14 22:31:13 +08:00
|
|
|
|
2019-06-18 23:05:42 +08:00
|
|
|
// Shared encoding for the vminnmv / vmaxnmv / vminnmav / vmaxnmav records
// defined below. Takes a GPR input $RdaSrc tied to the GPR output $RdaDest,
// plus one vector input $Qm. Requires HasMVEFloat.
//   sz     - value of encoding bit 28
//   bit_17 - value of encoding bit 17
//   bit_7  - value of encoding bit 7
class MVE_VMINMAXNMV<string iname, string suffix, bit sz,
                     bit bit_17, bit bit_7, list<dag> pattern=[]>
  : MVE_rDest<(outs rGPR:$RdaDest), (ins rGPR:$RdaSrc, MQPR:$Qm),
              NoItinerary, iname, suffix, "$RdaSrc, $Qm",
              "$RdaDest = $RdaSrc", pattern> {
  bits<4> RdaDest;
  bits<3> Qm;

  // Operand fields.
  let Inst{15-12} = RdaDest;
  let Inst{3-1}   = Qm;

  // Per-variant selectors.
  let Inst{28} = sz;
  let Inst{17} = bit_17;
  let Inst{7}  = bit_7;

  // Fixed bits.
  let Inst{22-20} = 0b110;
  let Inst{19-18} = 0b11;
  let Inst{16}    = 0b0;
  let Inst{8}     = 0b1;
  let Inst{6-5}   = 0b00;
  let Inst{0}     = 0b0;

  let Predicates = [HasMVEFloat];
}
|
|
|
|
|
2019-06-18 23:05:42 +08:00
|
|
|
// Emits the f32 (sz = 0) and f16 (sz = 1) records with bit_17 set to 1.
multiclass MVE_VMINMAXNMV_fty<string iname, bit bit_7, list<dag> pattern=[]> {
  def f32 : MVE_VMINMAXNMV<iname, "f32", 0b0, 0b1, bit_7, pattern>;
  def f16 : MVE_VMINMAXNMV<iname, "f16", 0b1, 0b1, bit_7, pattern>;
}
|
|
|
|
|
2019-06-18 23:05:42 +08:00
|
|
|
// bit_7 distinguishes the two mnemonics: 1 for vminnmv, 0 for vmaxnmv.
defm MVE_VMINNMV : MVE_VMINMAXNMV_fty<"vminnmv", 0b1>;
defm MVE_VMAXNMV : MVE_VMINMAXNMV_fty<"vmaxnmv", 0b0>;
|
2019-06-14 22:31:13 +08:00
|
|
|
|
2019-06-18 23:05:42 +08:00
|
|
|
// Emits the f32 (sz = 0) and f16 (sz = 1) records with bit_17 cleared to 0.
multiclass MVE_VMINMAXNMAV_fty<string iname, bit bit_7, list<dag> pattern=[]> {
  def f32 : MVE_VMINMAXNMV<iname, "f32", 0b0, 0b0, bit_7, pattern>;
  def f16 : MVE_VMINMAXNMV<iname, "f16", 0b1, 0b0, bit_7, pattern>;
}
|
|
|
|
|
2019-06-18 23:05:42 +08:00
|
|
|
// bit_7 distinguishes the two mnemonics: 1 for vminnmav, 0 for vmaxnmav.
defm MVE_VMINNMAV : MVE_VMINMAXNMAV_fty<"vminnmav", 0b1>;
defm MVE_VMAXNMAV : MVE_VMINMAXNMAV_fty<"vmaxnmav", 0b0>;
|
2019-06-14 22:31:13 +08:00
|
|
|
|
2019-06-18 23:05:42 +08:00
|
|
|
// Shared encoding for the vminv / vmaxv / vminav / vmaxav records defined
// below. Takes a GPR input $RdaSrc tied to the GPR output $RdaDest, plus one
// vector input $Qm.
//   U      - value of encoding bit 28
//   size   - value of encoding bits 19-18
//   bit_17 - value of encoding bit 17
//   bit_7  - value of encoding bit 7
class MVE_VMINMAXV<string iname, string suffix, bit U, bits<2> size,
                   bit bit_17, bit bit_7, list<dag> pattern=[]>
  : MVE_rDest<(outs rGPR:$RdaDest), (ins rGPR:$RdaSrc, MQPR:$Qm), NoItinerary,
              iname, suffix, "$RdaSrc, $Qm", "$RdaDest = $RdaSrc", pattern> {
  bits<4> RdaDest;
  bits<3> Qm;

  // Operand fields.
  let Inst{15-12} = RdaDest;
  let Inst{3-1}   = Qm;

  // Per-variant selectors.
  let Inst{28}    = U;
  let Inst{19-18} = size;
  let Inst{17}    = bit_17;
  let Inst{7}     = bit_7;

  // Fixed bits.
  let Inst{22-20} = 0b110;
  let Inst{16}    = 0b0;
  let Inst{8}     = 0b1;
  let Inst{6-5}   = 0b00;
  let Inst{0}     = 0b0;
}
|
|
|
|
|
[ARM] Begin adding IR intrinsics for MVE instructions.
This commit, together with the next few, will add a representative
sample of the kind of IR intrinsics that we'll need in order to
implement the user-facing ACLE intrinsics for MVE. Supporting all of
them will take more work; the intention of this initial series of
commits is to implement an intrinsic or two from lots of different
categories, as examples and proofs of concept.
This initial commit introduces a small number of IR intrinsics for
instructions simple enough that they can use Tablegen ISel patterns:
the predicated versions of the VADD and VSUB instructions (both
integer and FP), VMIN and VMAX, and the float->half VCVT instruction
(predicated and unpredicated).
When using VPT-predicated instructions in automatic code generation,
it will be convenient to specify the predicate value as a vector of
the appropriate number of i1. To make it easy to specify all sizes of
an instruction in one go and give each one the matching predicate
vector type, I've added a system of Tablegen informational records
describing MVE's vector types: each one gives the underlying LLVM IR
ValueType (which may not be the same if the MVE vector is of
explicitly signed or unsigned integers) and an appropriate vNi1 to use
as the predicate vector.
(Also, those info records include the usual encoding for the types, so
that as we add associations between each instruction encoding and one
of the new `MVEVectorVTInfo` records, we can remove some of the
existing template parameters and replace them with references to the
vector type info's fields.)
The user-facing ACLE intrinsics will receive a predicate mask as a
16-bit integer, so I've also provided a pair of intrinsics i2v and
v2i, to convert between an integer and a vector of i1 by just changing
the register class.
Reviewers: dmgreen, miyuki, ostannard
Subscribers: javed.absar, kristof.beyls, hiraditya, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D67158
2019-10-08 00:00:51 +08:00
|
|
|
// For one vector type VTI, emits the instruction record (named exactly NAME)
// and a HasMVEInt-guarded pattern <NAME>_pat that selects the intrinsic
// `intr` applied to (previous scalar, vector) onto that instruction.
// Suffix, U and size are taken from VTI.
multiclass MVE_VMINMAXV_p<string iname, bit bit_17, bit bit_7,
                          MVEVectorVTInfo VTI, Intrinsic intr> {
  def "" : MVE_VMINMAXV<iname, VTI.Suffix, VTI.Unsigned, VTI.Size,
                        bit_17, bit_7>;

  let Predicates = [HasMVEInt] in {
    def _pat : Pat<(i32 (intr (i32 rGPR:$prev), (VTI.Vec MQPR:$vec))),
                   (i32 (!cast<Instruction>(NAME)
                         (i32 rGPR:$prev), (VTI.Vec MQPR:$vec)))>;
  }
}
|
|
|
|
|
[ARM] Begin adding IR intrinsics for MVE instructions.
This commit, together with the next few, will add a representative
sample of the kind of IR intrinsics that we'll need in order to
implement the user-facing ACLE intrinsics for MVE. Supporting all of
them will take more work; the intention of this initial series of
commits is to implement an intrinsic or two from lots of different
categories, as examples and proofs of concept.
This initial commit introduces a small number of IR intrinsics for
instructions simple enough that they can use Tablegen ISel patterns:
the predicated versions of the VADD and VSUB instructions (both
integer and FP), VMIN and VMAX, and the float->half VCVT instruction
(predicated and unpredicated).
When using VPT-predicated instructions in automatic code generation,
it will be convenient to specify the predicate value as a vector of
the appropriate number of i1. To make it easy to specify all sizes of
an instruction in one go and give each one the matching predicate
vector type, I've added a system of Tablegen informational records
describing MVE's vector types: each one gives the underlying LLVM IR
ValueType (which may not be the same if the MVE vector is of
explicitly signed or unsigned integers) and an appropriate vNi1 to use
as the predicate vector.
(Also, those info records include the usual encoding for the types, so
that as we add associations between each instruction encoding and one
of the new `MVEVectorVTInfo` records, we can remove some of the
existing template parameters and replace them with references to the
vector type info's fields.)
The user-facing ACLE intrinsics will receive a predicate mask as a
16-bit integer, so I've also provided a pair of intrinsics i2v and
v2i, to convert between an integer and a vector of i1 by just changing
the register class.
Reviewers: dmgreen, miyuki, ostannard
Subscribers: javed.absar, kristof.beyls, hiraditya, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D67158
2019-10-08 00:00:51 +08:00
|
|
|
// Instantiates MVE_VMINMAXV_p for all six integer vector types with bit_17
// set to 1. The s* types use intr_s and the u* types use intr_u.
multiclass MVE_VMINMAXV_ty<string iname, bit bit_7,
                           Intrinsic intr_s, Intrinsic intr_u> {
  defm s8  : MVE_VMINMAXV_p<iname, 1, bit_7, MVE_v16s8, intr_s>;
  defm s16 : MVE_VMINMAXV_p<iname, 1, bit_7, MVE_v8s16, intr_s>;
  defm s32 : MVE_VMINMAXV_p<iname, 1, bit_7, MVE_v4s32, intr_s>;
  defm u8  : MVE_VMINMAXV_p<iname, 1, bit_7, MVE_v16u8, intr_u>;
  defm u16 : MVE_VMINMAXV_p<iname, 1, bit_7, MVE_v8u16, intr_u>;
  defm u32 : MVE_VMINMAXV_p<iname, 1, bit_7, MVE_v4u32, intr_u>;
}
|
|
|
|
|
|
|
|
// bit_7 distinguishes the two mnemonics: 1 for vminv, 0 for vmaxv. Each is
// paired with its signed and unsigned intrinsic.
defm MVE_VMINV : MVE_VMINMAXV_ty<"vminv", 0b1,
                                 int_arm_mve_minv_s, int_arm_mve_minv_u>;
defm MVE_VMAXV : MVE_VMINMAXV_ty<"vmaxv", 0b0,
                                 int_arm_mve_maxv_s, int_arm_mve_maxv_u>;
|
2019-06-14 22:31:13 +08:00
|
|
|
|
2019-09-13 17:11:46 +08:00
|
|
|
// Selection patterns for the vecreduce min/max nodes under HasMVEInt. Each
// one selects VMAXV/VMINV with its scalar input produced by a move of a
// constant.
// NOTE(review): the t2MVNi operands presumably yield the bitwise inverse of
// the given immediate (e.g. 127 -> -128) — confirm against the t2MVNi
// definition.
let Predicates = [HasMVEInt] in {
  // Signed maximum.
  def : Pat<(i32 (vecreduce_smax (v16i8 MQPR:$src))),
            (i32 (MVE_VMAXVs8 (t2MVNi (i32 127)), $src))>;
  def : Pat<(i32 (vecreduce_smax (v8i16 MQPR:$src))),
            (i32 (MVE_VMAXVs16 (t2MOVi32imm (i32 -32768)), $src))>;
  def : Pat<(i32 (vecreduce_smax (v4i32 MQPR:$src))),
            (i32 (MVE_VMAXVs32 (t2MOVi (i32 -2147483648)), $src))>;

  // Unsigned maximum.
  def : Pat<(i32 (vecreduce_umax (v16i8 MQPR:$src))),
            (i32 (MVE_VMAXVu8 (t2MOVi (i32 0)), $src))>;
  def : Pat<(i32 (vecreduce_umax (v8i16 MQPR:$src))),
            (i32 (MVE_VMAXVu16 (t2MOVi (i32 0)), $src))>;
  def : Pat<(i32 (vecreduce_umax (v4i32 MQPR:$src))),
            (i32 (MVE_VMAXVu32 (t2MOVi (i32 0)), $src))>;

  // Signed minimum.
  def : Pat<(i32 (vecreduce_smin (v16i8 MQPR:$src))),
            (i32 (MVE_VMINVs8 (t2MOVi (i32 127)), $src))>;
  def : Pat<(i32 (vecreduce_smin (v8i16 MQPR:$src))),
            (i32 (MVE_VMINVs16 (t2MOVi16 (i32 32767)), $src))>;
  def : Pat<(i32 (vecreduce_smin (v4i32 MQPR:$src))),
            (i32 (MVE_VMINVs32 (t2MVNi (i32 -2147483648)), $src))>;

  // Unsigned minimum.
  def : Pat<(i32 (vecreduce_umin (v16i8 MQPR:$src))),
            (i32 (MVE_VMINVu8 (t2MOVi (i32 255)), $src))>;
  def : Pat<(i32 (vecreduce_umin (v8i16 MQPR:$src))),
            (i32 (MVE_VMINVu16 (t2MOVi16 (i32 65535)), $src))>;
  def : Pat<(i32 (vecreduce_umin (v4i32 MQPR:$src))),
            (i32 (MVE_VMINVu32 (t2MOVi (i32 4294967295)), $src))>;
}
|
|
|
|
|
2019-06-18 23:05:42 +08:00
|
|
|
// Emits the s8 / s16 / s32 records with U = 0 and bit_17 = 0.
//   iname   - mnemonic
//   bit_7   - value of encoding bit 7
//   pattern - optional selection patterns. Forwarded to each record so that
//             a caller-supplied list is not silently dropped, matching the
//             sibling MVE_VMINMAXNMV_fty / MVE_VMINMAXNMAV_fty multiclasses.
//             The default [] leaves existing instantiations unchanged.
multiclass MVE_VMINMAXAV_ty<string iname, bit bit_7, list<dag> pattern=[]> {
  def s8  : MVE_VMINMAXV<iname, "s8",  0b0, 0b00, 0b0, bit_7, pattern>;
  def s16 : MVE_VMINMAXV<iname, "s16", 0b0, 0b01, 0b0, bit_7, pattern>;
  def s32 : MVE_VMINMAXV<iname, "s32", 0b0, 0b10, 0b0, bit_7, pattern>;
}
|
|
|
|
|
2019-06-18 23:05:42 +08:00
|
|
|
// bit_7 distinguishes the two mnemonics: 1 for vminav, 0 for vmaxav.
defm MVE_VMINAV : MVE_VMINMAXAV_ty<"vminav", 0b1>;
defm MVE_VMAXAV : MVE_VMINMAXAV_ty<"vmaxav", 0b0>;
|
2019-06-14 22:31:13 +08:00
|
|
|
|
2019-06-18 23:05:42 +08:00
|
|
|
// Shared encoding for the VMLADAV / VMLSDAV family: two vector inputs
// ($Qn, $Qm) and an even GPR destination $RdaDest. The caller supplies the
// input operand list and the constraint string.
//   sz     - value of encoding bit 16
//   bit_28 - value of encoding bit 28
//   A      - value of encoding bit 5
//   X      - value of encoding bit 12
//   bit_8  - value of encoding bit 8
//   bit_0  - value of encoding bit 0
class MVE_VMLAMLSDAV<string iname, string suffix, dag iops, string cstr,
                     bit sz, bit bit_28, bit A, bit X, bit bit_8, bit bit_0,
                     list<dag> pattern=[]>
  : MVE_rDest<(outs tGPREven:$RdaDest), iops, NoItinerary, iname, suffix,
              "$RdaDest, $Qn, $Qm", cstr, pattern> {
  bits<4> RdaDest;
  bits<3> Qn;
  bits<3> Qm;

  // Operand fields. Only bits 3-1 of RdaDest are encoded.
  let Inst{19-17} = Qn;
  let Inst{15-13} = RdaDest{3-1};
  let Inst{3-1}   = Qm;

  // Per-variant selectors.
  let Inst{28} = bit_28;
  let Inst{16} = sz;
  let Inst{12} = X;
  let Inst{8}  = bit_8;
  let Inst{5}  = A;
  let Inst{0}  = bit_0;

  // Fixed bits.
  let Inst{22-20} = 0b111;
  let Inst{7-6}   = 0b00;
}
|
|
|
|
|
[ARM] Remove some spurious MVE reduction instructions.
The family of 'dual-accumulating' vector multiply-add instructions
(VMLADAV, VMLALDAV and VRMLALDAVH) can all operate on both signed and
unsigned integer types, and they all have an 'exchange' variant (with
an X in the name) that modifies which pairs of vector lanes in the two
inputs are multiplied together. But there's a clause in the spec that
says that the X variants //don't// operate on unsigned integer types,
only signed. You can have X, or unsigned, or neither, but not both.
We didn't notice that clause when we implemented the MC support for
these instructions, so LLVM believes that things like VMLADAVX.U8 do
exist, contradicting the spec. Here I fix that by conditioning them
out in Tablegen.
In order to do that, I've reversed the nesting order of the Tablegen
multiclasses for those instructions. Previously, the innermost
multiclass generated the X and not-X variants, and the one outside
that generated the A and not-A variants. Now X is done by the outer
multiclass, which allows me to bypass that one when I only want the
two not-X variants.
Changing the multiclass nesting order also changes the names of the
instruction ids unless I make a special effort not to. I decided that
while I was changing them anyway I'd make them look nicer; so now the
instructions have names like MVE_VMLADAVs32 or MVE_VMLADAVaxs32,
instead of cumbersome _noacc_noexch suffixes.
The corresponding multiply-subtract instructions are unaffected. Those
don't accept unsigned types at all, either in the spec or in LLVM.
Reviewers: ostannard, dmgreen
Subscribers: javed.absar, kristof.beyls, hiraditya, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D67214
llvm-svn: 371405
2019-09-09 23:17:26 +08:00
|
|
|
// Emits two records for one (x, suffix) combination:
//   <NAME><x><suffix>  - mnemonic iname # x, vector inputs only, A = 0
//   <NAME>a<x><suffix> - mnemonic iname # "a" # x, with $RdaSrc as an extra
//                        input tied to $RdaDest, A = 1
multiclass MVE_VMLAMLSDAV_A<string iname, string x, string suffix,
                            bit sz, bit bit_28, bit X, bit bit_8, bit bit_0,
                            list<dag> pattern=[]> {
  def ""#x#suffix  : MVE_VMLAMLSDAV<iname # x, suffix,
                                    (ins MQPR:$Qn, MQPR:$Qm),
                                    "",
                                    sz, bit_28, 0b0, X, bit_8, bit_0,
                                    pattern>;
  def "a"#x#suffix : MVE_VMLAMLSDAV<iname # "a" # x, suffix,
                                    (ins tGPREven:$RdaSrc, MQPR:$Qn, MQPR:$Qm),
                                    "$RdaDest = $RdaSrc",
                                    sz, bit_28, 0b1, X, bit_8, bit_0,
                                    pattern>;
}
|
|
|
|
|
|
|
|
// Instantiates MVE_VMLAMLSDAV_A twice with otherwise identical arguments:
// once with an empty `x` string and X = 0, and once with `x` = "x" and X = 1.
multiclass MVE_VMLAMLSDAV_AX<string iname, string suffix, bit sz, bit bit_28,
                             bit bit_8, bit bit_0, list<dag> pattern=[]> {
  // Variants without "x" in the name.
  defm "" : MVE_VMLAMLSDAV_A<iname, "", suffix,
                             sz, bit_28, 0b0, bit_8, bit_0, pattern>;

  // Variants with "x" in the name.
  defm "" : MVE_VMLAMLSDAV_A<iname, "x", suffix,
                             sz, bit_28, 0b1, bit_8, bit_0, pattern>;
}
|
|
|
|
|
[ARM] Remove some spurious MVE reduction instructions.
The family of 'dual-accumulating' vector multiply-add instructions
(VMLADAV, VMLALDAV and VRMLALDAVH) can all operate on both signed and
unsigned integer types, and they all have an 'exchange' variant (with
an X in the name) that modifies which pairs of vector lanes in the two
inputs are multiplied together. But there's a clause in the spec that
says that the X variants //don't// operate on unsigned integer types,
only signed. You can have X, or unsigned, or neither, but not both.
We didn't notice that clause when we implemented the MC support for
these instructions, so LLVM believes that things like VMLADAVX.U8 do
exist, contradicting the spec. Here I fix that by conditioning them
out in Tablegen.
In order to do that, I've reversed the nesting order of the Tablegen
multiclasses for those instructions. Previously, the innermost
multiclass generated the X and not-X variants, and the one outside
that generated the A and not-A variants. Now X is done by the outer
multiclass, which allows me to bypass that one when I only want the
two not-X variants.
Changing the multiclass nesting order also changes the names of the
instruction ids unless I make a special effort not to. I decided that
while I was changing them anyway I'd make them look nicer; so now the
instructions have names like MVE_VMLADAVs32 or MVE_VMLADAVaxs32,
instead of cumbersome _noacc_noexch suffixes.
The corresponding multiply-subtract instructions are unaffected. Those
don't accept unsigned types at all, either in the spec or in LLVM.
Reviewers: ostannard, dmgreen
Subscribers: javed.absar, kristof.beyls, hiraditya, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D67214
llvm-svn: 371405
2019-09-09 23:17:26 +08:00
|
|
|
// All "vmladav" variants for one element-size suffix.
//  - "s"#suffix goes through MVE_VMLAMLSDAV_AX, so it gets both the plain and
//    the "x" variants, with bit_28 = 0.
//  - "u"#suffix goes straight to MVE_VMLAMLSDAV_A with an empty `x` string
//    and X = 0, so no "x" variants are created for it; bit_28 = 1.
// bit_0 is 0 in both cases.
multiclass MVE_VMLADAV_multi<string suffix, bit sz, bit bit_8,
                             list<dag> pattern=[]> {
  defm "" : MVE_VMLAMLSDAV_AX<"vmladav", "s"#suffix,
                              sz, 0b0, bit_8, 0b0, pattern>;

  defm "" : MVE_VMLAMLSDAV_A<"vmladav", "", "u"#suffix,
                             sz, 0b1, 0b0, bit_8, 0b0, pattern>;
}
|
|
|
|
|
[ARM] Remove some spurious MVE reduction instructions.
The family of 'dual-accumulating' vector multiply-add instructions
(VMLADAV, VMLALDAV and VRMLALDAVH) can all operate on both signed and
unsigned integer types, and they all have an 'exchange' variant (with
an X in the name) that modifies which pairs of vector lanes in the two
inputs are multiplied together. But there's a clause in the spec that
says that the X variants //don't// operate on unsigned integer types,
only signed. You can have X, or unsigned, or neither, but not both.
We didn't notice that clause when we implemented the MC support for
these instructions, so LLVM believes that things like VMLADAVX.U8 do
exist, contradicting the spec. Here I fix that by conditioning them
out in Tablegen.
In order to do that, I've reversed the nesting order of the Tablegen
multiclasses for those instructions. Previously, the innermost
multiclass generated the X and not-X variants, and the one outside
that generated the A and not-A variants. Now X is done by the outer
multiclass, which allows me to bypass that one when I only want the
two not-X variants.
Changing the multiclass nesting order also changes the names of the
instruction ids unless I make a special effort not to. I decided that
while I was changing them anyway I'd make them look nicer; so now the
instructions have names like MVE_VMLADAVs32 or MVE_VMLADAVaxs32,
instead of cumbersome _noacc_noexch suffixes.
The corresponding multiply-subtract instructions are unaffected. Those
don't accept unsigned types at all, either in the spec or in LLVM.
Reviewers: ostannard, dmgreen
Subscribers: javed.absar, kristof.beyls, hiraditya, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D67214
llvm-svn: 371405
2019-09-09 23:17:26 +08:00
|
|
|
// All "vmlsdav" variants for one element-size suffix. Only the "s"#suffix
// type is instantiated here. bit_28 comes from the caller, while bit_8 is
// fixed to 0 and bit_0 is fixed to 1.
multiclass MVE_VMLSDAV_multi<string suffix, bit sz, bit bit_28,
                             list<dag> pattern=[]> {
  defm "" : MVE_VMLAMLSDAV_AX<"vmlsdav", "s"#suffix,
                              sz, bit_28, 0b0, 0b1, pattern>;
}
|
|
|
|
|
[ARM] Remove some spurious MVE reduction instructions.
The family of 'dual-accumulating' vector multiply-add instructions
(VMLADAV, VMLALDAV and VRMLALDAVH) can all operate on both signed and
unsigned integer types, and they all have an 'exchange' variant (with
an X in the name) that modifies which pairs of vector lanes in the two
inputs are multiplied together. But there's a clause in the spec that
says that the X variants //don't// operate on unsigned integer types,
only signed. You can have X, or unsigned, or neither, but not both.
We didn't notice that clause when we implemented the MC support for
these instructions, so LLVM believes that things like VMLADAVX.U8 do
exist, contradicting the spec. Here I fix that by conditioning them
out in Tablegen.
In order to do that, I've reversed the nesting order of the Tablegen
multiclasses for those instructions. Previously, the innermost
multiclass generated the X and not-X variants, and the one outside
that generated the A and not-A variants. Now X is done by the outer
multiclass, which allows me to bypass that one when I only want the
two not-X variants.
Changing the multiclass nesting order also changes the names of the
instruction ids unless I make a special effort not to. I decided that
while I was changing them anyway I'd make them look nicer; so now the
instructions have names like MVE_VMLADAVs32 or MVE_VMLADAVaxs32,
instead of cumbersome _noacc_noexch suffixes.
The corresponding multiply-subtract instructions are unaffected. Those
don't accept unsigned types at all, either in the spec or in LLVM.
Reviewers: ostannard, dmgreen
Subscribers: javed.absar, kristof.beyls, hiraditya, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D67214
llvm-svn: 371405
2019-09-09 23:17:26 +08:00
|
|
|
// VMLADAV for each element size. Template arguments are <suffix, sz, bit_8>.
defm MVE_VMLADAV : MVE_VMLADAV_multi< "8", 0b0, 0b1>;
defm MVE_VMLADAV : MVE_VMLADAV_multi<"16", 0b0, 0b0>;
defm MVE_VMLADAV : MVE_VMLADAV_multi<"32", 0b1, 0b0>;
|
2019-06-14 22:31:13 +08:00
|
|
|
|
[ARM] Remove some spurious MVE reduction instructions.
The family of 'dual-accumulating' vector multiply-add instructions
(VMLADAV, VMLALDAV and VRMLALDAVH) can all operate on both signed and
unsigned integer types, and they all have an 'exchange' variant (with
an X in the name) that modifies which pairs of vector lanes in the two
inputs are multiplied together. But there's a clause in the spec that
says that the X variants //don't// operate on unsigned integer types,
only signed. You can have X, or unsigned, or neither, but not both.
We didn't notice that clause when we implemented the MC support for
these instructions, so LLVM believes that things like VMLADAVX.U8 do
exist, contradicting the spec. Here I fix that by conditioning them
out in Tablegen.
In order to do that, I've reversed the nesting order of the Tablegen
multiclasses for those instructions. Previously, the innermost
multiclass generated the X and not-X variants, and the one outside
that generated the A and not-A variants. Now X is done by the outer
multiclass, which allows me to bypass that one when I only want the
two not-X variants.
Changing the multiclass nesting order also changes the names of the
instruction ids unless I make a special effort not to. I decided that
while I was changing them anyway I'd make them look nicer; so now the
instructions have names like MVE_VMLADAVs32 or MVE_VMLADAVaxs32,
instead of cumbersome _noacc_noexch suffixes.
The corresponding multiply-subtract instructions are unaffected. Those
don't accept unsigned types at all, either in the spec or in LLVM.
Reviewers: ostannard, dmgreen
Subscribers: javed.absar, kristof.beyls, hiraditya, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D67214
llvm-svn: 371405
2019-09-09 23:17:26 +08:00
|
|
|
// VMLSDAV for each element size. Template arguments are <suffix, sz, bit_28>.
defm MVE_VMLSDAV : MVE_VMLSDAV_multi< "8", 0b0, 0b1>;
defm MVE_VMLSDAV : MVE_VMLSDAV_multi<"16", 0b0, 0b0>;
defm MVE_VMLSDAV : MVE_VMLSDAV_multi<"32", 0b1, 0b0>;
|
2019-06-14 22:31:13 +08:00
|
|
|
|
|
|
|
// vmlav aliases vmladav
|
[ARM] Remove some spurious MVE reduction instructions.
The family of 'dual-accumulating' vector multiply-add instructions
(VMLADAV, VMLALDAV and VRMLALDAVH) can all operate on both signed and
unsigned integer types, and they all have an 'exchange' variant (with
an X in the name) that modifies which pairs of vector lanes in the two
inputs are multiplied together. But there's a clause in the spec that
says that the X variants //don't// operate on unsigned integer types,
only signed. You can have X, or unsigned, or neither, but not both.
We didn't notice that clause when we implemented the MC support for
these instructions, so LLVM believes that things like VMLADAVX.U8 do
exist, contradicting the spec. Here I fix that by conditioning them
out in Tablegen.
In order to do that, I've reversed the nesting order of the Tablegen
multiclasses for those instructions. Previously, the innermost
multiclass generated the X and not-X variants, and the one outside
that generated the A and not-A variants. Now X is done by the outer
multiclass, which allows me to bypass that one when I only want the
two not-X variants.
Changing the multiclass nesting order also changes the names of the
instruction ids unless I make a special effort not to. I decided that
while I was changing them anyway I'd make them look nicer; so now the
instructions have names like MVE_VMLADAVs32 or MVE_VMLADAVaxs32,
instead of cumbersome _noacc_noexch suffixes.
The corresponding multiply-subtract instructions are unaffected. Those
don't accept unsigned types at all, either in the spec or in LLVM.
Reviewers: ostannard, dmgreen
Subscribers: javed.absar, kristof.beyls, hiraditya, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D67214
llvm-svn: 371405
2019-09-09 23:17:26 +08:00
|
|
|
// One MVEInstAlias per (acc, suffix) pair: the "vmlav<acc>" spelling is
// mapped onto the MVE_VMLADAV<acc><suffix> instruction record. Only the
// non-"x" records are referenced here.
foreach acc = ["", "a"] in {
  foreach suffix = ["s8", "s16", "s32", "u8", "u16", "u32"] in {
    def : MVEInstAlias<
        !strconcat("vmlav", acc, "${vp}.", suffix, "\t$RdaDest, $Qn, $Qm"),
        (!cast<Instruction>(!strconcat("MVE_VMLADAV", acc, suffix))
         tGPREven:$RdaDest, MQPR:$Qn, MQPR:$Qm, vpred_n:$vp)>;
  }
}
|
|
|
|
|
|
|
|
// Common encoding class for VMLALDAV, VMLSLDAV, VRMLALDAVH and VRMLSLDAVH.
//
// The result is a register pair: $RdaLoDest (tGPREven) and $RdaHiDest
// (tGPROdd). Only bits 3-1 of each 4-bit register number are placed in the
// encoding. The input operand list `iops` and the constraint string `cstr`
// are supplied by the user of the class; sz, bit_28, A, X, bit_8 and bit_0
// are copied verbatim into fixed bit positions.
class MVE_VMLALDAVBase<string iname, string suffix, dag iops, string cstr,
                       bit sz, bit bit_28, bit A, bit X, bit bit_8, bit bit_0,
                       list<dag> pattern=[]>
  : MVE_rDest<(outs tGPREven:$RdaLoDest, tGPROdd:$RdaHiDest), iops, NoItinerary,
              iname, suffix, "$RdaLoDest, $RdaHiDest, $Qn, $Qm", cstr, pattern> {
  bits<4> RdaLoDest;
  bits<4> RdaHiDest;
  bits<3> Qm;
  bits<3> Qn;

  // Register operand fields.
  let Inst{22-20} = RdaHiDest{3-1};
  let Inst{19-17} = Qn{2-0};
  let Inst{15-13} = RdaLoDest{3-1};
  let Inst{3-1}   = Qm{2-0};

  // Bits selected by the template arguments.
  let Inst{28} = bit_28;
  let Inst{16} = sz;
  let Inst{12} = X;
  let Inst{8}  = bit_8;
  let Inst{5}  = A;
  let Inst{0}  = bit_0;

  // Constant field.
  let Inst{7-6} = 0b00;
}
|
|
|
|
|
[ARM] Remove some spurious MVE reduction instructions.
The family of 'dual-accumulating' vector multiply-add instructions
(VMLADAV, VMLALDAV and VRMLALDAVH) can all operate on both signed and
unsigned integer types, and they all have an 'exchange' variant (with
an X in the name) that modifies which pairs of vector lanes in the two
inputs are multiplied together. But there's a clause in the spec that
says that the X variants //don't// operate on unsigned integer types,
only signed. You can have X, or unsigned, or neither, but not both.
We didn't notice that clause when we implemented the MC support for
these instructions, so LLVM believes that things like VMLADAVX.U8 do
exist, contradicting the spec. Here I fix that by conditioning them
out in Tablegen.
In order to do that, I've reversed the nesting order of the Tablegen
multiclasses for those instructions. Previously, the innermost
multiclass generated the X and not-X variants, and the one outside
that generated the A and not-A variants. Now X is done by the outer
multiclass, which allows me to bypass that one when I only want the
two not-X variants.
Changing the multiclass nesting order also changes the names of the
instruction ids unless I make a special effort not to. I decided that
while I was changing them anyway I'd make them look nicer; so now the
instructions have names like MVE_VMLADAVs32 or MVE_VMLADAVaxs32,
instead of cumbersome _noacc_noexch suffixes.
The corresponding multiply-subtract instructions are unaffected. Those
don't accept unsigned types at all, either in the spec or in LLVM.
Reviewers: ostannard, dmgreen
Subscribers: javed.absar, kristof.beyls, hiraditya, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D67214
llvm-svn: 371405
2019-09-09 23:17:26 +08:00
|
|
|
// Emits two MVE_VMLALDAVBase records for one instruction: one with A = 0 and
// one with A = 1. `x` is a string spliced into both the record name and the
// mnemonic (users in this file pass "" or "x"); `X` is the bit forwarded
// alongside it.
multiclass MVE_VMLALDAVBase_A<string iname, string x, string suffix,
                              bit sz, bit bit_28, bit X, bit bit_8, bit bit_0,
                              list<dag> pattern=[]> {
  // A = 0: vector inputs only, empty constraint string.
  def ""#x#suffix
    : MVE_VMLALDAVBase<!strconcat(iname, x), suffix,
                       (ins MQPR:$Qn, MQPR:$Qm), "",
                       sz, bit_28, 0b0, X, bit_8, bit_0, pattern>;

  // A = 1: "a" is inserted into the record name and mnemonic, and the two
  // extra inputs $RdaLoSrc/$RdaHiSrc are tied to the corresponding outputs.
  def "a"#x#suffix
    : MVE_VMLALDAVBase<!strconcat(iname, "a", x), suffix,
                       (ins tGPREven:$RdaLoSrc, tGPROdd:$RdaHiSrc,
                            MQPR:$Qn, MQPR:$Qm),
                       "$RdaLoDest = $RdaLoSrc,$RdaHiDest = $RdaHiSrc",
                       sz, bit_28, 0b1, X, bit_8, bit_0, pattern>;
}
|
|
|
|
|
[ARM] Remove some spurious MVE reduction instructions.
The family of 'dual-accumulating' vector multiply-add instructions
(VMLADAV, VMLALDAV and VRMLALDAVH) can all operate on both signed and
unsigned integer types, and they all have an 'exchange' variant (with
an X in the name) that modifies which pairs of vector lanes in the two
inputs are multiplied together. But there's a clause in the spec that
says that the X variants //don't// operate on unsigned integer types,
only signed. You can have X, or unsigned, or neither, but not both.
We didn't notice that clause when we implemented the MC support for
these instructions, so LLVM believes that things like VMLADAVX.U8 do
exist, contradicting the spec. Here I fix that by conditioning them
out in Tablegen.
In order to do that, I've reversed the nesting order of the Tablegen
multiclasses for those instructions. Previously, the innermost
multiclass generated the X and not-X variants, and the one outside
that generated the A and not-A variants. Now X is done by the outer
multiclass, which allows me to bypass that one when I only want the
two not-X variants.
Changing the multiclass nesting order also changes the names of the
instruction ids unless I make a special effort not to. I decided that
while I was changing them anyway I'd make them look nicer; so now the
instructions have names like MVE_VMLADAVs32 or MVE_VMLADAVaxs32,
instead of cumbersome _noacc_noexch suffixes.
The corresponding multiply-subtract instructions are unaffected. Those
don't accept unsigned types at all, either in the spec or in LLVM.
Reviewers: ostannard, dmgreen
Subscribers: javed.absar, kristof.beyls, hiraditya, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D67214
llvm-svn: 371405
2019-09-09 23:17:26 +08:00
|
|
|
|
|
|
|
// Instantiates MVE_VMLALDAVBase_A twice with otherwise identical arguments:
// once with an empty `x` string and X = 0, and once with `x` = "x" and X = 1.
multiclass MVE_VMLALDAVBase_AX<string iname, string suffix, bit sz, bit bit_28,
                               bit bit_8, bit bit_0, list<dag> pattern=[]> {
  // Variants without "x" in the name.
  defm "" : MVE_VMLALDAVBase_A<iname, "", suffix,
                               sz, bit_28, 0b0, bit_8, bit_0, pattern>;

  // Variants with "x" in the name.
  defm "" : MVE_VMLALDAVBase_A<iname, "x", suffix,
                               sz, bit_28, 0b1, bit_8, bit_0, pattern>;
}
|
|
|
|
|
[ARM] Remove some spurious MVE reduction instructions.
The family of 'dual-accumulating' vector multiply-add instructions
(VMLADAV, VMLALDAV and VRMLALDAVH) can all operate on both signed and
unsigned integer types, and they all have an 'exchange' variant (with
an X in the name) that modifies which pairs of vector lanes in the two
inputs are multiplied together. But there's a clause in the spec that
says that the X variants //don't// operate on unsigned integer types,
only signed. You can have X, or unsigned, or neither, but not both.
We didn't notice that clause when we implemented the MC support for
these instructions, so LLVM believes that things like VMLADAVX.U8 do
exist, contradicting the spec. Here I fix that by conditioning them
out in Tablegen.
In order to do that, I've reversed the nesting order of the Tablegen
multiclasses for those instructions. Previously, the innermost
multiclass generated the X and not-X variants, and the one outside
that generated the A and not-A variants. Now X is done by the outer
multiclass, which allows me to bypass that one when I only want the
two not-X variants.
Changing the multiclass nesting order also changes the names of the
instruction ids unless I make a special effort not to. I decided that
while I was changing them anyway I'd make them look nicer; so now the
instructions have names like MVE_VMLADAVs32 or MVE_VMLADAVaxs32,
instead of cumbersome _noacc_noexch suffixes.
The corresponding multiply-subtract instructions are unaffected. Those
don't accept unsigned types at all, either in the spec or in LLVM.
Reviewers: ostannard, dmgreen
Subscribers: javed.absar, kristof.beyls, hiraditya, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D67214
llvm-svn: 371405
2019-09-09 23:17:26 +08:00
|
|
|
// All "vrmlaldavh" variants for one element-size suffix.
//  - "s"#suffix goes through MVE_VMLALDAVBase_AX (plain and "x" variants)
//    with sz = 0, bit_28 = 0, bit_8 = 1, bit_0 = 0.
//  - "u"#suffix goes straight to MVE_VMLALDAVBase_A with an empty `x` string
//    and X = 0, so no "x" variants are created for it; sz = 0, bit_28 = 1,
//    bit_8 = 1, bit_0 = 0.
multiclass MVE_VRMLALDAVH_multi<string suffix, list<dag> pattern=[]> {
  defm "" : MVE_VMLALDAVBase_AX<"vrmlaldavh", "s"#suffix,
                                0b0, 0b0, 0b1, 0b0, pattern>;

  defm "" : MVE_VMLALDAVBase_A<"vrmlaldavh", "", "u"#suffix,
                               0b0, 0b1, 0b0, 0b1, 0b0, pattern>;
}
|
|
|
|
|
[ARM] Remove some spurious MVE reduction instructions.
The family of 'dual-accumulating' vector multiply-add instructions
(VMLADAV, VMLALDAV and VRMLALDAVH) can all operate on both signed and
unsigned integer types, and they all have an 'exchange' variant (with
an X in the name) that modifies which pairs of vector lanes in the two
inputs are multiplied together. But there's a clause in the spec that
says that the X variants //don't// operate on unsigned integer types,
only signed. You can have X, or unsigned, or neither, but not both.
We didn't notice that clause when we implemented the MC support for
these instructions, so LLVM believes that things like VMLADAVX.U8 do
exist, contradicting the spec. Here I fix that by conditioning them
out in Tablegen.
In order to do that, I've reversed the nesting order of the Tablegen
multiclasses for those instructions. Previously, the innermost
multiclass generated the X and not-X variants, and the one outside
that generated the A and not-A variants. Now X is done by the outer
multiclass, which allows me to bypass that one when I only want the
two not-X variants.
Changing the multiclass nesting order also changes the names of the
instruction ids unless I make a special effort not to. I decided that
while I was changing them anyway I'd make them look nicer; so now the
instructions have names like MVE_VMLADAVs32 or MVE_VMLADAVaxs32,
instead of cumbersome _noacc_noexch suffixes.
The corresponding multiply-subtract instructions are unaffected. Those
don't accept unsigned types at all, either in the spec or in LLVM.
Reviewers: ostannard, dmgreen
Subscribers: javed.absar, kristof.beyls, hiraditya, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D67214
llvm-svn: 371405
2019-09-09 23:17:26 +08:00
|
|
|
// Only the "32" suffix is instantiated; MVE_VRMLALDAVH_multi prepends "s" and
// "u" to it, so the resulting records carry s32 and u32 type suffixes.
defm MVE_VRMLALDAVH : MVE_VRMLALDAVH_multi<"32">;
|
2019-06-14 22:31:13 +08:00
|
|
|
|
|
|
|
// vrmlalvh aliases for vrmlaldavh
|
|
|
|
// One MVEInstAlias per (suffix, acc) pair, in the order s32, s32+a, u32,
// u32+a: the "vrmlalvh<acc>" spelling is mapped onto the
// MVE_VRMLALDAVH<acc><suffix> instruction record. Only the non-"x" records
// are referenced here.
foreach suffix = ["s32", "u32"] in {
  foreach acc = ["", "a"] in {
    def : MVEInstAlias<
        !strconcat("vrmlalvh", acc, "${vp}.", suffix,
                   "\t$RdaLo, $RdaHi, $Qn, $Qm"),
        (!cast<Instruction>(!strconcat("MVE_VRMLALDAVH", acc, suffix))
         tGPREven:$RdaLo, tGPROdd:$RdaHi,
         MQPR:$Qn, MQPR:$Qm, vpred_n:$vp)>;
  }
}
|
|
|
|
|
[ARM] Remove some spurious MVE reduction instructions.
The family of 'dual-accumulating' vector multiply-add instructions
(VMLADAV, VMLALDAV and VRMLALDAVH) can all operate on both signed and
unsigned integer types, and they all have an 'exchange' variant (with
an X in the name) that modifies which pairs of vector lanes in the two
inputs are multiplied together. But there's a clause in the spec that
says that the X variants //don't// operate on unsigned integer types,
only signed. You can have X, or unsigned, or neither, but not both.
We didn't notice that clause when we implemented the MC support for
these instructions, so LLVM believes that things like VMLADAVX.U8 do
exist, contradicting the spec. Here I fix that by conditioning them
out in Tablegen.
In order to do that, I've reversed the nesting order of the Tablegen
multiclasses for those instructions. Previously, the innermost
multiclass generated the X and not-X variants, and the one outside
that generated the A and not-A variants. Now X is done by the outer
multiclass, which allows me to bypass that one when I only want the
two not-X variants.
Changing the multiclass nesting order also changes the names of the
instruction ids unless I make a special effort not to. I decided that
while I was changing them anyway I'd make them look nicer; so now the
instructions have names like MVE_VMLADAVs32 or MVE_VMLADAVaxs32,
instead of cumbersome _noacc_noexch suffixes.
The corresponding multiply-subtract instructions are unaffected. Those
don't accept unsigned types at all, either in the spec or in LLVM.
Reviewers: ostannard, dmgreen
Subscribers: javed.absar, kristof.beyls, hiraditya, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D67214
llvm-svn: 371405
2019-09-09 23:17:26 +08:00
|
|
|
// Instantiates the "vmlaldav" family for one element size.
//   suffix  - element-size text; "s"/"u" is prepended to form the type suffix.
//   sz      - size bit forwarded unchanged to the base multiclasses.
//   pattern - optional selection patterns, forwarded unchanged.
// The signed form goes through MVE_VMLALDAVBase_AX; the unsigned form goes
// through MVE_VMLALDAVBase_A directly, with an empty second string argument.
multiclass MVE_VMLALDAV_multi<string suffix, bit sz, list<dag> pattern=[]> {
  defm "" : MVE_VMLALDAVBase_AX<"vmlaldav", !strconcat("s", suffix),
                                sz, 0b0, 0b0, 0b0, pattern>;
  defm "" : MVE_VMLALDAVBase_A<"vmlaldav", "", !strconcat("u", suffix),
                               sz, 0b1, 0b0, 0b0, 0b0, pattern>;
}
|
|
|
|
|
[ARM] Remove some spurious MVE reduction instructions.
The family of 'dual-accumulating' vector multiply-add instructions
(VMLADAV, VMLALDAV and VRMLALDAVH) can all operate on both signed and
unsigned integer types, and they all have an 'exchange' variant (with
an X in the name) that modifies which pairs of vector lanes in the two
inputs are multiplied together. But there's a clause in the spec that
says that the X variants //don't// operate on unsigned integer types,
only signed. You can have X, or unsigned, or neither, but not both.
We didn't notice that clause when we implemented the MC support for
these instructions, so LLVM believes that things like VMLADAVX.U8 do
exist, contradicting the spec. Here I fix that by conditioning them
out in Tablegen.
In order to do that, I've reversed the nesting order of the Tablegen
multiclasses for those instructions. Previously, the innermost
multiclass generated the X and not-X variants, and the one outside
that generated the A and not-A variants. Now X is done by the outer
multiclass, which allows me to bypass that one when I only want the
two not-X variants.
Changing the multiclass nesting order also changes the names of the
instruction ids unless I make a special effort not to. I decided that
while I was changing them anyway I'd make them look nicer; so now the
instructions have names like MVE_VMLADAVs32 or MVE_VMLADAVaxs32,
instead of cumbersome _noacc_noexch suffixes.
The corresponding multiply-subtract instructions are unaffected. Those
don't accept unsigned types at all, either in the spec or in LLVM.
Reviewers: ostannard, dmgreen
Subscribers: javed.absar, kristof.beyls, hiraditya, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D67214
llvm-svn: 371405
2019-09-09 23:17:26 +08:00
|
|
|
// VMLALDAV instantiations: element size "16" uses sz = 0, "32" uses sz = 1.
defm MVE_VMLALDAV : MVE_VMLALDAV_multi<"16", 0b0>;
defm MVE_VMLALDAV : MVE_VMLALDAV_multi<"32", 0b1>;
|
2019-06-14 22:31:13 +08:00
|
|
|
|
|
|
|
// vmlalv aliases vmlaldav
|
[ARM] Remove some spurious MVE reduction instructions.
The family of 'dual-accumulating' vector multiply-add instructions
(VMLADAV, VMLALDAV and VRMLALDAVH) can all operate on both signed and
unsigned integer types, and they all have an 'exchange' variant (with
an X in the name) that modifies which pairs of vector lanes in the two
inputs are multiplied together. But there's a clause in the spec that
says that the X variants //don't// operate on unsigned integer types,
only signed. You can have X, or unsigned, or neither, but not both.
We didn't notice that clause when we implemented the MC support for
these instructions, so LLVM believes that things like VMLADAVX.U8 do
exist, contradicting the spec. Here I fix that by conditioning them
out in Tablegen.
In order to do that, I've reversed the nesting order of the Tablegen
multiclasses for those instructions. Previously, the innermost
multiclass generated the X and not-X variants, and the one outside
that generated the A and not-A variants. Now X is done by the outer
multiclass, which allows me to bypass that one when I only want the
two not-X variants.
Changing the multiclass nesting order also changes the names of the
instruction ids unless I make a special effort not to. I decided that
while I was changing them anyway I'd make them look nicer; so now the
instructions have names like MVE_VMLADAVs32 or MVE_VMLADAVaxs32,
instead of cumbersome _noacc_noexch suffixes.
The corresponding multiply-subtract instructions are unaffected. Those
don't accept unsigned types at all, either in the spec or in LLVM.
Reviewers: ostannard, dmgreen
Subscribers: javed.absar, kristof.beyls, hiraditya, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D67214
llvm-svn: 371405
2019-09-09 23:17:26 +08:00
|
|
|
// Assembly aliases: "vmlalv[a].<suffix>" is accepted as another spelling of
// the MVE_VMLALDAV[a]<suffix> instruction with the same operand list.
// One alias is emitted per (acc, suffix) combination.
foreach acc = ["", "a"] in
  foreach suffix = ["s16", "s32", "u16", "u32"] in
    def : MVEInstAlias<
        !strconcat("vmlalv", acc, "${vp}.", suffix,
                   "\t$RdaLoDest, $RdaHiDest, $Qn, $Qm"),
        (!cast<Instruction>(!strconcat("MVE_VMLALDAV", acc, suffix))
            tGPREven:$RdaLoDest, tGPROdd:$RdaHiDest,
            MQPR:$Qn, MQPR:$Qm, vpred_n:$vp)>;
|
|
|
|
|
2019-06-18 23:05:42 +08:00
|
|
|
// Wrapper around MVE_VMLALDAVBase_AX for the multiply-subtract forms.
//   iname   - instruction mnemonic, forwarded unchanged.
//   suffix  - complete type suffix, forwarded unchanged.
//   sz      - size bit, forwarded unchanged.
//   bit_28  - forwarded as the fourth template argument of the base.
//   pattern - optional selection patterns, forwarded unchanged.
// The two trailing bit arguments are fixed at 0b0 and 0b1 respectively.
multiclass MVE_VMLSLDAV_multi<string iname, string suffix, bit sz,
                              bit bit_28, list<dag> pattern=[]> {
  defm "" : MVE_VMLALDAVBase_AX<iname, suffix, sz, bit_28,
                                0b0, 0b1, pattern>;
}
|
|
|
|
|
[ARM] Remove some spurious MVE reduction instructions.
The family of 'dual-accumulating' vector multiply-add instructions
(VMLADAV, VMLALDAV and VRMLALDAVH) can all operate on both signed and
unsigned integer types, and they all have an 'exchange' variant (with
an X in the name) that modifies which pairs of vector lanes in the two
inputs are multiplied together. But there's a clause in the spec that
says that the X variants //don't// operate on unsigned integer types,
only signed. You can have X, or unsigned, or neither, but not both.
We didn't notice that clause when we implemented the MC support for
these instructions, so LLVM believes that things like VMLADAVX.U8 do
exist, contradicting the spec. Here I fix that by conditioning them
out in Tablegen.
In order to do that, I've reversed the nesting order of the Tablegen
multiclasses for those instructions. Previously, the innermost
multiclass generated the X and not-X variants, and the one outside
that generated the A and not-A variants. Now X is done by the outer
multiclass, which allows me to bypass that one when I only want the
two not-X variants.
Changing the multiclass nesting order also changes the names of the
instruction ids unless I make a special effort not to. I decided that
while I was changing them anyway I'd make them look nicer; so now the
instructions have names like MVE_VMLADAVs32 or MVE_VMLADAVaxs32,
instead of cumbersome _noacc_noexch suffixes.
The corresponding multiply-subtract instructions are unaffected. Those
don't accept unsigned types at all, either in the spec or in LLVM.
Reviewers: ostannard, dmgreen
Subscribers: javed.absar, kristof.beyls, hiraditya, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D67214
llvm-svn: 371405
2019-09-09 23:17:26 +08:00
|
|
|
// Multiply-subtract instantiations; only signed suffixes are created here.
// Template arguments are <mnemonic, suffix, sz, bit_28>.
defm MVE_VMLSLDAV   : MVE_VMLSLDAV_multi<"vmlsldav",   "s16", 0b0, 0b0>;
defm MVE_VMLSLDAV   : MVE_VMLSLDAV_multi<"vmlsldav",   "s32", 0b1, 0b0>;
defm MVE_VRMLSLDAVH : MVE_VMLSLDAV_multi<"vrmlsldavh", "s32", 0b0, 0b1>;
|
2019-06-14 22:31:13 +08:00
|
|
|
|
[ARM] Set up infrastructure for MVE vector instructions.
This commit prepares the way to start adding the main collection of
MVE instructions, which operate on the 128-bit vector registers.
The most obvious thing that's needed, and the simplest, is to add the
MQPR register class, which is like the existing QPR except that it has
fewer registers in it.
The more complicated part: MVE defines a system of vector predication,
in which instructions operating on 128-bit vector registers can be
constrained to operate on only a subset of the lanes, using a system
of prefix instructions similar to the existing Thumb IT, in that you
have one prefix instruction which designates up to 4 following
instructions as subject to predication, and within that sequence, the
predicate can be inverted by means of T/E suffixes ('Then' / 'Else').
To support instructions of this type, we've added two new Tablegen
classes `vpred_n` and `vpred_r` for standard clusters of MC operands
to add to a predicated instruction. Both include a flag indicating how
the instruction is predicated at all (options are T, E and 'not
predicated'), and an input register field for the register controlling
the set of active lanes. They differ from each other in that `vpred_r`
also includes an input operand for the previous value of the output
register, for instructions that leave inactive lanes unchanged.
`vpred_n` lacks that extra operand; it will be used for instructions
that don't preserve inactive lanes in their output register (either
because inactive lanes are zeroed, as the MVE load instructions do, or
because the output register isn't a vector at all).
This commit also adds the family of prefix instructions themselves
(VPT / VPST), and all the machinery needed to work with them in
assembly and disassembly (e.g. generating the 't' and 'e' mnemonic
suffixes on disassembled instructions within a predicated block)
I've added a couple of demo instructions that derive from the new
Tablegen base classes and use those two operand clusters. The bulk of
the vector instructions will come in followup commits small enough to
be manageable. (One exception is that I've added the full version of
`isMnemonicVPTPredicable` in the AsmParser, because it seemed
pointless to carefully split it up.)
Reviewers: dmgreen, samparker, SjoerdMeijer, t.p.northover
Subscribers: javed.absar, kristof.beyls, hiraditya, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D62669
llvm-svn: 363258
2019-06-13 21:11:13 +08:00
|
|
|
// end of mve_rDest instructions
|
|
|
|
|
|
|
|
// start of mve_comp instructions
|
|
|
|
|
|
|
|
// Common base for instructions of the form "$Qd, $Qn, $Qm": two MQPR vector
// inputs, one MQPR vector output, predicated through vpred_r.
// This class fixes the placement of the three register numbers in the
// encoding plus a handful of constant bits; subclasses fill in the rest.
class MVE_comp<InstrItinClass itin, string iname, string suffix,
               string cstr, list<dag> pattern=[]>
  : MVE_p<(outs MQPR:$Qd), (ins MQPR:$Qn, MQPR:$Qm), itin, iname, suffix,
          "$Qd, $Qn, $Qm", vpred_r, cstr, pattern> {
  bits<4> Qd;
  bits<4> Qn;
  bits<4> Qm;

  // Destination register: top bit in Inst{22}, low three bits in Inst{15-13}.
  let Inst{22} = Qd{3};
  let Inst{15-13} = Qd{2-0};

  // First source register: top bit in Inst{7}, low three bits in Inst{19-17}.
  let Inst{19-17} = Qn{2-0};
  let Inst{7} = Qn{3};

  // Second source register: top bit in Inst{5}, low three bits in Inst{3-1}.
  let Inst{5} = Qm{3};
  let Inst{3-1} = Qm{2-0};

  // Constant bits shared by every subclass.
  let Inst{16} = 0b0;
  let Inst{12} = 0b0;
  let Inst{10-9} = 0b11;
  let Inst{0} = 0b0;
}
|
|
|
|
|
2019-06-18 23:05:42 +08:00
|
|
|
// Encoding class built on MVE_comp, with no itinerary and no constraint
// string, used for the "vmaxnm"/"vminnm" definitions.
//   sz     - placed in Inst{20}.
//   bit_21 - placed in Inst{21}.
// Instructions of this class require the HasMVEFloat predicate.
class MVE_VMINMAXNM<string iname, string suffix, bit sz, bit bit_21,
                    list<dag> pattern=[]>
  : MVE_comp<NoItinerary, iname, suffix, "", pattern> {

  let Inst{28} = 0b1;
  // Inst{25-24} = 0b11 and Inst{23} = 0b0, written as one field.
  let Inst{25-23} = 0b110;
  let Inst{21} = bit_21;
  let Inst{20} = sz;
  let Inst{11} = 0b1;
  let Inst{8} = 0b1;
  let Inst{6} = 0b1;
  let Inst{4} = 0b1;

  let Predicates = [HasMVEFloat];
}
|
|
|
|
|
2019-06-18 23:05:42 +08:00
|
|
|
// "vmaxnm" uses bit_21 = 0; the f32 form has sz = 0 and the f16 form sz = 1.
def MVE_VMAXNMf32 : MVE_VMINMAXNM<"vmaxnm", "f32", 0b0, 0b0>;
def MVE_VMAXNMf16 : MVE_VMINMAXNM<"vmaxnm", "f16", 0b1, 0b0>;
|
[ARM] Set up infrastructure for MVE vector instructions.
This commit prepares the way to start adding the main collection of
MVE instructions, which operate on the 128-bit vector registers.
The most obvious thing that's needed, and the simplest, is to add the
MQPR register class, which is like the existing QPR except that it has
fewer registers in it.
The more complicated part: MVE defines a system of vector predication,
in which instructions operating on 128-bit vector registers can be
constrained to operate on only a subset of the lanes, using a system
of prefix instructions similar to the existing Thumb IT, in that you
have one prefix instruction which designates up to 4 following
instructions as subject to predication, and within that sequence, the
predicate can be inverted by means of T/E suffixes ('Then' / 'Else').
To support instructions of this type, we've added two new Tablegen
classes `vpred_n` and `vpred_r` for standard clusters of MC operands
to add to a predicated instruction. Both include a flag indicating how
the instruction is predicated at all (options are T, E and 'not
predicated'), and an input register field for the register controlling
the set of active lanes. They differ from each other in that `vpred_r`
also includes an input operand for the previous value of the output
register, for instructions that leave inactive lanes unchanged.
`vpred_n` lacks that extra operand; it will be used for instructions
that don't preserve inactive lanes in their output register (either
because inactive lanes are zeroed, as the MVE load instructions do, or
because the output register isn't a vector at all).
This commit also adds the family of prefix instructions themselves
(VPT / VPST), and all the machinery needed to work with them in
assembly and disassembly (e.g. generating the 't' and 'e' mnemonic
suffixes on disassembled instructions within a predicated block)
I've added a couple of demo instructions that derive from the new
Tablegen base classes and use those two operand clusters. The bulk of
the vector instructions will come in followup commits small enough to
be manageable. (One exception is that I've added the full version of
`isMnemonicVPTPredicable` in the AsmParser, because it seemed
pointless to carefully split it up.)
Reviewers: dmgreen, samparker, SjoerdMeijer, t.p.northover
Subscribers: javed.absar, kristof.beyls, hiraditya, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D62669
llvm-svn: 363258
2019-06-13 21:11:13 +08:00
|
|
|
|
2019-07-13 22:29:02 +08:00
|
|
|
// Select the generic fmaxnum node on v4f32 / v8f16 to the matching
// MVE_VMAXNM instruction. Only active when HasMVEFloat holds.
let Predicates = [HasMVEFloat] in {
  def : Pat<
      (v4f32 (fmaxnum (v4f32 MQPR:$val1), (v4f32 MQPR:$val2))),
      (v4f32 (MVE_VMAXNMf32 (v4f32 MQPR:$val1), (v4f32 MQPR:$val2)))>;
  def : Pat<
      (v8f16 (fmaxnum (v8f16 MQPR:$val1), (v8f16 MQPR:$val2))),
      (v8f16 (MVE_VMAXNMf16 (v8f16 MQPR:$val1), (v8f16 MQPR:$val2)))>;
}
|
|
|
|
|
2019-06-18 23:05:42 +08:00
|
|
|
// "vminnm" uses bit_21 = 1; the f32 form has sz = 0 and the f16 form sz = 1.
def MVE_VMINNMf32 : MVE_VMINMAXNM<"vminnm", "f32", 0b0, 0b1>;
def MVE_VMINNMf16 : MVE_VMINMAXNM<"vminnm", "f16", 0b1, 0b1>;
|
[ARM] Set up infrastructure for MVE vector instructions.
This commit prepares the way to start adding the main collection of
MVE instructions, which operate on the 128-bit vector registers.
The most obvious thing that's needed, and the simplest, is to add the
MQPR register class, which is like the existing QPR except that it has
fewer registers in it.
The more complicated part: MVE defines a system of vector predication,
in which instructions operating on 128-bit vector registers can be
constrained to operate on only a subset of the lanes, using a system
of prefix instructions similar to the existing Thumb IT, in that you
have one prefix instruction which designates up to 4 following
instructions as subject to predication, and within that sequence, the
predicate can be inverted by means of T/E suffixes ('Then' / 'Else').
To support instructions of this type, we've added two new Tablegen
classes `vpred_n` and `vpred_r` for standard clusters of MC operands
to add to a predicated instruction. Both include a flag indicating how
the instruction is predicated at all (options are T, E and 'not
predicated'), and an input register field for the register controlling
the set of active lanes. They differ from each other in that `vpred_r`
also includes an input operand for the previous value of the output
register, for instructions that leave inactive lanes unchanged.
`vpred_n` lacks that extra operand; it will be used for instructions
that don't preserve inactive lanes in their output register (either
because inactive lanes are zeroed, as the MVE load instructions do, or
because the output register isn't a vector at all).
This commit also adds the family of prefix instructions themselves
(VPT / VPST), and all the machinery needed to work with them in
assembly and disassembly (e.g. generating the 't' and 'e' mnemonic
suffixes on disassembled instructions within a predicated block)
I've added a couple of demo instructions that derive from the new
Tablegen base classes and use those two operand clusters. The bulk of
the vector instructions will come in followup commits small enough to
be manageable. (One exception is that I've added the full version of
`isMnemonicVPTPredicable` in the AsmParser, because it seemed
pointless to carefully split it up.)
Reviewers: dmgreen, samparker, SjoerdMeijer, t.p.northover
Subscribers: javed.absar, kristof.beyls, hiraditya, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D62669
llvm-svn: 363258
2019-06-13 21:11:13 +08:00
|
|
|
|
2019-07-13 22:29:02 +08:00
|
|
|
// Select the generic fminnum node on v4f32 / v8f16 to the matching
// MVE_VMINNM instruction. Only active when HasMVEFloat holds.
let Predicates = [HasMVEFloat] in {
  def : Pat<
      (v4f32 (fminnum (v4f32 MQPR:$val1), (v4f32 MQPR:$val2))),
      (v4f32 (MVE_VMINNMf32 (v4f32 MQPR:$val1), (v4f32 MQPR:$val2)))>;
  def : Pat<
      (v8f16 (fminnum (v8f16 MQPR:$val1), (v8f16 MQPR:$val2))),
      (v8f16 (MVE_VMINNMf16 (v8f16 MQPR:$val1), (v8f16 MQPR:$val2)))>;
}
|
|
|
|
|
|
|
|
|
2019-06-18 23:51:46 +08:00
|
|
|
// Encoding class built on MVE_comp, with no itinerary and no constraint
// string, used for the "vmax"/"vmin" definitions.
//   U     - placed in Inst{28}.
//   size  - two-bit field placed in Inst{21-20}.
//   bit_4 - placed in Inst{4}.
class MVE_VMINMAX<string iname, string suffix, bit U, bits<2> size,
                  bit bit_4, list<dag> pattern=[]>
  : MVE_comp<NoItinerary, iname, suffix, "", pattern> {

  let Inst{28} = U;
  // Inst{25-24} = 0b11 and Inst{23} = 0b0, written as one field.
  let Inst{25-23} = 0b110;
  let Inst{21-20} = size;
  let Inst{11} = 0b0;
  let Inst{8} = 0b0;
  let Inst{6} = 0b1;
  let Inst{4} = bit_4;
}
|
|
|
|
|
|
|
|
// Creates the six type variants (s8, s16, s32, u8, u16, u32) of one
// MVE_VMINMAX mnemonic. The signed variants pass U = 0 and the unsigned
// variants U = 1; the size field is 0b00 / 0b01 / 0b10 for 8 / 16 / 32.
// bit_4 is forwarded unchanged to every variant.
multiclass MVE_VMINMAX_all_sizes<string iname, bit bit_4> {
  // Signed variants.
  def s8  : MVE_VMINMAX<iname, "s8",  0b0, 0b00, bit_4>;
  def s16 : MVE_VMINMAX<iname, "s16", 0b0, 0b01, bit_4>;
  def s32 : MVE_VMINMAX<iname, "s32", 0b0, 0b10, bit_4>;
  // Unsigned variants.
  def u8  : MVE_VMINMAX<iname, "u8",  0b1, 0b00, bit_4>;
  def u16 : MVE_VMINMAX<iname, "u16", 0b1, 0b01, bit_4>;
  def u32 : MVE_VMINMAX<iname, "u32", 0b1, 0b10, bit_4>;
}
|
|
|
|
|
|
|
|
// "vmax" is instantiated with bit_4 = 0 and "vmin" with bit_4 = 1.
defm MVE_VMAX : MVE_VMINMAX_all_sizes<"vmax", 0b0>;
defm MVE_VMIN : MVE_VMINMAX_all_sizes<"vmin", 0b1>;
|
|
|
|
|
2019-07-13 22:48:54 +08:00
|
|
|
// Select the generic integer smin / smax / umin / umax nodes on
// v16i8, v8i16 and v4i32 to the matching MVE_VMIN / MVE_VMAX instruction.
// Only active when HasMVEInt holds.
let Predicates = [HasMVEInt] in {
  // smin -> MVE_VMINs*
  def : Pat<
      (v16i8 (smin (v16i8 MQPR:$val1), (v16i8 MQPR:$val2))),
      (v16i8 (MVE_VMINs8 (v16i8 MQPR:$val1), (v16i8 MQPR:$val2)))>;
  def : Pat<
      (v8i16 (smin (v8i16 MQPR:$val1), (v8i16 MQPR:$val2))),
      (v8i16 (MVE_VMINs16 (v8i16 MQPR:$val1), (v8i16 MQPR:$val2)))>;
  def : Pat<
      (v4i32 (smin (v4i32 MQPR:$val1), (v4i32 MQPR:$val2))),
      (v4i32 (MVE_VMINs32 (v4i32 MQPR:$val1), (v4i32 MQPR:$val2)))>;

  // smax -> MVE_VMAXs*
  def : Pat<
      (v16i8 (smax (v16i8 MQPR:$val1), (v16i8 MQPR:$val2))),
      (v16i8 (MVE_VMAXs8 (v16i8 MQPR:$val1), (v16i8 MQPR:$val2)))>;
  def : Pat<
      (v8i16 (smax (v8i16 MQPR:$val1), (v8i16 MQPR:$val2))),
      (v8i16 (MVE_VMAXs16 (v8i16 MQPR:$val1), (v8i16 MQPR:$val2)))>;
  def : Pat<
      (v4i32 (smax (v4i32 MQPR:$val1), (v4i32 MQPR:$val2))),
      (v4i32 (MVE_VMAXs32 (v4i32 MQPR:$val1), (v4i32 MQPR:$val2)))>;

  // umin -> MVE_VMINu*
  def : Pat<
      (v16i8 (umin (v16i8 MQPR:$val1), (v16i8 MQPR:$val2))),
      (v16i8 (MVE_VMINu8 (v16i8 MQPR:$val1), (v16i8 MQPR:$val2)))>;
  def : Pat<
      (v8i16 (umin (v8i16 MQPR:$val1), (v8i16 MQPR:$val2))),
      (v8i16 (MVE_VMINu16 (v8i16 MQPR:$val1), (v8i16 MQPR:$val2)))>;
  def : Pat<
      (v4i32 (umin (v4i32 MQPR:$val1), (v4i32 MQPR:$val2))),
      (v4i32 (MVE_VMINu32 (v4i32 MQPR:$val1), (v4i32 MQPR:$val2)))>;

  // umax -> MVE_VMAXu*
  def : Pat<
      (v16i8 (umax (v16i8 MQPR:$val1), (v16i8 MQPR:$val2))),
      (v16i8 (MVE_VMAXu8 (v16i8 MQPR:$val1), (v16i8 MQPR:$val2)))>;
  def : Pat<
      (v8i16 (umax (v8i16 MQPR:$val1), (v8i16 MQPR:$val2))),
      (v8i16 (MVE_VMAXu16 (v8i16 MQPR:$val1), (v8i16 MQPR:$val2)))>;
  def : Pat<
      (v4i32 (umax (v4i32 MQPR:$val1), (v4i32 MQPR:$val2))),
      (v4i32 (MVE_VMAXu32 (v4i32 MQPR:$val1), (v4i32 MQPR:$val2)))>;
}
|
|
|
|
|
[ARM] Set up infrastructure for MVE vector instructions.
This commit prepares the way to start adding the main collection of
MVE instructions, which operate on the 128-bit vector registers.
The most obvious thing that's needed, and the simplest, is to add the
MQPR register class, which is like the existing QPR except that it has
fewer registers in it.
The more complicated part: MVE defines a system of vector predication,
in which instructions operating on 128-bit vector registers can be
constrained to operate on only a subset of the lanes, using a system
of prefix instructions similar to the existing Thumb IT, in that you
have one prefix instruction which designates up to 4 following
instructions as subject to predication, and within that sequence, the
predicate can be inverted by means of T/E suffixes ('Then' / 'Else').
To support instructions of this type, we've added two new Tablegen
classes `vpred_n` and `vpred_r` for standard clusters of MC operands
to add to a predicated instruction. Both include a flag indicating how
the instruction is predicated at all (options are T, E and 'not
predicated'), and an input register field for the register controlling
the set of active lanes. They differ from each other in that `vpred_r`
also includes an input operand for the previous value of the output
register, for instructions that leave inactive lanes unchanged.
`vpred_n` lacks that extra operand; it will be used for instructions
that don't preserve inactive lanes in their output register (either
because inactive lanes are zeroed, as the MVE load instructions do, or
because the output register isn't a vector at all).
This commit also adds the family of prefix instructions themselves
(VPT / VPST), and all the machinery needed to work with them in
assembly and disassembly (e.g. generating the 't' and 'e' mnemonic
suffixes on disassembled instructions within a predicated block)
I've added a couple of demo instructions that derive from the new
Tablegen base classes and use those two operand clusters. The bulk of
the vector instructions will come in followup commits small enough to
be manageable. (One exception is that I've added the full version of
`isMnemonicVPTPredicable` in the AsmParser, because it seemed
pointless to carefully split it up.)
Reviewers: dmgreen, samparker, SjoerdMeijer, t.p.northover
Subscribers: javed.absar, kristof.beyls, hiraditya, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D62669
llvm-svn: 363258
2019-06-13 21:11:13 +08:00
|
|
|
// end of mve_comp instructions
|
|
|
|
|
2019-07-15 19:22:05 +08:00
|
|
|
// start of mve_bit instructions
|
2019-06-19 00:19:59 +08:00
|
|
|
|
2019-07-15 19:22:05 +08:00
|
|
|
// Base class for the bitwise / byte-reverse instructions defined below.
// The operand lists, assembly operand string and constraint string are
// supplied by the subclass; this class passes NoItinerary and vpred_r to
// MVE_p and encodes the Qd and Qm register numbers.
class MVE_bit_arith<dag oops, dag iops, string iname, string suffix,
                    string ops, string cstr, list<dag> pattern=[]>
  : MVE_p<oops, iops, NoItinerary, iname, suffix, ops, vpred_r, cstr,
          pattern> {
  bits<4> Qd;
  bits<4> Qm;

  // Destination register: top bit in Inst{22}, low three bits in Inst{15-13}.
  let Inst{22} = Qd{3};
  let Inst{15-13} = Qd{2-0};

  // Source register Qm: top bit in Inst{5}, low three bits in Inst{3-1}.
  let Inst{5} = Qm{3};
  let Inst{3-1} = Qm{2-0};
}
|
|
|
|
|
2019-07-15 19:22:05 +08:00
|
|
|
// Register form of "vbic": "$Qd, $Qn, $Qm" with no type suffix and no
// constraint string. Adds the Qn operand encoding on top of MVE_bit_arith
// and marks the instruction as valid for tail predication.
def MVE_VBIC : MVE_bit_arith<(outs MQPR:$Qd), (ins MQPR:$Qn, MQPR:$Qm),
                             "vbic", "", "$Qd, $Qn, $Qm", ""> {
  bits<4> Qn;

  // Qn: top bit in Inst{7}, low three bits in Inst{19-17}.
  let Inst{19-17} = Qn{2-0};
  let Inst{7} = Qn{3};

  // Constant opcode bits.
  let Inst{28} = 0b0;
  let Inst{25-23} = 0b110;
  let Inst{21-20} = 0b01;
  let Inst{16} = 0b0;
  let Inst{12-8} = 0b00001;
  let Inst{6} = 0b1;
  let Inst{4} = 0b1;
  let Inst{0} = 0b0;

  let validForTailPredication = 1;
}
|
|
|
|
|
2019-08-01 19:22:03 +08:00
|
|
|
// Encoding class for the single-source "$Qd, $Qm" vrev instructions.
//   size    - two-bit field placed in Inst{19-18}.
//   bit_8_7 - two-bit field placed in Inst{8-7}.
//   cstr    - optional operand constraint string; empty by default.
class MVE_VREV<string iname, string suffix, bits<2> size, bits<2> bit_8_7,
               string cstr="">
  : MVE_bit_arith<(outs MQPR:$Qd), (ins MQPR:$Qm), iname,
                  suffix, "$Qd, $Qm", cstr> {

  // Fields taken from the template arguments.
  let Inst{19-18} = size;
  let Inst{8-7} = bit_8_7;

  // Constant opcode bits.
  let Inst{28} = 0b1;
  let Inst{25-23} = 0b111;
  let Inst{21-20} = 0b11;
  let Inst{17-16} = 0b00;
  let Inst{12-9} = 0b0000;
  let Inst{6} = 0b1;
  let Inst{4} = 0b0;
  let Inst{0} = 0b0;
}
|
|
|
|
|
2019-08-01 19:22:03 +08:00
|
|
|
// vrev64: bit_8_7 = 0b00. These three definitions also pass an
// "@earlyclobber $Qd" constraint.
def MVE_VREV64_8  : MVE_VREV<"vrev64", "8",  0b00, 0b00, "@earlyclobber $Qd">;
def MVE_VREV64_16 : MVE_VREV<"vrev64", "16", 0b01, 0b00, "@earlyclobber $Qd">;
def MVE_VREV64_32 : MVE_VREV<"vrev64", "32", 0b10, 0b00, "@earlyclobber $Qd">;

// vrev32: bit_8_7 = 0b01, default (empty) constraint.
def MVE_VREV32_8  : MVE_VREV<"vrev32", "8",  0b00, 0b01>;
def MVE_VREV32_16 : MVE_VREV<"vrev32", "16", 0b01, 0b01>;

// vrev16: bit_8_7 = 0b10, default (empty) constraint.
def MVE_VREV16_8  : MVE_VREV<"vrev16", "8",  0b00, 0b10>;
|
2019-06-19 00:19:59 +08:00
|
|
|
|
2019-09-16 23:20:10 +08:00
|
|
|
// Select a vector byte swap as a byte-granular VREV whose reversal width
// equals the element width.
let Predicates = [HasMVEInt] in {
  // 16-bit elements -> vrev16.8
  def : Pat<(v8i16 (bswap (v8i16 MQPR:$vec))),
            (v8i16 (MVE_VREV16_8 (v8i16 MQPR:$vec)))>;
  // 32-bit elements -> vrev32.8
  def : Pat<(v4i32 (bswap (v4i32 MQPR:$vec))),
            (v4i32 (MVE_VREV32_8 (v4i32 MQPR:$vec)))>;
}
|
|
|
|
|
2019-07-13 23:43:00 +08:00
|
|
|
// Map the ARMvrev64 / ARMvrev32 / ARMvrev16 nodes onto the VREV instruction
// whose element size matches the vector type.
let Predicates = [HasMVEInt] in {
  // ARMvrev64 on integer vectors.
  def : Pat<(v4i32 (ARMvrev64 (v4i32 MQPR:$vec))),
            (v4i32 (MVE_VREV64_32 (v4i32 MQPR:$vec)))>;
  def : Pat<(v8i16 (ARMvrev64 (v8i16 MQPR:$vec))),
            (v8i16 (MVE_VREV64_16 (v8i16 MQPR:$vec)))>;
  def : Pat<(v16i8 (ARMvrev64 (v16i8 MQPR:$vec))),
            (v16i8 (MVE_VREV64_8 (v16i8 MQPR:$vec)))>;

  // ARMvrev32 on integer vectors.
  def : Pat<(v8i16 (ARMvrev32 (v8i16 MQPR:$vec))),
            (v8i16 (MVE_VREV32_16 (v8i16 MQPR:$vec)))>;
  def : Pat<(v16i8 (ARMvrev32 (v16i8 MQPR:$vec))),
            (v16i8 (MVE_VREV32_8 (v16i8 MQPR:$vec)))>;

  // ARMvrev16 on byte vectors.
  def : Pat<(v16i8 (ARMvrev16 (v16i8 MQPR:$vec))),
            (v16i8 (MVE_VREV16_8 (v16i8 MQPR:$vec)))>;

  // Floating-point vector types reuse the same-width integer instructions.
  def : Pat<(v4f32 (ARMvrev64 (v4f32 MQPR:$vec))),
            (v4f32 (MVE_VREV64_32 (v4f32 MQPR:$vec)))>;
  def : Pat<(v8f16 (ARMvrev64 (v8f16 MQPR:$vec))),
            (v8f16 (MVE_VREV64_16 (v8f16 MQPR:$vec)))>;
  def : Pat<(v8f16 (ARMvrev32 (v8f16 MQPR:$vec))),
            (v8f16 (MVE_VREV32_16 (v8f16 MQPR:$vec)))>;
}
|
|
|
|
|
|
|
|
// Register form of VMVN: "vmvn Qd, Qm". The ISel patterns later in this file
// select it for vnotq.
def MVE_VMVN
  : MVE_bit_arith<(outs MQPR:$Qd), (ins MQPR:$Qm),
                  "vmvn", "", "$Qd, $Qm", ""> {
  // Fixed opcode bits; Qd and Qm are placed by MVE_bit_arith.
  let Inst{0}     = 0b0;
  let Inst{4}     = 0b0;
  let Inst{12-6}  = 0b0010111;
  let Inst{21-16} = 0b110000;
  let Inst{25-23} = 0b111;
  let Inst{28}    = 0b1;

  let validForTailPredication = 1;
}
|
|
|
|
|
2019-07-15 19:22:05 +08:00
|
|
|
// Bitwise NOT of any integer vector type selects VMVN. The pattern is the
// same for every element width, so it is stamped out once per type.
let Predicates = [HasMVEInt] in {
  foreach VT = [v16i8, v8i16, v4i32, v2i64] in
    def : Pat<(VT (vnotq (VT MQPR:$val1))),
              (VT (MVE_VMVN (VT MQPR:$val1)))>;
}
|
|
|
|
|
2019-07-15 19:22:05 +08:00
|
|
|
// Encoding class for the three-register bitwise operations
// ("<iname> Qd, Qn, Qm").
//   bit_21_20 - written to Inst{21-20}.
//   bit_28    - written to Inst{28}.
// Together these two fields distinguish the instructions defined from this
// class.
class MVE_bit_ops<string iname, bits<2> bit_21_20, bit bit_28>
  : MVE_bit_arith<(outs MQPR:$Qd), (ins MQPR:$Qn, MQPR:$Qm),
                  iname, "", "$Qd, $Qn, $Qm", ""> {
  bits<4> Qn;

  // Per-instruction selector bits.
  let Inst{28}    = bit_28;
  let Inst{21-20} = bit_21_20;

  // Qn: low three bits in Inst{19-17}, top bit in Inst{7}.
  let Inst{19-17} = Qn{2-0};
  let Inst{7}     = Qn{3};

  // Fixed opcode bits.
  let Inst{0}     = 0b0;
  let Inst{4}     = 0b1;
  let Inst{6}     = 0b1;
  let Inst{12-8}  = 0b00001;
  let Inst{16}    = 0b0;
  let Inst{25-23} = 0b110;

  let validForTailPredication = 1;
}
|
|
|
|
|
2019-07-15 19:22:05 +08:00
|
|
|
// Three-register bitwise instructions.
// Template arguments: mnemonic, Inst{21-20}, Inst{28}.
def MVE_VEOR
  : MVE_bit_ops<"veor", 0b00, 0b1>;
def MVE_VORN
  : MVE_bit_ops<"vorn", 0b11, 0b0>;
def MVE_VORR
  : MVE_bit_ops<"vorr", 0b10, 0b0>;
def MVE_VAND
  : MVE_bit_ops<"vand", 0b00, 0b0>;
|
2019-06-19 00:19:59 +08:00
|
|
|
|
2019-07-15 19:22:05 +08:00
|
|
|
// Accept (and ignore) a data-type suffix on the register-register bitwise
// instructions: every listed suffix is an alias for the same unsuffixed
// instruction.
foreach sfx = ["s8", "s16", "s32", "u8", "u16", "u32",
               "i8", "i16", "i32", "f16", "f32"] in {
  def : MVEInstAlias<
          "vbic${vp}." # sfx # "\t$QdSrc, $QnSrc, $QmSrc",
          (MVE_VBIC MQPR:$QdSrc, MQPR:$QnSrc, MQPR:$QmSrc, vpred_r:$vp)>;
  def : MVEInstAlias<
          "veor${vp}." # sfx # "\t$QdSrc, $QnSrc, $QmSrc",
          (MVE_VEOR MQPR:$QdSrc, MQPR:$QnSrc, MQPR:$QmSrc, vpred_r:$vp)>;
  def : MVEInstAlias<
          "vorn${vp}." # sfx # "\t$QdSrc, $QnSrc, $QmSrc",
          (MVE_VORN MQPR:$QdSrc, MQPR:$QnSrc, MQPR:$QmSrc, vpred_r:$vp)>;
  def : MVEInstAlias<
          "vorr${vp}." # sfx # "\t$QdSrc, $QnSrc, $QmSrc",
          (MVE_VORR MQPR:$QdSrc, MQPR:$QnSrc, MQPR:$QmSrc, vpred_r:$vp)>;
  def : MVEInstAlias<
          "vand${vp}." # sfx # "\t$QdSrc, $QnSrc, $QmSrc",
          (MVE_VAND MQPR:$QdSrc, MQPR:$QnSrc, MQPR:$QmSrc, vpred_r:$vp)>;
}
|
|
|
|
|
2019-07-15 19:22:05 +08:00
|
|
|
// ISel patterns for the register-register bitwise operations. Every pattern
// has the same shape for all four integer vector types, so each operation is
// generated with one loop over the types. The loops are kept in the original
// operation order: and, or, xor, and-not, or-not.
let Predicates = [HasMVEInt] in {
  // and -> VAND
  foreach VT = [v16i8, v8i16, v4i32, v2i64] in
    def : Pat<(VT (and (VT MQPR:$val1), (VT MQPR:$val2))),
              (VT (MVE_VAND (VT MQPR:$val1), (VT MQPR:$val2)))>;

  // or -> VORR
  foreach VT = [v16i8, v8i16, v4i32, v2i64] in
    def : Pat<(VT (or (VT MQPR:$val1), (VT MQPR:$val2))),
              (VT (MVE_VORR (VT MQPR:$val1), (VT MQPR:$val2)))>;

  // xor -> VEOR
  foreach VT = [v16i8, v8i16, v4i32, v2i64] in
    def : Pat<(VT (xor (VT MQPR:$val1), (VT MQPR:$val2))),
              (VT (MVE_VEOR (VT MQPR:$val1), (VT MQPR:$val2)))>;

  // and with an inverted second operand -> VBIC
  foreach VT = [v16i8, v8i16, v4i32, v2i64] in
    def : Pat<(VT (and (VT MQPR:$val1), (vnotq MQPR:$val2))),
              (VT (MVE_VBIC (VT MQPR:$val1), (VT MQPR:$val2)))>;

  // or with an inverted second operand -> VORN
  foreach VT = [v16i8, v8i16, v4i32, v2i64] in
    def : Pat<(VT (or (VT MQPR:$val1), (vnotq MQPR:$val2))),
              (VT (MVE_VORN (VT MQPR:$val1), (VT MQPR:$val2)))>;
}
|
2019-07-15 19:22:05 +08:00
|
|
|
|
|
|
|
// Encoding class for the bitwise instructions that take an immediate
// ("<iname>.<suffix> Qd, #imm"). The destination is tied to the $Qd_src
// input, and the instruction uses the vpred_n predicate operand.
//   cmode - written to Inst{11-8}.
//   inOps - input operand list; must name $Qd_src and $imm.
class MVE_bit_cmode<string iname, string suffix, bits<4> cmode, dag inOps>
  : MVE_p<(outs MQPR:$Qd), inOps, NoItinerary, iname, suffix,
          "$Qd, $imm", vpred_n, "$Qd = $Qd_src"> {
  bits<8> imm;
  bits<4> Qd;

  // The 8-bit immediate is split across three fields.
  let Inst{28}    = imm{7};
  let Inst{18-16} = imm{6-4};
  let Inst{3-0}   = imm{3-0};

  // Qd: low three bits in Inst{15-13}, top bit in Inst{22}.
  let Inst{15-13} = Qd{2-0};
  let Inst{22}    = Qd{3};

  let Inst{11-8}  = cmode;

  // Fixed opcode bits. Inst{5} is left for subclasses to set.
  let Inst{4}     = 0b1;
  let Inst{7-6}   = 0b01;
  let Inst{12}    = 0b0;
  let Inst{21-19} = 0b000;
  let Inst{27-23} = 0b11111;
}
|
2019-07-15 19:22:05 +08:00
|
|
|
|
|
|
|
// VORR with an immediate operand: MVE_bit_cmode with Inst{5} clear.
class MVE_VORR<string suffix, bits<4> cmode, ExpandImm imm_type>
  : MVE_bit_cmode<"vorr", suffix, cmode,
                  (ins MQPR:$Qd_src, imm_type:$imm)> {
  let validForTailPredication = 1;
  let Inst{5} = 0b0;
}
|
|
|
|
|
2019-07-15 19:22:05 +08:00
|
|
|
// VORR-immediate variants, one per (element size, immediate operand class).
// Template arguments: suffix, cmode, immediate operand class.
def MVE_VORRIZ0v4i32
  : MVE_VORR<"i32", 0b0001, expzero00>;
def MVE_VORRIZ0v8i16
  : MVE_VORR<"i16", 0b1001, expzero00>;
def MVE_VORRIZ8v4i32
  : MVE_VORR<"i32", 0b0011, expzero08>;
def MVE_VORRIZ8v8i16
  : MVE_VORR<"i16", 0b1011, expzero08>;
def MVE_VORRIZ16v4i32
  : MVE_VORR<"i32", 0b0101, expzero16>;
def MVE_VORRIZ24v4i32
  : MVE_VORR<"i32", 0b0111, expzero24>;
|
2019-06-19 00:19:59 +08:00
|
|
|
|
2019-07-15 19:22:05 +08:00
|
|
|
// Assembler pseudo-instructions for "vorn Qd, #imm". They take the
// "inverted" immediate operand classes.
// NOTE(review): presumably the assembler rewrites these to the matching
// VORR-immediate with the immediate inverted; confirm in the asm parser.
def MVE_VORNIZ0v4i32
  : MVEAsmPseudo<"vorn${vp}.i32\t$Qd, $imm",
                 (ins MQPR:$Qd_src, expzero00inv32:$imm, vpred_n:$vp),
                 (outs MQPR:$Qd)>;
def MVE_VORNIZ0v8i16
  : MVEAsmPseudo<"vorn${vp}.i16\t$Qd, $imm",
                 (ins MQPR:$Qd_src, expzero00inv16:$imm, vpred_n:$vp),
                 (outs MQPR:$Qd)>;
def MVE_VORNIZ8v4i32
  : MVEAsmPseudo<"vorn${vp}.i32\t$Qd, $imm",
                 (ins MQPR:$Qd_src, expzero08inv32:$imm, vpred_n:$vp),
                 (outs MQPR:$Qd)>;
def MVE_VORNIZ8v8i16
  : MVEAsmPseudo<"vorn${vp}.i16\t$Qd, $imm",
                 (ins MQPR:$Qd_src, expzero08inv16:$imm, vpred_n:$vp),
                 (outs MQPR:$Qd)>;
def MVE_VORNIZ16v4i32
  : MVEAsmPseudo<"vorn${vp}.i32\t$Qd, $imm",
                 (ins MQPR:$Qd_src, expzero16inv32:$imm, vpred_n:$vp),
                 (outs MQPR:$Qd)>;
def MVE_VORNIZ24v4i32
  : MVEAsmPseudo<"vorn${vp}.i32\t$Qd, $imm",
                 (ins MQPR:$Qd_src, expzero24inv32:$imm, vpred_n:$vp),
                 (outs MQPR:$Qd)>;
|
2019-06-19 00:19:59 +08:00
|
|
|
|
2019-07-15 19:22:05 +08:00
|
|
|
// "vmov Qd, Qm" is an alias for VORR with Qm supplied as both source
// operands.
def MVE_VMOV
  : MVEInstAlias<"vmov${vp}\t$Qd, $Qm",
                 (MVE_VORR MQPR:$Qd, MQPR:$Qm, MQPR:$Qm, vpred_r:$vp)>;
|
|
|
|
|
|
|
|
// VBIC with an immediate operand: MVE_bit_cmode with Inst{5} set.
class MVE_VBIC<string suffix, bits<4> cmode, ExpandImm imm_type>
  : MVE_bit_cmode<"vbic", suffix, cmode,
                  (ins MQPR:$Qd_src, imm_type:$imm)> {
  let validForTailPredication = 1;
  let Inst{5} = 0b1;
}
|
2019-07-15 19:22:05 +08:00
|
|
|
|
|
|
|
// VBIC-immediate variants, one per (element size, immediate operand class).
// Template arguments: suffix, cmode, immediate operand class.
def MVE_VBICIZ0v4i32
  : MVE_VBIC<"i32", 0b0001, expzero00>;
def MVE_VBICIZ0v8i16
  : MVE_VBIC<"i16", 0b1001, expzero00>;
def MVE_VBICIZ8v4i32
  : MVE_VBIC<"i32", 0b0011, expzero08>;
def MVE_VBICIZ8v8i16
  : MVE_VBIC<"i16", 0b1011, expzero08>;
def MVE_VBICIZ16v4i32
  : MVE_VBIC<"i32", 0b0101, expzero16>;
def MVE_VBICIZ24v4i32
  : MVE_VBIC<"i32", 0b0111, expzero24>;
|
|
|
|
|
|
|
|
// Assembler pseudo-instructions for "vand Qda, #imm". They take the
// "inverted" immediate operand classes.
// NOTE(review): presumably the assembler rewrites these to the matching
// VBIC-immediate with the immediate inverted; confirm in the asm parser.
def MVE_VANDIZ0v4i32
  : MVEAsmPseudo<"vand${vp}.i32\t$Qda, $imm",
                 (ins MQPR:$Qda_src, expzero00inv32:$imm, vpred_n:$vp),
                 (outs MQPR:$Qda)>;
def MVE_VANDIZ0v8i16
  : MVEAsmPseudo<"vand${vp}.i16\t$Qda, $imm",
                 (ins MQPR:$Qda_src, expzero00inv16:$imm, vpred_n:$vp),
                 (outs MQPR:$Qda)>;
def MVE_VANDIZ8v4i32
  : MVEAsmPseudo<"vand${vp}.i32\t$Qda, $imm",
                 (ins MQPR:$Qda_src, expzero08inv32:$imm, vpred_n:$vp),
                 (outs MQPR:$Qda)>;
def MVE_VANDIZ8v8i16
  : MVEAsmPseudo<"vand${vp}.i16\t$Qda, $imm",
                 (ins MQPR:$Qda_src, expzero08inv16:$imm, vpred_n:$vp),
                 (outs MQPR:$Qda)>;
def MVE_VANDIZ16v4i32
  : MVEAsmPseudo<"vand${vp}.i32\t$Qda, $imm",
                 (ins MQPR:$Qda_src, expzero16inv32:$imm, vpred_n:$vp),
                 (outs MQPR:$Qda)>;
def MVE_VANDIZ24v4i32
  : MVEAsmPseudo<"vand${vp}.i32\t$Qda, $imm",
                 (ins MQPR:$Qda_src, expzero24inv32:$imm, vpred_n:$vp),
                 (outs MQPR:$Qda)>;
|
|
|
|
|
|
|
|
// Record type bundling everything that differs between the two directions of
// a lane move (vector lane -> GPR, and GPR -> vector lane). MVE_VMOV_lane
// reads these fields from its `dir` argument.
class MVE_VMOV_lane_direction {
  bit bit_20;   // value for Inst{20}
  dag oops;     // output operand list
  dag iops;     // input operand list (the lane index operand is appended)
  string ops;   // assembly operand string
  string cstr;  // operand constraint string
}
|
2019-07-15 19:22:05 +08:00
|
|
|
// Direction record for reading a lane: "vmov Rt, Qd[idx]".
// The vector is input-only, so no constraint is needed.
def MVE_VMOV_from_lane : MVE_VMOV_lane_direction {
  let oops   = (outs rGPR:$Rt);
  let iops   = (ins MQPR:$Qd);
  let ops    = "$Rt, $Qd$Idx";
  let cstr   = "";
  let bit_20 = 0b1;
}
|
2019-07-15 19:22:05 +08:00
|
|
|
// Direction record for writing a lane: "vmov Qd[idx], Rt".
// The result vector is tied to the $Qd_src input.
def MVE_VMOV_to_lane : MVE_VMOV_lane_direction {
  let oops   = (outs MQPR:$Qd);
  let iops   = (ins MQPR:$Qd_src, rGPR:$Rt);
  let ops    = "$Qd$Idx, $Rt";
  let cstr   = "$Qd = $Qd_src";
  let bit_20 = 0b0;
}
|
|
|
|
|
2019-07-15 19:22:05 +08:00
|
|
|
// Common encoding for the single-lane VMOVs.
//   suffix  - data-type suffix in the assembly syntax.
//   U       - written to Inst{23}.
//   indexop - dag holding the lane index operand; concatenated onto the
//             direction record's input list.
//   dir     - direction record supplying operands, asm string, constraint
//             and Inst{20}.
// Subclasses encode the lane index into the bits not set here.
class MVE_VMOV_lane<string suffix, bit U, dag indexop,
                    MVE_VMOV_lane_direction dir>
  : MVE_VMOV_lane_base<dir.oops, !con(dir.iops, indexop), NoItinerary,
                       "vmov", suffix, dir.ops, dir.cstr, []> {
  bits<4> Qd;
  bits<4> Rt;

  // Parameterised bits.
  let Inst{23} = U;
  let Inst{20} = dir.bit_20;

  // Register operands. Qd is split: low bits in Inst{19-17}, top bit in
  // Inst{7}.
  let Inst{19-17} = Qd{2-0};
  let Inst{7}     = Qd{3};
  let Inst{15-12} = Rt{3-0};

  // Fixed opcode bits.
  let Inst{4-0}   = 0b10000;
  let Inst{11-8}  = 0b1011;
  let Inst{31-24} = 0b11101110;
}
|
2019-07-15 19:22:05 +08:00
|
|
|
|
|
|
|
// 32-bit lane move: four lanes, 2-bit index. Uses the "32" suffix with
// U = 0, and is gated on HasFPRegsV8_1M.
class MVE_VMOV_lane_32<MVE_VMOV_lane_direction dir>
  : MVE_VMOV_lane<"32", 0b0, (ins MVEVectorIndex<4>:$Idx), dir> {
  bits<2> Idx;

  // Lane index: Idx{1} -> Inst{16}, Idx{0} -> Inst{21}.
  let Inst{21} = Idx{0};
  let Inst{16} = Idx{1};

  // Fixed bits for the 32-bit form.
  let Inst{6-5} = 0b00;
  let Inst{22}  = 0b0;

  let Predicates = [HasFPRegsV8_1M];
}
|
2019-07-15 19:22:05 +08:00
|
|
|
|
|
|
|
// 16-bit lane move: eight lanes, 3-bit index.
class MVE_VMOV_lane_16<string suffix, bit U, MVE_VMOV_lane_direction dir>
  : MVE_VMOV_lane<suffix, U, (ins MVEVectorIndex<8>:$Idx), dir> {
  bits<3> Idx;

  // Lane index: Idx{2} -> Inst{16}, Idx{1} -> Inst{21}, Idx{0} -> Inst{6}.
  let Inst{6}  = Idx{0};
  let Inst{21} = Idx{1};
  let Inst{16} = Idx{2};

  // Fixed bits for the 16-bit form.
  let Inst{5}  = 0b1;
  let Inst{22} = 0b0;
}
|
2019-07-15 19:22:05 +08:00
|
|
|
|
|
|
|
// 8-bit lane move: sixteen lanes, 4-bit index.
class MVE_VMOV_lane_8<string suffix, bit U, MVE_VMOV_lane_direction dir>
  : MVE_VMOV_lane<suffix, U, (ins MVEVectorIndex<16>:$Idx), dir> {
  bits<4> Idx;

  // Lane index: Idx{3} -> Inst{16}, Idx{2} -> Inst{21},
  //             Idx{1} -> Inst{6},  Idx{0} -> Inst{5}.
  let Inst{5}  = Idx{0};
  let Inst{6}  = Idx{1};
  let Inst{21} = Idx{2};
  let Inst{16} = Idx{3};

  // Fixed bit for the 8-bit form.
  let Inst{22} = 0b1;
}
|
|
|
|
|
2019-07-15 19:22:05 +08:00
|
|
|
// Concrete lane-move instructions.

// 32-bit lanes.
def MVE_VMOV_from_lane_32
  : MVE_VMOV_lane_32<MVE_VMOV_from_lane>;
def MVE_VMOV_to_lane_32
  : MVE_VMOV_lane_32<MVE_VMOV_to_lane>;

// 16-bit lanes: the reads come in "s16" (U = 0) and "u16" (U = 1) forms;
// the write has a single "16" form.
def MVE_VMOV_from_lane_s16
  : MVE_VMOV_lane_16<"s16", 0b0, MVE_VMOV_from_lane>;
def MVE_VMOV_from_lane_u16
  : MVE_VMOV_lane_16<"u16", 0b1, MVE_VMOV_from_lane>;
def MVE_VMOV_to_lane_16
  : MVE_VMOV_lane_16<"16", 0b0, MVE_VMOV_to_lane>;

// 8-bit lanes: same scheme as the 16-bit ones.
def MVE_VMOV_from_lane_s8
  : MVE_VMOV_lane_8<"s8", 0b0, MVE_VMOV_from_lane>;
def MVE_VMOV_from_lane_u8
  : MVE_VMOV_lane_8<"u8", 0b1, MVE_VMOV_from_lane>;
def MVE_VMOV_to_lane_8
  : MVE_VMOV_lane_8<"8", 0b0, MVE_VMOV_to_lane>;
|
2019-06-19 00:19:59 +08:00
|
|
|
|
2019-07-15 19:22:05 +08:00
|
|
|
// ISel patterns for element insert / extract and scalar_to_vector on MVE
// vector types.
let Predicates = [HasMVEInt] in {
  // v2f64: a lane is a D sub-register, so no instruction is needed.
  def : Pat<(extractelt (v2f64 MQPR:$vec), imm:$idx),
            (f64 (EXTRACT_SUBREG MQPR:$vec, (DSubReg_f64_reg imm:$idx)))>;
  def : Pat<(insertelt (v2f64 MQPR:$vec), DPR:$elt, imm:$idx),
            (INSERT_SUBREG (v2f64 (COPY_TO_REGCLASS MQPR:$vec, MQPR)),
                           DPR:$elt, (DSubReg_f64_reg imm:$idx))>;

  // v4i32: extract through the S sub-register and copy to a GPR; insert
  // with the 32-bit lane VMOV.
  def : Pat<(extractelt (v4i32 MQPR:$vec), imm:$idx),
            (COPY_TO_REGCLASS
              (i32 (EXTRACT_SUBREG MQPR:$vec, (SSubReg_f32_reg imm:$idx))),
              rGPR)>;
  def : Pat<(insertelt (v4i32 MQPR:$vec), rGPR:$elt, imm:$idx),
            (MVE_VMOV_to_lane_32 MQPR:$vec, rGPR:$elt, imm:$idx)>;

  // v16i8 / v8i16 inserts use the 8- and 16-bit lane VMOVs.
  def : Pat<(vector_insert (v16i8 MQPR:$vec), rGPR:$elt, imm:$idx),
            (MVE_VMOV_to_lane_8 MQPR:$vec, rGPR:$elt, imm:$idx)>;
  def : Pat<(vector_insert (v8i16 MQPR:$vec), rGPR:$elt, imm:$idx),
            (MVE_VMOV_to_lane_16 MQPR:$vec, rGPR:$elt, imm:$idx)>;

  // ARMvgetlanes selects the "s" lane reads, ARMvgetlaneu the "u" ones.
  def : Pat<(ARMvgetlanes (v16i8 MQPR:$vec), imm:$idx),
            (MVE_VMOV_from_lane_s8 MQPR:$vec, imm:$idx)>;
  def : Pat<(ARMvgetlanes (v8i16 MQPR:$vec), imm:$idx),
            (MVE_VMOV_from_lane_s16 MQPR:$vec, imm:$idx)>;
  def : Pat<(ARMvgetlaneu (v16i8 MQPR:$vec), imm:$idx),
            (MVE_VMOV_from_lane_u8 MQPR:$vec, imm:$idx)>;
  def : Pat<(ARMvgetlaneu (v8i16 MQPR:$vec), imm:$idx),
            (MVE_VMOV_from_lane_u16 MQPR:$vec, imm:$idx)>;

  // scalar_to_vector from a GPR: write lane 0 of an undefined vector.
  def : Pat<(v16i8 (scalar_to_vector GPR:$val)),
            (MVE_VMOV_to_lane_8 (v16i8 (IMPLICIT_DEF)), rGPR:$val, (i32 0))>;
  def : Pat<(v8i16 (scalar_to_vector GPR:$val)),
            (MVE_VMOV_to_lane_16 (v8i16 (IMPLICIT_DEF)), rGPR:$val, (i32 0))>;
  def : Pat<(v4i32 (scalar_to_vector GPR:$val)),
            (MVE_VMOV_to_lane_32 (v4i32 (IMPLICIT_DEF)), rGPR:$val, (i32 0))>;

  // Floating point patterns, still enabled under HasMVEInt

  // v4f32: a lane is an S sub-register.
  def : Pat<(extractelt (v4f32 MQPR:$vec), imm:$idx),
            (COPY_TO_REGCLASS
              (f32 (EXTRACT_SUBREG MQPR:$vec, (SSubReg_f32_reg imm:$idx))),
              SPR)>;
  def : Pat<(insertelt (v4f32 MQPR:$vec), (f32 SPR:$elt), imm:$idx),
            (INSERT_SUBREG (v4f32 (COPY_TO_REGCLASS MQPR:$vec, MQPR)),
                           SPR:$elt, (SSubReg_f32_reg imm:$idx))>;

  // v8f16: insert goes through a GPR and the 16-bit lane VMOV. An even lane
  // is extracted as a plain sub-register read; an odd lane additionally goes
  // through VMOVH.
  def : Pat<(insertelt (v8f16 MQPR:$vec), HPR:$elt, imm:$idx),
            (MVE_VMOV_to_lane_16 MQPR:$vec,
                                 (COPY_TO_REGCLASS HPR:$elt, rGPR),
                                 imm:$idx)>;
  def : Pat<(extractelt (v8f16 MQPR:$vec), imm_even:$idx),
            (EXTRACT_SUBREG MQPR:$vec, (SSubReg_f16_reg imm_even:$idx))>;
  def : Pat<(extractelt (v8f16 MQPR:$vec), imm_odd:$idx),
            (COPY_TO_REGCLASS
              (VMOVH (EXTRACT_SUBREG MQPR:$vec,
                                     (SSubReg_f16_reg imm_odd:$idx))),
              HPR)>;

  // scalar_to_vector for FP vector types: from an FP register use ssub_0
  // of an undefined vector; from a GPR use a lane-0 VMOV.
  def : Pat<(v4f32 (scalar_to_vector SPR:$val)),
            (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), SPR:$val, ssub_0)>;
  def : Pat<(v4f32 (scalar_to_vector GPR:$val)),
            (MVE_VMOV_to_lane_32 (v4f32 (IMPLICIT_DEF)), rGPR:$val, (i32 0))>;
  def : Pat<(v8f16 (scalar_to_vector HPR:$val)),
            (INSERT_SUBREG (v8f16 (IMPLICIT_DEF)), HPR:$val, ssub_0)>;
  def : Pat<(v8f16 (scalar_to_vector GPR:$val)),
            (MVE_VMOV_to_lane_16 (v8f16 (IMPLICIT_DEF)), rGPR:$val, (i32 0))>;
}
|
|
|
|
|
|
|
|
// end of mve_bit instructions
|
|
|
|
|
|
|
|
// start of MVE Integer instructions
|
|
|
|
|
|
|
|
// Base class for three-register integer instructions
// ("<iname>.<suffix> Qd, Qn, Qm") using the vpred_r predicate operand.
// It encodes the three register operands and the element size; subclasses
// provide the remaining opcode bits.
//   size - written to Inst{21-20}.
class MVE_int<string iname, string suffix, bits<2> size,
              list<dag> pattern=[]>
  : MVE_p<(outs MQPR:$Qd), (ins MQPR:$Qn, MQPR:$Qm), NoItinerary,
          iname, suffix, "$Qd, $Qn, $Qm", vpred_r, "", pattern> {
  bits<4> Qd;
  bits<4> Qn;
  bits<4> Qm;

  let Inst{21-20} = size;

  // Qd: low three bits in Inst{15-13}, top bit in Inst{22}.
  let Inst{15-13} = Qd{2-0};
  let Inst{22}    = Qd{3};

  // Qn: low three bits in Inst{19-17}, top bit in Inst{7}.
  let Inst{19-17} = Qn{2-0};
  let Inst{7}     = Qn{3};

  // Qm: low three bits in Inst{3-1}, top bit in Inst{5}.
  let Inst{3-1}   = Qm{2-0};
  let Inst{5}     = Qm{3};

  // Fixed bit common to every subclass.
  let Inst{6}     = 0b1;
}
|
|
|
|
|
2019-11-25 22:10:59 +08:00
|
|
|
// Encoding class for the vector integer multiply instantiated by
// MVE_VMUL_m. Adds the fixed opcode bits on top of MVE_int.
class MVE_VMULt1<string iname, string suffix, bits<2> size,
                 list<dag> pattern=[]>
  : MVE_int<iname, suffix, size, pattern> {
  // Fixed opcode bits.
  let Inst{0}     = 0b0;
  let Inst{4}     = 0b1;
  let Inst{12-8}  = 0b01001;
  let Inst{16}    = 0b0;
  let Inst{25-23} = 0b110;
  let Inst{28}    = 0b0;

  let validForTailPredication = 1;
}
|
|
|
|
|
2019-11-25 22:10:59 +08:00
|
|
|
// Defines one multiply instruction for the vector type described by VTI,
// plus the ISel patterns that select it.
//   unpred_op - SDNode matched for the unpredicated form.
//   pred_int  - intrinsic matched for the predicated form; it takes the two
//               operands, a predicate mask and the "inactive" vector.
multiclass MVE_VMUL_m<string iname, MVEVectorVTInfo VTI,
                      SDNode unpred_op, Intrinsic pred_int> {
  def "" : MVE_VMULt1<iname, VTI.Suffix, VTI.Size>;

  let Predicates = [HasMVEInt] in {
    // Unpredicated multiply
    def : Pat<(VTI.Vec (unpred_op (VTI.Vec MQPR:$lhs), (VTI.Vec MQPR:$rhs))),
              (VTI.Vec (!cast<Instruction>(NAME)
                          (VTI.Vec MQPR:$lhs), (VTI.Vec MQPR:$rhs)))>;

    // Predicated multiply
    // NOTE(review): (i32 1) is presumably the "then" predication code
    // expected by vpred_r; confirm against its definition.
    def : Pat<(VTI.Vec (pred_int (VTI.Vec MQPR:$lhs), (VTI.Vec MQPR:$rhs),
                                 (VTI.Pred VCCR:$mask),
                                 (VTI.Vec MQPR:$inactive))),
              (VTI.Vec (!cast<Instruction>(NAME)
                          (VTI.Vec MQPR:$lhs), (VTI.Vec MQPR:$rhs),
                          (i32 1), (VTI.Pred VCCR:$mask),
                          (VTI.Vec MQPR:$inactive)))>;
  }
}
|
|
|
|
|
2019-11-25 22:10:59 +08:00
|
|
|
// "vmul" for one vector type: `mul` is matched for the unpredicated form and
// int_arm_mve_mul_predicated for the predicated form.
multiclass MVE_VMUL<MVEVectorVTInfo VTI>
  : MVE_VMUL_m<"vmul", VTI, mul, int_arm_mve_mul_predicated>;

// One instantiation per integer element width.
defm MVE_VMULi8  : MVE_VMUL<MVE_v16i8>;
defm MVE_VMULi16 : MVE_VMUL<MVE_v8i16>;
defm MVE_VMULi32 : MVE_VMUL<MVE_v4i32>;
|
|
|
|
|
2019-07-15 19:22:05 +08:00
|
|
|
// Shared encoding for VQDMULH and VQRDMULH.
//   rounding - written to Inst{28}; 1 for the "vqrdmulh" form, 0 for
//              "vqdmulh" (see the two wrapper classes that follow).
class MVE_VQxDMULH<string iname, string suffix, bits<2> size, bit rounding,
                   list<dag> pattern=[]>
  : MVE_int<iname, suffix, size, pattern> {
  let Inst{28}    = rounding;

  // Fixed opcode bits.
  let Inst{0}     = 0b0;
  let Inst{4}     = 0b0;
  let Inst{12-8}  = 0b01011;
  let Inst{16}    = 0b0;
  let Inst{25-23} = 0b110;
}
|
|
|
|
|
2019-07-15 19:22:05 +08:00
|
|
|
// "vqdmulh": MVE_VQxDMULH with the rounding bit clear.
class MVE_VQDMULH<string suffix, bits<2> size, list<dag> pattern=[]>
  : MVE_VQxDMULH<"vqdmulh", suffix, size, 0b0, pattern>;

// "vqrdmulh": MVE_VQxDMULH with the rounding bit set.
class MVE_VQRDMULH<string suffix, bits<2> size, list<dag> pattern=[]>
  : MVE_VQxDMULH<"vqrdmulh", suffix, size, 0b1, pattern>;
|
2019-06-19 00:19:59 +08:00
|
|
|
|
2019-07-15 19:22:05 +08:00
|
|
|
// vqdmulh.s8 / .s16 / .s32
def MVE_VQDMULHi8
  : MVE_VQDMULH<"s8", 0b00>;
def MVE_VQDMULHi16
  : MVE_VQDMULH<"s16", 0b01>;
def MVE_VQDMULHi32
  : MVE_VQDMULH<"s32", 0b10>;

// vqrdmulh.s8 / .s16 / .s32
def MVE_VQRDMULHi8
  : MVE_VQRDMULH<"s8", 0b00>;
def MVE_VQRDMULHi16
  : MVE_VQRDMULH<"s16", 0b01>;
def MVE_VQRDMULHi32
  : MVE_VQRDMULH<"s32", 0b10>;
|
2019-06-19 00:19:59 +08:00
|
|
|
|
2019-07-15 19:22:05 +08:00
|
|
|
// Shared encoding for the vector integer add and subtract.
//   subtract - written to Inst{28}; selects between the two operations.
class MVE_VADDSUB<string iname, string suffix, bits<2> size, bit subtract,
                  list<dag> pattern=[]>
  : MVE_int<iname, suffix, size, pattern> {
  let Inst{28}    = subtract;

  // Fixed opcode bits.
  let Inst{0}     = 0b0;
  let Inst{4}     = 0b0;
  let Inst{12-8}  = 0b01000;
  let Inst{16}    = 0b0;
  let Inst{25-23} = 0b110;

  let validForTailPredication = 1;
}
|
|
|
|
|
[ARM] Begin adding IR intrinsics for MVE instructions.
This commit, together with the next few, will add a representative
sample of the kind of IR intrinsics that we'll need in order to
implement the user-facing ACLE intrinsics for MVE. Supporting all of
them will take more work; the intention of this initial series of
commits is to implement an intrinsic or two from lots of different
categories, as examples and proofs of concept.
This initial commit introduces a small number of IR intrinsics for
instructions simple enough that they can use Tablegen ISel patterns:
the predicated versions of the VADD and VSUB instructions (both
integer and FP), VMIN and VMAX, and the float->half VCVT instruction
(predicated and unpredicated).
When using VPT-predicated instructions in automatic code generation,
it will be convenient to specify the predicate value as a vector of
the appropriate number of i1. To make it easy to specify all sizes of
an instruction in one go and give each one the matching predicate
vector type, I've added a system of Tablegen informational records
describing MVE's vector types: each one gives the underlying LLVM IR
ValueType (which may not be the same if the MVE vector is of
explicitly signed or unsigned integers) and an appropriate vNi1 to use
as the predicate vector.
(Also, those info records include the usual encoding for the types, so
that as we add associations between each instruction encoding and one
of the new `MVEVectorVTInfo` records, we can remove some of the
existing template parameters and replace them with references to the
vector type info's fields.)
The user-facing ACLE intrinsics will receive a predicate mask as a
16-bit integer, so I've also provided a pair of intrinsics i2v and
v2i, to convert between an integer and a vector of i1 by just changing
the register class.
Reviewers: dmgreen, miyuki, ostannard
Subscribers: javed.absar, kristof.beyls, hiraditya, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D67158
2019-10-08 00:00:51 +08:00
|
|
|
// Defines one add-or-subtract instruction for the vector type described by
// VTI, plus the ISel patterns that select it.
//   subtract  - forwarded to MVE_VADDSUB (Inst{28}).
//   unpred_op - SDNode matched for the unpredicated form.
//   pred_int  - intrinsic matched for the predicated form; it takes the two
//               operands, a predicate mask and the "inactive" vector.
multiclass MVE_VADDSUB_m<string iname, MVEVectorVTInfo VTI, bit subtract,
                         SDNode unpred_op, Intrinsic pred_int> {
  def "" : MVE_VADDSUB<iname, VTI.Suffix, VTI.Size, subtract>;

  let Predicates = [HasMVEInt] in {
    // Unpredicated add/subtract
    def : Pat<(VTI.Vec (unpred_op (VTI.Vec MQPR:$lhs), (VTI.Vec MQPR:$rhs))),
              (VTI.Vec (!cast<Instruction>(NAME)
                          (VTI.Vec MQPR:$lhs), (VTI.Vec MQPR:$rhs)))>;

    // Predicated add/subtract
    // NOTE(review): (i32 1) is presumably the "then" predication code
    // expected by vpred_r; confirm against its definition.
    def : Pat<(VTI.Vec (pred_int (VTI.Vec MQPR:$lhs), (VTI.Vec MQPR:$rhs),
                                 (VTI.Pred VCCR:$mask),
                                 (VTI.Vec MQPR:$inactive))),
              (VTI.Vec (!cast<Instruction>(NAME)
                          (VTI.Vec MQPR:$lhs), (VTI.Vec MQPR:$rhs),
                          (i32 1), (VTI.Pred VCCR:$mask),
                          (VTI.Vec MQPR:$inactive)))>;
  }
}
|
|
|
|
|
[ARM] Begin adding IR intrinsics for MVE instructions.
This commit, together with the next few, will add a representative
sample of the kind of IR intrinsics that we'll need in order to
implement the user-facing ACLE intrinsics for MVE. Supporting all of
them will take more work; the intention of this initial series of
commits is to implement an intrinsic or two from lots of different
categories, as examples and proofs of concept.
This initial commit introduces a small number of IR intrinsics for
instructions simple enough that they can use Tablegen ISel patterns:
the predicated versions of the VADD and VSUB instructions (both
integer and FP), VMIN and VMAX, and the float->half VCVT instruction
(predicated and unpredicated).
When using VPT-predicated instructions in automatic code generation,
it will be convenient to specify the predicate value as a vector of
the appropriate number of i1. To make it easy to specify all sizes of
an instruction in one go and give each one the matching predicate
vector type, I've added a system of Tablegen informational records
describing MVE's vector types: each one gives the underlying LLVM IR
ValueType (which may not be the same if the MVE vector is of
explicitly signed or unsigned integers) and an appropriate vNi1 to use
as the predicate vector.
(Also, those info records include the usual encoding for the types, so
that as we add associations between each instruction encoding and one
of the new `MVEVectorVTInfo` records, we can remove some of the
existing template parameters and replace them with references to the
vector type info's fields.)
The user-facing ACLE intrinsics will receive a predicate mask as a
16-bit integer, so I've also provided a pair of intrinsics i2v and
v2i, to convert between an integer and a vector of i1 by just changing
the register class.
Reviewers: dmgreen, miyuki, ostannard
Subscribers: javed.absar, kristof.beyls, hiraditya, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D67158
2019-10-08 00:00:51 +08:00
|
|
|
// Integer VADD for one MVE vector type. Everything is delegated to
// MVE_VADDSUB_m, supplying the "vadd" mnemonic, the generic `add` node and
// the predicated-add intrinsic.
multiclass MVE_VADD<MVEVectorVTInfo VTI>
  : MVE_VADDSUB_m<"vadd", VTI, 0b0, add, int_arm_mve_add_predicated>;
|
|
|
|
// Integer VSUB for one MVE vector type. Same shape as MVE_VADD, but with the
// "vsub" mnemonic, the generic `sub` node and the predicated-sub intrinsic.
multiclass MVE_VSUB<MVEVectorVTInfo VTI>
  : MVE_VADDSUB_m<"vsub", VTI, 0b1, sub, int_arm_mve_sub_predicated>;
|
2019-06-19 00:19:59 +08:00
|
|
|
|
[ARM] Begin adding IR intrinsics for MVE instructions.
This commit, together with the next few, will add a representative
sample of the kind of IR intrinsics that we'll need in order to
implement the user-facing ACLE intrinsics for MVE. Supporting all of
them will take more work; the intention of this initial series of
commits is to implement an intrinsic or two from lots of different
categories, as examples and proofs of concept.
This initial commit introduces a small number of IR intrinsics for
instructions simple enough that they can use Tablegen ISel patterns:
the predicated versions of the VADD and VSUB instructions (both
integer and FP), VMIN and VMAX, and the float->half VCVT instruction
(predicated and unpredicated).
When using VPT-predicated instructions in automatic code generation,
it will be convenient to specify the predicate value as a vector of
the appropriate number of i1. To make it easy to specify all sizes of
an instruction in one go and give each one the matching predicate
vector type, I've added a system of Tablegen informational records
describing MVE's vector types: each one gives the underlying LLVM IR
ValueType (which may not be the same if the MVE vector is of
explicitly signed or unsigned integers) and an appropriate vNi1 to use
as the predicate vector.
(Also, those info records include the usual encoding for the types, so
that as we add associations between each instruction encoding and one
of the new `MVEVectorVTInfo` records, we can remove some of the
existing template parameters and replace them with references to the
vector type info's fields.)
The user-facing ACLE intrinsics will receive a predicate mask as a
16-bit integer, so I've also provided a pair of intrinsics i2v and
v2i, to convert between an integer and a vector of i1 by just changing
the register class.
Reviewers: dmgreen, miyuki, ostannard
Subscribers: javed.absar, kristof.beyls, hiraditya, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D67158
2019-10-08 00:00:51 +08:00
|
|
|
// Instantiate integer VADD for the 8-, 16- and 32-bit element vector types.
defm MVE_VADDi8  : MVE_VADD<MVE_v16i8>;
defm MVE_VADDi16 : MVE_VADD<MVE_v8i16>;
defm MVE_VADDi32 : MVE_VADD<MVE_v4i32>;

// Instantiate integer VSUB for the same three vector types.
defm MVE_VSUBi8  : MVE_VSUB<MVE_v16i8>;
defm MVE_VSUBi16 : MVE_VSUB<MVE_v8i16>;
defm MVE_VSUBi32 : MVE_VSUB<MVE_v4i32>;
|
2019-06-19 00:19:59 +08:00
|
|
|
|
2019-07-15 19:22:05 +08:00
|
|
|
// Shared encoding for the saturating add/subtract instructions (VQADD/VQSUB).
//   iname/suffix - mnemonic and type suffix passed on to MVE_int
//   U            - value placed in encoding bit 28
//   subtract     - value placed in encoding bit 9
//   size         - element-size field passed on to MVE_int
//   vt           - vector ValueType, recorded in the VT field so that the
//                  ISel patterns can look it up from the instruction record
class MVE_VQADDSUB<string iname, string suffix, bit U, bit subtract,
                   bits<2> size, ValueType vt>
  : MVE_int<iname, suffix, size, []> {

  let Inst{28}    = U;
  let Inst{25-23} = 0b110;
  let Inst{16}    = 0b0;
  let Inst{12-10} = 0b000;
  let Inst{9}     = subtract;
  let Inst{8}     = 0b0;
  let Inst{4}     = 0b1;
  let Inst{0}     = 0b0;
  let validForTailPredication = 1;

  ValueType VT = vt;
}
|
|
|
|
|
|
|
|
// Saturating add: MVE_VQADDSUB with the "vqadd" mnemonic and subtract = 0.
class MVE_VQADD<string suffix, bit U, bits<2> size, ValueType VT>
  : MVE_VQADDSUB<"vqadd", suffix, U, 0b0, size, VT>;
|
|
|
|
// Saturating subtract: MVE_VQADDSUB with the "vqsub" mnemonic and subtract = 1.
class MVE_VQSUB<string suffix, bit U, bits<2> size, ValueType VT>
  : MVE_VQADDSUB<"vqsub", suffix, U, 0b1, size, VT>;
|
|
|
|
|
|
|
|
// Saturating add, signed (U = 0) then unsigned (U = 1), for each element size.
def MVE_VQADDs8  : MVE_VQADD<"s8",  0b0, 0b00, v16i8>;
def MVE_VQADDs16 : MVE_VQADD<"s16", 0b0, 0b01, v8i16>;
def MVE_VQADDs32 : MVE_VQADD<"s32", 0b0, 0b10, v4i32>;
def MVE_VQADDu8  : MVE_VQADD<"u8",  0b1, 0b00, v16i8>;
def MVE_VQADDu16 : MVE_VQADD<"u16", 0b1, 0b01, v8i16>;
def MVE_VQADDu32 : MVE_VQADD<"u32", 0b1, 0b10, v4i32>;

// Saturating subtract, same signed/unsigned and size combinations.
def MVE_VQSUBs8  : MVE_VQSUB<"s8",  0b0, 0b00, v16i8>;
def MVE_VQSUBs16 : MVE_VQSUB<"s16", 0b0, 0b01, v8i16>;
def MVE_VQSUBs32 : MVE_VQSUB<"s32", 0b0, 0b10, v4i32>;
def MVE_VQSUBu8  : MVE_VQSUB<"u8",  0b1, 0b00, v16i8>;
def MVE_VQSUBu16 : MVE_VQSUB<"u16", 0b1, 0b01, v8i16>;
def MVE_VQSUBu32 : MVE_VQSUB<"u32", 0b1, 0b10, v4i32>;
|
|
|
|
|
|
|
|
// Select the generic saturating add/sub nodes to VQADD/VQSUB. The inner
// single-element `foreach VT = [instr.VT]` only binds the instruction's VT
// field to a short name for use inside the pattern.
let Predicates = [HasMVEInt] in {
  // Unsigned saturating add.
  foreach instr = [MVE_VQADDu8, MVE_VQADDu16, MVE_VQADDu32] in
    foreach VT = [instr.VT] in
      def : Pat<(VT (uaddsat (VT MQPR:$Qm), (VT MQPR:$Qn))),
                (VT (instr (VT MQPR:$Qm), (VT MQPR:$Qn)))>;

  // Signed saturating add.
  foreach instr = [MVE_VQADDs8, MVE_VQADDs16, MVE_VQADDs32] in
    foreach VT = [instr.VT] in
      def : Pat<(VT (saddsat (VT MQPR:$Qm), (VT MQPR:$Qn))),
                (VT (instr (VT MQPR:$Qm), (VT MQPR:$Qn)))>;

  // Unsigned saturating subtract.
  foreach instr = [MVE_VQSUBu8, MVE_VQSUBu16, MVE_VQSUBu32] in
    foreach VT = [instr.VT] in
      def : Pat<(VT (usubsat (VT MQPR:$Qm), (VT MQPR:$Qn))),
                (VT (instr (VT MQPR:$Qm), (VT MQPR:$Qn)))>;

  // Signed saturating subtract.
  foreach instr = [MVE_VQSUBs8, MVE_VQSUBs16, MVE_VQSUBs32] in
    foreach VT = [instr.VT] in
      def : Pat<(VT (ssubsat (VT MQPR:$Qm), (VT MQPR:$Qn))),
                (VT (instr (VT MQPR:$Qm), (VT MQPR:$Qn)))>;
}
|
|
|
|
|
|
|
|
|
2019-11-14 00:57:28 +08:00
|
|
|
// Encoding of the integer VABD instruction.
//   suffix  - type suffix passed on to MVE_int
//   U       - value placed in encoding bit 28
//   size    - element-size field passed on to MVE_int
//   pattern - optional ISel pattern list (empty by default)
class MVE_VABD_int<string suffix, bit U, bits<2> size,
                   list<dag> pattern=[]>
  : MVE_int<"vabd", suffix, size, pattern> {

  let Inst{28}    = U;
  let Inst{25-23} = 0b110;
  let Inst{16}    = 0b0;
  let Inst{12-8}  = 0b00111;
  let Inst{4}     = 0b0;
  let Inst{0}     = 0b0;
  let validForTailPredication = 1;
}
|
|
|
|
|
2019-11-14 00:57:28 +08:00
|
|
|
// Defines one integer VABD instruction for the vector type described by VTI,
// plus the ISel patterns mapping the unpredicated and predicated intrinsics
// onto it.
//   unpred_int - intrinsic taking (Qm, Qn)
//   pred_int   - intrinsic taking (Qm, Qn, mask, inactive)
multiclass MVE_VABD_m<MVEVectorVTInfo VTI,
                      Intrinsic unpred_int, Intrinsic pred_int> {
  // The instruction record itself takes the name given at the defm site.
  def "" : MVE_VABD_int<VTI.Suffix, VTI.Unsigned, VTI.Size>;

  let Predicates = [HasMVEInt] in {
    // Unpredicated absolute difference
    def : Pat<(VTI.Vec (unpred_int (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn))),
              (VTI.Vec (!cast<Instruction>(NAME)
                            (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn)))>;

    // Predicated absolute difference
    // NOTE(review): the (i32 1) operand is presumably the VPT "then"
    // predication code -- confirm against the vpred operand definition.
    def : Pat<(VTI.Vec (pred_int (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn),
                            (VTI.Pred VCCR:$mask), (VTI.Vec MQPR:$inactive))),
              (VTI.Vec (!cast<Instruction>(NAME)
                            (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn),
                            (i32 1), (VTI.Pred VCCR:$mask),
                            (VTI.Vec MQPR:$inactive)))>;
  }
}
|
|
|
|
|
|
|
|
// Integer VABD for one vector type, wired to the int_arm_mve_vabd and
// int_arm_mve_abd_predicated intrinsics.
multiclass MVE_VABD<MVEVectorVTInfo VTI>
  : MVE_VABD_m<VTI, int_arm_mve_vabd, int_arm_mve_abd_predicated>;
|
|
|
|
|
|
|
|
// Instantiate VABD for the signed and unsigned 8/16/32-bit vector types.
defm MVE_VABDs8  : MVE_VABD<MVE_v16s8>;
defm MVE_VABDs16 : MVE_VABD<MVE_v8s16>;
defm MVE_VABDs32 : MVE_VABD<MVE_v4s32>;
defm MVE_VABDu8  : MVE_VABD<MVE_v16u8>;
defm MVE_VABDu16 : MVE_VABD<MVE_v8u16>;
defm MVE_VABDu32 : MVE_VABD<MVE_v4u32>;
|
2019-06-19 00:19:59 +08:00
|
|
|
|
2019-07-15 19:22:05 +08:00
|
|
|
// Encoding of the VRHADD instruction.
//   suffix  - type suffix passed on to MVE_int
//   U       - value placed in encoding bit 28
//   size    - element-size field passed on to MVE_int
//   pattern - optional ISel pattern list (empty by default)
class MVE_VRHADD<string suffix, bit U, bits<2> size, list<dag> pattern=[]>
  : MVE_int<"vrhadd", suffix, size, pattern> {

  let Inst{28}    = U;
  let Inst{25-23} = 0b110;
  let Inst{16}    = 0b0;
  let Inst{12-8}  = 0b00001;
  let Inst{4}     = 0b0;
  let Inst{0}     = 0b0;
  let validForTailPredication = 1;
}
|
|
|
|
|
2019-07-15 19:22:05 +08:00
|
|
|
// VRHADD instances: signed (U = 0) and unsigned (U = 1) at each element size.
def MVE_VRHADDs8  : MVE_VRHADD<"s8",  0b0, 0b00>;
def MVE_VRHADDs16 : MVE_VRHADD<"s16", 0b0, 0b01>;
def MVE_VRHADDs32 : MVE_VRHADD<"s32", 0b0, 0b10>;
def MVE_VRHADDu8  : MVE_VRHADD<"u8",  0b1, 0b00>;
def MVE_VRHADDu16 : MVE_VRHADD<"u16", 0b1, 0b01>;
def MVE_VRHADDu32 : MVE_VRHADD<"u32", 0b1, 0b10>;
|
2019-06-19 00:19:59 +08:00
|
|
|
|
2019-07-15 19:22:05 +08:00
|
|
|
// Shared encoding for the halving add/subtract instructions (VHADD/VHSUB).
//   iname/suffix - mnemonic and type suffix passed on to MVE_int
//   U            - value placed in encoding bit 28
//   subtract     - value placed in encoding bit 9
//   size         - element-size field passed on to MVE_int
//   pattern      - optional ISel pattern list (empty by default)
class MVE_VHADDSUB<string iname, string suffix, bit U, bit subtract,
                   bits<2> size, list<dag> pattern=[]>
  : MVE_int<iname, suffix, size, pattern> {

  let Inst{28}    = U;
  let Inst{25-23} = 0b110;
  let Inst{16}    = 0b0;
  let Inst{12-10} = 0b000;
  let Inst{9}     = subtract;
  let Inst{8}     = 0b0;
  let Inst{4}     = 0b0;
  let Inst{0}     = 0b0;
  let validForTailPredication = 1;
}
|
|
|
|
|
2019-07-15 19:22:05 +08:00
|
|
|
// Halving add: MVE_VHADDSUB with the "vhadd" mnemonic and subtract = 0.
class MVE_VHADD<string suffix, bit U, bits<2> size,
                list<dag> pattern=[]>
  : MVE_VHADDSUB<"vhadd", suffix, U, 0b0, size, pattern>;
|
|
|
|
// Halving subtract: MVE_VHADDSUB with the "vhsub" mnemonic and subtract = 1.
class MVE_VHSUB<string suffix, bit U, bits<2> size,
                list<dag> pattern=[]>
  : MVE_VHADDSUB<"vhsub", suffix, U, 0b1, size, pattern>;
|
2019-06-19 00:19:59 +08:00
|
|
|
|
2019-07-15 19:22:05 +08:00
|
|
|
// Halving add instances: signed (U = 0) and unsigned (U = 1) per element size.
def MVE_VHADDs8  : MVE_VHADD<"s8",  0b0, 0b00>;
def MVE_VHADDs16 : MVE_VHADD<"s16", 0b0, 0b01>;
def MVE_VHADDs32 : MVE_VHADD<"s32", 0b0, 0b10>;
def MVE_VHADDu8  : MVE_VHADD<"u8",  0b1, 0b00>;
def MVE_VHADDu16 : MVE_VHADD<"u16", 0b1, 0b01>;
def MVE_VHADDu32 : MVE_VHADD<"u32", 0b1, 0b10>;

// Halving subtract instances, same signed/unsigned and size combinations.
def MVE_VHSUBs8  : MVE_VHSUB<"s8",  0b0, 0b00>;
def MVE_VHSUBs16 : MVE_VHSUB<"s16", 0b0, 0b01>;
def MVE_VHSUBs32 : MVE_VHSUB<"s32", 0b0, 0b10>;
def MVE_VHSUBu8  : MVE_VHSUB<"u8",  0b1, 0b00>;
def MVE_VHSUBu16 : MVE_VHSUB<"u16", 0b1, 0b01>;
def MVE_VHSUBu32 : MVE_VHSUB<"u32", 0b1, 0b10>;
|
2019-06-19 00:19:59 +08:00
|
|
|
|
2019-08-07 18:26:57 +08:00
|
|
|
// Halving add/sub selection patterns.
//
// VHADD/VHSUB form the sum/difference at higher precision than the element
// width before shifting right by one. A generic `add`/`sub` node instead
// wraps at the element width, so `(x + y) >> 1` is only equivalent to VHADD
// when the addition is known not to overflow (e.g. for s8, 100 + 100 wraps to
// a negative value before the shift, whereas VHADD.s8 yields 100). The
// fragments below therefore match an add/sub only when it carries the
// no-wrap flag corresponding to the signedness of the shift.
def mve_addnsw : PatFrag<(ops node:$lhs, node:$rhs),
                         (add node:$lhs, node:$rhs), [{
  return N->getFlags().hasNoSignedWrap();
}]>;
def mve_addnuw : PatFrag<(ops node:$lhs, node:$rhs),
                         (add node:$lhs, node:$rhs), [{
  return N->getFlags().hasNoUnsignedWrap();
}]>;
def mve_subnsw : PatFrag<(ops node:$lhs, node:$rhs),
                         (sub node:$lhs, node:$rhs), [{
  return N->getFlags().hasNoSignedWrap();
}]>;
def mve_subnuw : PatFrag<(ops node:$lhs, node:$rhs),
                         (sub node:$lhs, node:$rhs), [{
  return N->getFlags().hasNoUnsignedWrap();
}]>;

let Predicates = [HasMVEInt] in {
  // Signed halving add: arithmetic shift of a non-signed-wrapping add.
  def : Pat<(v16i8 (ARMvshrsImm
                     (v16i8 (mve_addnsw (v16i8 MQPR:$v1), (v16i8 MQPR:$v2))), 1)),
            (v16i8 (MVE_VHADDs8
                     (v16i8 MQPR:$v1), (v16i8 MQPR:$v2)))>;
  def : Pat<(v8i16 (ARMvshrsImm
                     (v8i16 (mve_addnsw (v8i16 MQPR:$v1), (v8i16 MQPR:$v2))), 1)),
            (v8i16 (MVE_VHADDs16
                     (v8i16 MQPR:$v1), (v8i16 MQPR:$v2)))>;
  def : Pat<(v4i32 (ARMvshrsImm
                     (v4i32 (mve_addnsw (v4i32 MQPR:$v1), (v4i32 MQPR:$v2))), 1)),
            (v4i32 (MVE_VHADDs32
                     (v4i32 MQPR:$v1), (v4i32 MQPR:$v2)))>;

  // Unsigned halving add: logical shift of a non-unsigned-wrapping add.
  def : Pat<(v16i8 (ARMvshruImm
                     (v16i8 (mve_addnuw (v16i8 MQPR:$v1), (v16i8 MQPR:$v2))), 1)),
            (v16i8 (MVE_VHADDu8
                     (v16i8 MQPR:$v1), (v16i8 MQPR:$v2)))>;
  def : Pat<(v8i16 (ARMvshruImm
                     (v8i16 (mve_addnuw (v8i16 MQPR:$v1), (v8i16 MQPR:$v2))), 1)),
            (v8i16 (MVE_VHADDu16
                     (v8i16 MQPR:$v1), (v8i16 MQPR:$v2)))>;
  def : Pat<(v4i32 (ARMvshruImm
                     (v4i32 (mve_addnuw (v4i32 MQPR:$v1), (v4i32 MQPR:$v2))), 1)),
            (v4i32 (MVE_VHADDu32
                     (v4i32 MQPR:$v1), (v4i32 MQPR:$v2)))>;

  // Signed halving subtract: arithmetic shift of a non-signed-wrapping sub.
  def : Pat<(v16i8 (ARMvshrsImm
                     (v16i8 (mve_subnsw (v16i8 MQPR:$v1), (v16i8 MQPR:$v2))), 1)),
            (v16i8 (MVE_VHSUBs8
                     (v16i8 MQPR:$v1), (v16i8 MQPR:$v2)))>;
  def : Pat<(v8i16 (ARMvshrsImm
                     (v8i16 (mve_subnsw (v8i16 MQPR:$v1), (v8i16 MQPR:$v2))), 1)),
            (v8i16 (MVE_VHSUBs16
                     (v8i16 MQPR:$v1), (v8i16 MQPR:$v2)))>;
  def : Pat<(v4i32 (ARMvshrsImm
                     (v4i32 (mve_subnsw (v4i32 MQPR:$v1), (v4i32 MQPR:$v2))), 1)),
            (v4i32 (MVE_VHSUBs32
                     (v4i32 MQPR:$v1), (v4i32 MQPR:$v2)))>;

  // Unsigned halving subtract: logical shift of a non-unsigned-wrapping sub.
  def : Pat<(v16i8 (ARMvshruImm
                     (v16i8 (mve_subnuw (v16i8 MQPR:$v1), (v16i8 MQPR:$v2))), 1)),
            (v16i8 (MVE_VHSUBu8
                     (v16i8 MQPR:$v1), (v16i8 MQPR:$v2)))>;
  def : Pat<(v8i16 (ARMvshruImm
                     (v8i16 (mve_subnuw (v8i16 MQPR:$v1), (v8i16 MQPR:$v2))), 1)),
            (v8i16 (MVE_VHSUBu16
                     (v8i16 MQPR:$v1), (v8i16 MQPR:$v2)))>;
  def : Pat<(v4i32 (ARMvshruImm
                     (v4i32 (mve_subnuw (v4i32 MQPR:$v1), (v4i32 MQPR:$v2))), 1)),
            (v4i32 (MVE_VHSUBu32
                     (v4i32 MQPR:$v1), (v4i32 MQPR:$v2)))>;
}
|
|
|
|
|
2019-07-15 19:22:05 +08:00
|
|
|
// Encoding of VDUP with a general-purpose register source ($Rt) and a vector
// destination ($Qd).
//   suffix  - element-size suffix ("8", "16" or "32" in the instances below)
//   B, E    - values placed in encoding bits 22 and 5 respectively; the
//             instances use (B,E) = (0,0) for 32, (0,1) for 16, (1,0) for 8
//   pattern - optional ISel pattern list (empty by default)
class MVE_VDUP<string suffix, bit B, bit E, list<dag> pattern=[]>
  : MVE_p<(outs MQPR:$Qd), (ins rGPR:$Rt), NoItinerary,
          "vdup", suffix, "$Qd, $Rt", vpred_r, "", pattern> {
  bits<4> Qd;
  bits<4> Rt;

  let Inst{28}    = 0b0;
  let Inst{25-23} = 0b101;
  let Inst{22}    = B;
  let Inst{21-20} = 0b10;
  let Inst{19-17} = Qd{2-0};   // low three bits of Qd
  let Inst{16}    = 0b0;
  let Inst{15-12} = Rt;
  let Inst{11-8}  = 0b1011;
  let Inst{7}     = Qd{3};     // top bit of Qd
  let Inst{6}     = 0b0;
  let Inst{5}     = E;
  let Inst{4-0}   = 0b10000;
  let validForTailPredication = 1;
}
|
|
|
|
|
2019-07-15 19:22:05 +08:00
|
|
|
// VDUP instances for 32-, 16- and 8-bit elements (arguments are B then E).
def MVE_VDUP32 : MVE_VDUP<"32", 0b0, 0b0>;
def MVE_VDUP16 : MVE_VDUP<"16", 0b0, 0b1>;
def MVE_VDUP8  : MVE_VDUP<"8",  0b1, 0b0>;
|
2019-06-19 00:19:59 +08:00
|
|
|
|
2019-07-15 19:22:05 +08:00
|
|
|
// Selection patterns for ARMvdup / ARMvduplane using the MVE VDUP
// instructions.
let Predicates = [HasMVEInt] in {
  // Splat of a GPR value into an integer vector.
  def : Pat<(v16i8 (ARMvdup (i32 rGPR:$elem))),
            (MVE_VDUP8  rGPR:$elem)>;
  def : Pat<(v8i16 (ARMvdup (i32 rGPR:$elem))),
            (MVE_VDUP16 rGPR:$elem)>;
  def : Pat<(v4i32 (ARMvdup (i32 rGPR:$elem))),
            (MVE_VDUP32 rGPR:$elem)>;

  // Splat of one lane: move the lane to a GPR, then VDUP it.
  def : Pat<(v4i32 (ARMvduplane (v4i32 MQPR:$src), imm:$lane)),
            (MVE_VDUP32 (MVE_VMOV_from_lane_32 MQPR:$src, imm:$lane))>;
  // For the 16-bit and 8-bit vduplanes we don't care about the signedness
  // of the lane move operation as we only want the lowest 8/16 bits anyway.
  def : Pat<(v8i16 (ARMvduplane (v8i16 MQPR:$src), imm:$lane)),
            (MVE_VDUP16 (MVE_VMOV_from_lane_u16 MQPR:$src, imm:$lane))>;
  def : Pat<(v16i8 (ARMvduplane (v16i8 MQPR:$src), imm:$lane)),
            (MVE_VDUP8  (MVE_VMOV_from_lane_u8 MQPR:$src, imm:$lane))>;

  // Splat of a floating-point scalar: copy the FP register into the rGPR
  // class first, since VDUP takes its source from a GPR.
  def : Pat<(v4f32 (ARMvdup (f32 SPR:$elem))),
            (v4f32 (MVE_VDUP32 (i32 (COPY_TO_REGCLASS (f32 SPR:$elem), rGPR))))>;
  def : Pat<(v8f16 (ARMvdup (f16 HPR:$elem))),
            (v8f16 (MVE_VDUP16 (i32 (COPY_TO_REGCLASS (f16 HPR:$elem), rGPR))))>;

  // Splat of one lane of a floating-point vector, via the same integer
  // lane moves as above.
  def : Pat<(v4f32 (ARMvduplane (v4f32 MQPR:$src), imm:$lane)),
            (MVE_VDUP32 (MVE_VMOV_from_lane_32 MQPR:$src, imm:$lane))>;
  def : Pat<(v8f16 (ARMvduplane (v8f16 MQPR:$src), imm:$lane)),
            (MVE_VDUP16 (MVE_VMOV_from_lane_u16 MQPR:$src, imm:$lane))>;
}
|
|
|
|
|
|
|
|
|
|
|
|
// Base class for integer instructions with one vector source ($Qm) and one
// vector destination ($Qd). It places the register numbers and the
// element-size field; subclasses fill in the remaining opcode bits.
//   iname/suffix - mnemonic and type suffix passed on to MVE_p
//   size         - 2-bit value placed in encoding bits 19-18
//   pattern      - optional ISel pattern list (empty by default)
class MVEIntSingleSrc<string iname, string suffix, bits<2> size,
                      list<dag> pattern=[]>
  : MVE_p<(outs MQPR:$Qd), (ins MQPR:$Qm), NoItinerary,
          iname, suffix, "$Qd, $Qm", vpred_r, "", pattern> {
  bits<4> Qd;
  bits<4> Qm;

  let Inst{22}    = Qd{3};     // top bit of Qd
  let Inst{19-18} = size{1-0};
  let Inst{15-13} = Qd{2-0};   // low three bits of Qd
  let Inst{5}     = Qm{3};     // top bit of Qm
  let Inst{3-1}   = Qm{2-0};   // low three bits of Qm
}
|
|
|
|
|
2019-07-15 19:22:05 +08:00
|
|
|
// Shared encoding for VCLS and VCLZ.
//   iname/suffix - mnemonic and type suffix
//   size         - element-size field passed on to MVEIntSingleSrc
//   count_zeroes - value placed in encoding bit 7 (0 for the "vcls"
//                  instances, 1 for the "vclz" instances)
//   pattern      - optional ISel pattern list (empty by default)
class MVE_VCLSCLZ<string iname, string suffix, bits<2> size,
                  bit count_zeroes, list<dag> pattern=[]>
  : MVEIntSingleSrc<iname, suffix, size, pattern> {

  let Inst{28}    = 0b1;
  let Inst{25-23} = 0b111;
  let Inst{21-20} = 0b11;
  let Inst{17-16} = 0b00;
  let Inst{12-8}  = 0b00100;
  let Inst{7}     = count_zeroes;
  let Inst{6}     = 0b1;
  let Inst{4}     = 0b0;
  let Inst{0}     = 0b0;
  let validForTailPredication = 1;
}
|
|
|
|
|
2019-07-15 19:22:05 +08:00
|
|
|
// VCLS instances (count_zeroes = 0).
def MVE_VCLSs8  : MVE_VCLSCLZ<"vcls", "s8",  0b00, 0b0>;
def MVE_VCLSs16 : MVE_VCLSCLZ<"vcls", "s16", 0b01, 0b0>;
def MVE_VCLSs32 : MVE_VCLSCLZ<"vcls", "s32", 0b10, 0b0>;

// VCLZ instances (count_zeroes = 1). Note that the record names carry an
// "s" but the assembly suffix is "i8"/"i16"/"i32".
def MVE_VCLZs8  : MVE_VCLSCLZ<"vclz", "i8",  0b00, 0b1>;
def MVE_VCLZs16 : MVE_VCLSCLZ<"vclz", "i16", 0b01, 0b1>;
def MVE_VCLZs32 : MVE_VCLSCLZ<"vclz", "i32", 0b10, 0b1>;
|
2019-06-28 15:08:42 +08:00
|
|
|
|
2019-09-16 23:19:49 +08:00
|
|
|
// Select the generic count-leading-zeros node to VCLZ for each vector type.
let Predicates = [HasMVEInt] in {
  def : Pat<(v16i8 (ctlz (v16i8 MQPR:$val1))),
            (v16i8 (MVE_VCLZs8  (v16i8 MQPR:$val1)))>;
  def : Pat<(v4i32 (ctlz (v4i32 MQPR:$val1))),
            (v4i32 (MVE_VCLZs32 (v4i32 MQPR:$val1)))>;
  def : Pat<(v8i16 (ctlz (v8i16 MQPR:$val1))),
            (v8i16 (MVE_VCLZs16 (v8i16 MQPR:$val1)))>;
}
|
|
|
|
|
2019-07-15 19:22:05 +08:00
|
|
|
// Shared encoding for the integer VABS and VNEG instructions.
//   iname/suffix - mnemonic and type suffix
//   size         - element-size field passed on to MVEIntSingleSrc
//   negate       - value placed in encoding bit 7 (0 for the "vabs"
//                  instances, 1 for the "vneg" instances)
//   pattern      - optional ISel pattern list (empty by default)
class MVE_VABSNEG_int<string iname, string suffix, bits<2> size, bit negate,
                      list<dag> pattern=[]>
  : MVEIntSingleSrc<iname, suffix, size, pattern> {

  let Inst{28}    = 0b1;
  let Inst{25-23} = 0b111;
  let Inst{21-20} = 0b11;
  let Inst{17-16} = 0b01;
  let Inst{12-8}  = 0b00011;
  let Inst{7}     = negate;
  let Inst{6}     = 0b1;
  let Inst{4}     = 0b0;
  let Inst{0}     = 0b0;
  let validForTailPredication = 1;
}
|
|
|
|
|
2019-07-15 19:22:05 +08:00
|
|
|
// Integer VABS instances (negate = 0) for each element size.
def MVE_VABSs8  : MVE_VABSNEG_int<"vabs", "s8",  0b00, 0b0>;
def MVE_VABSs16 : MVE_VABSNEG_int<"vabs", "s16", 0b01, 0b0>;
def MVE_VABSs32 : MVE_VABSNEG_int<"vabs", "s32", 0b10, 0b0>;
|
[ARM] Add MVE vector bit-operations (register inputs).
This includes all the obvious bitwise operations (AND, OR, BIC, ORN,
MVN) in register-to-register forms, and the immediate forms of
AND/OR/BIC/ORN; byte-order reverse instructions; and the VMOVs that
access a single lane of a vector.
Some of those VMOVs (specifically, the ones that access a 32-bit lane)
share an encoding with existing instructions that were disassembled as
accessing half of a d-register (e.g. `vmov.32 r0, d1[0]`), but in
8.1-M they're now written as accessing a quarter of a q-register (e.g.
`vmov.32 r0, q0[2]`). The older syntax is still accepted by the
assembler.
Reviewers: dmgreen, samparker, SjoerdMeijer, t.p.northover
Subscribers: javed.absar, kristof.beyls, hiraditya, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D62673
llvm-svn: 363838
2019-06-20 00:43:53 +08:00
|
|
|
|
2019-07-04 16:41:23 +08:00
|
|
|
// Select the generic integer `abs` node to VABS for each vector type.
let Predicates = [HasMVEInt] in {
  def : Pat<(v16i8 (abs (v16i8 MQPR:$v))),
            (v16i8 (MVE_VABSs8 $v))>;
  def : Pat<(v8i16 (abs (v8i16 MQPR:$v))),
            (v8i16 (MVE_VABSs16 $v))>;
  def : Pat<(v4i32 (abs (v4i32 MQPR:$v))),
            (v4i32 (MVE_VABSs32 $v))>;
}
|
|
|
|
|
2019-07-15 19:22:05 +08:00
|
|
|
// Integer VNEG instances (negate = 1) for each element size.
def MVE_VNEGs8  : MVE_VABSNEG_int<"vneg", "s8",  0b00, 0b1>;
def MVE_VNEGs16 : MVE_VABSNEG_int<"vneg", "s16", 0b01, 0b1>;
def MVE_VNEGs32 : MVE_VABSNEG_int<"vneg", "s32", 0b10, 0b1>;
|
[ARM] Add MVE vector bit-operations (register inputs).
This includes all the obvious bitwise operations (AND, OR, BIC, ORN,
MVN) in register-to-register forms, and the immediate forms of
AND/OR/BIC/ORN; byte-order reverse instructions; and the VMOVs that
access a single lane of a vector.
Some of those VMOVs (specifically, the ones that access a 32-bit lane)
share an encoding with existing instructions that were disassembled as
accessing half of a d-register (e.g. `vmov.32 r0, d1[0]`), but in
8.1-M they're now written as accessing a quarter of a q-register (e.g.
`vmov.32 r0, q0[2]`). The older syntax is still accepted by the
assembler.
Reviewers: dmgreen, samparker, SjoerdMeijer, t.p.northover
Subscribers: javed.absar, kristof.beyls, hiraditya, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D62673
llvm-svn: 363838
2019-06-20 00:43:53 +08:00
|
|
|
|
2019-07-15 19:22:05 +08:00
|
|
|
// Instruction selection for integer vector negation: a `vnegq` node on each
// of the three integer vector types is matched to the MVE_VNEG record of the
// same lane width. The patterns only apply when HasMVEInt holds.
let Predicates = [HasMVEInt] in {
|
|
|
|
def : Pat<(v16i8 (vnegq (v16i8 MQPR:$v))),
|
|
|
|
(v16i8 (MVE_VNEGs8 $v))>;
|
|
|
|
def : Pat<(v8i16 (vnegq (v8i16 MQPR:$v))),
|
|
|
|
(v8i16 (MVE_VNEGs16 $v))>;
|
|
|
|
def : Pat<(v4i32 (vnegq (v4i32 MQPR:$v))),
|
|
|
|
(v4i32 (MVE_VNEGs32 $v))>;
|
[ARM] Add MVE vector bit-operations (register inputs).
This includes all the obvious bitwise operations (AND, OR, BIC, ORN,
MVN) in register-to-register forms, and the immediate forms of
AND/OR/BIC/ORN; byte-order reverse instructions; and the VMOVs that
access a single lane of a vector.
Some of those VMOVs (specifically, the ones that access a 32-bit lane)
share an encoding with existing instructions that were disassembled as
accessing half of a d-register (e.g. `vmov.32 r0, d1[0]`), but in
8.1-M they're now written as accessing a quarter of a q-register (e.g.
`vmov.32 r0, q0[2]`). The older syntax is still accepted by the
assembler.
Reviewers: dmgreen, samparker, SjoerdMeijer, t.p.northover
Subscribers: javed.absar, kristof.beyls, hiraditya, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D62673
llvm-svn: 363838
2019-06-20 00:43:53 +08:00
|
|
|
}
|
2019-07-15 19:22:05 +08:00
|
|
|
|
|
|
|
// Common encoding class for the vqabs/vqneg records defined from it.
//   iname, suffix, size, pattern - forwarded unchanged to MVEIntSingleSrc.
//   negate                       - written to Inst{7}; the defs below pass 0
//                                  for vqabs and 1 for vqneg.
// All other bits assigned here are fixed opcode bits. The class also marks
// the instruction as validForTailPredication.
class MVE_VQABSNEG<string iname, string suffix, bits<2> size,
|
|
|
|
bit negate, list<dag> pattern=[]>
|
|
|
|
: MVEIntSingleSrc<iname, suffix, size, pattern> {
|
|
|
|
|
|
|
|
let Inst{28} = 0b1;
|
|
|
|
let Inst{25-23} = 0b111;
|
|
|
|
let Inst{21-20} = 0b11;
|
|
|
|
let Inst{17-16} = 0b00;
|
|
|
|
let Inst{12-8} = 0b00111;
|
|
|
|
// The only variable bit set in this class: selects abs (0) vs. negate (1).
let Inst{7} = negate;
|
|
|
|
let Inst{6} = 0b1;
|
|
|
|
let Inst{4} = 0b0;
|
|
|
|
let Inst{0} = 0b0;
|
2019-10-15 21:12:51 +08:00
|
|
|
let validForTailPredication = 1;
|
[ARM] Add MVE vector bit-operations (register inputs).
This includes all the obvious bitwise operations (AND, OR, BIC, ORN,
MVN) in register-to-register forms, and the immediate forms of
AND/OR/BIC/ORN; byte-order reverse instructions; and the VMOVs that
access a single lane of a vector.
Some of those VMOVs (specifically, the ones that access a 32-bit lane)
share an encoding with existing instructions that were disassembled as
accessing half of a d-register (e.g. `vmov.32 r0, d1[0]`), but in
8.1-M they're now written as accessing a quarter of a q-register (e.g.
`vmov.32 r0, q0[2]`). The older syntax is still accepted by the
assembler.
Reviewers: dmgreen, samparker, SjoerdMeijer, t.p.northover
Subscribers: javed.absar, kristof.beyls, hiraditya, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D62673
llvm-svn: 363838
2019-06-20 00:43:53 +08:00
|
|
|
}
|
|
|
|
|
2019-07-15 19:22:05 +08:00
|
|
|
// Saturating absolute value: negate bit = 0, size 0b00/0b01/0b10 for
// s8/s16/s32.
def MVE_VQABSs8 : MVE_VQABSNEG<"vqabs", "s8", 0b00, 0b0>;
|
|
|
|
def MVE_VQABSs16 : MVE_VQABSNEG<"vqabs", "s16", 0b01, 0b0>;
|
|
|
|
def MVE_VQABSs32 : MVE_VQABSNEG<"vqabs", "s32", 0b10, 0b0>;
|
|
|
|
|
|
|
|
// Saturating negate: same size values as above, negate bit = 1.
def MVE_VQNEGs8 : MVE_VQABSNEG<"vqneg", "s8", 0b00, 0b1>;
|
|
|
|
def MVE_VQNEGs16 : MVE_VQABSNEG<"vqneg", "s16", 0b01, 0b1>;
|
|
|
|
def MVE_VQNEGs32 : MVE_VQABSNEG<"vqneg", "s32", 0b10, 0b1>;
|
|
|
|
|
2019-11-20 21:57:54 +08:00
|
|
|
// Emits two selection patterns (both gated on HasMVEInt) that recognise an
// open-coded saturating abs / saturating negate built from vselect, compare
// and subtract-from-zero nodes, and replace each with a single instruction.
// VTI: vector type info supplying the vector (VTI.Vec) and predicate
//      (VTI.Pred) types used in the patterns
// int_min/int_max: vector containing INT_MIN/INT_MAX VTI.Size times
|
|
|
|
// zero_vec: v4i32-initialized zero vector, potentially wrapped in a bitconvert
// vqabs_instruction/vqneg_instruction: the MVE_VQABSNEG records to select
|
2019-11-25 19:17:13 +08:00
|
|
|
multiclass vqabsneg_pattern<MVEVectorVTInfo VTI, dag int_min, dag int_max,
|
|
|
|
dag zero_vec, MVE_VQABSNEG vqabs_instruction,
|
|
|
|
MVE_VQABSNEG vqneg_instruction> {
|
2019-11-20 21:57:54 +08:00
|
|
|
let Predicates = [HasMVEInt] in {
|
2019-11-25 19:17:13 +08:00
|
|
|
// The below tree can be replaced by a vqabs instruction, as it represents
|
|
|
|
// the following vectorized expression (r being the value in $reg):
|
|
|
|
// r > 0 ? r : (r == INT_MIN ? INT_MAX : -r)
|
2019-11-20 21:57:54 +08:00
|
|
|
def : Pat<(VTI.Vec (vselect
|
|
|
|
// Outer test: compare-with-zero, condition operand 12 (the "r > 0" test
// in the expression above).
(VTI.Pred (ARMvcmpz (VTI.Vec MQPR:$reg), (i32 12))),
|
|
|
|
(VTI.Vec MQPR:$reg),
|
|
|
|
(VTI.Vec (vselect
|
|
|
|
// Inner test: compare against int_min, condition operand 0 (the
// "r == INT_MIN" test in the expression above).
(VTI.Pred (ARMvcmp (VTI.Vec MQPR:$reg), int_min, (i32 0))),
|
|
|
|
int_max,
|
|
|
|
// -r is expressed as (0 - r).
(sub (VTI.Vec zero_vec), (VTI.Vec MQPR:$reg)))))),
|
|
|
|
(VTI.Vec (vqabs_instruction (VTI.Vec MQPR:$reg)))>;
|
2019-11-25 19:17:13 +08:00
|
|
|
// Similarly, this tree represents vqneg, i.e. the following vectorized expression:
|
|
|
|
// r == INT_MIN ? INT_MAX : -r
|
|
|
|
def : Pat<(VTI.Vec (vselect
|
|
|
|
(VTI.Pred (ARMvcmp (VTI.Vec MQPR:$reg), int_min, (i32 0))),
|
|
|
|
int_max,
|
|
|
|
(sub (VTI.Vec zero_vec), (VTI.Vec MQPR:$reg)))),
|
|
|
|
(VTI.Vec (vqneg_instruction (VTI.Vec MQPR:$reg)))>;
|
2019-11-20 21:57:54 +08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2019-11-25 19:17:13 +08:00
|
|
|
// Instantiate the vqabs/vqneg recognition patterns for each lane width.
// Arguments, in order: type info, INT_MIN splat, INT_MAX splat, zero vector,
// then the vqabs and vqneg instruction records.
// NOTE(review): the i32 operands look like encoded modified-immediate values
// of the form (cmode << 8) | imm8, i.e. 3712 = 0xE80, 3711 = 0xE7F,
// 2688 = 0xA80, 1664 = 0x680 -- confirm against the ARMvmovImm/ARMvmvnImm
// encoding helpers. For the 16- and 32-bit cases INT_MAX is written as the
// ARMvmvnImm of the same operand used for INT_MIN.
defm MVE_VQABSNEG_Ps8 : vqabsneg_pattern<MVE_v16i8,
|
2019-11-20 21:57:54 +08:00
|
|
|
(v16i8 (ARMvmovImm (i32 3712))),
|
|
|
|
(v16i8 (ARMvmovImm (i32 3711))),
|
|
|
|
// Zero is always built as a v4i32 immediate, so it is bitconverted for the
// non-v4i32 types.
(bitconvert (v4i32 (ARMvmovImm (i32 0)))),
|
2019-11-25 19:17:13 +08:00
|
|
|
MVE_VQABSs8, MVE_VQNEGs8>;
|
|
|
|
defm MVE_VQABSNEG_Ps16 : vqabsneg_pattern<MVE_v8i16,
|
2019-11-20 21:57:54 +08:00
|
|
|
(v8i16 (ARMvmovImm (i32 2688))),
|
|
|
|
(v8i16 (ARMvmvnImm (i32 2688))),
|
|
|
|
(bitconvert (v4i32 (ARMvmovImm (i32 0)))),
|
2019-11-25 19:17:13 +08:00
|
|
|
MVE_VQABSs16, MVE_VQNEGs16>;
|
|
|
|
defm MVE_VQABSNEG_Ps32 : vqabsneg_pattern<MVE_v4i32,
|
2019-11-20 21:57:54 +08:00
|
|
|
(v4i32 (ARMvmovImm (i32 1664))),
|
|
|
|
(v4i32 (ARMvmvnImm (i32 1664))),
|
|
|
|
// Already v4i32, so no bitconvert is needed here.
(ARMvmovImm (i32 0)),
|
2019-11-25 19:17:13 +08:00
|
|
|
MVE_VQABSs32, MVE_VQNEGs32>;
|
2019-11-20 21:57:54 +08:00
|
|
|
|
2019-07-15 19:22:05 +08:00
|
|
|
// Encoding class for instructions that write an immediate into a vector
// register ($Qd) -- used by the vmov/vmvn immediate records.
//   iname, suffix - mnemonic and type suffix, forwarded to MVE_p.
//   cmode         - written to Inst{11-8}; individual records may leave bits
//                   as '?' and override them from $imm.
//   op            - written to Inst{5}.
//   iops          - input operand dag; must name the immediate operand $imm.
//   pattern       - optional selection patterns, forwarded to MVE_p.
// The $imm operand is 13 bits wide; this class places only imm{7}, imm{6-4}
// and imm{3-0} into the encoding. Decoding uses the custom
// DecodeMVEModImmInstruction method, and the instruction is marked
// validForTailPredication.
class MVE_mod_imm<string iname, string suffix, bits<4> cmode, bit op,
|
|
|
|
dag iops, list<dag> pattern=[]>
|
|
|
|
: MVE_p<(outs MQPR:$Qd), iops, NoItinerary, iname, suffix, "$Qd, $imm",
|
|
|
|
vpred_r, "", pattern> {
|
|
|
|
bits<13> imm;
|
[ARM] Add MVE vector bit-operations (register inputs).
This includes all the obvious bitwise operations (AND, OR, BIC, ORN,
MVN) in register-to-register forms, and the immediate forms of
AND/OR/BIC/ORN; byte-order reverse instructions; and the VMOVs that
access a single lane of a vector.
Some of those VMOVs (specifically, the ones that access a 32-bit lane)
share an encoding with existing instructions that were disassembled as
accessing half of a d-register (e.g. `vmov.32 r0, d1[0]`), but in
8.1-M they're now written as accessing a quarter of a q-register (e.g.
`vmov.32 r0, q0[2]`). The older syntax is still accepted by the
assembler.
Reviewers: dmgreen, samparker, SjoerdMeijer, t.p.northover
Subscribers: javed.absar, kristof.beyls, hiraditya, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D62673
llvm-svn: 363838
2019-06-20 00:43:53 +08:00
|
|
|
bits<4> Qd;
|
|
|
|
|
|
|
2019-07-15 19:22:05 +08:00
|
|
|
// The low 8 bits of $imm are scattered over Inst{28}, Inst{18-16} and
// Inst{3-0}.
let Inst{28} = imm{7};
|
|
|
|
let Inst{25-23} = 0b111;
|
|
|
|
let Inst{22} = Qd{3};
|
|
|
|
let Inst{21-19} = 0b000;
|
|
|
|
let Inst{18-16} = imm{6-4};
|
|
|
|
let Inst{15-13} = Qd{2-0};
|
|
|
|
let Inst{12} = 0b0;
|
|
|
|
let Inst{11-8} = cmode{3-0};
|
|
|
|
let Inst{7-6} = 0b01;
|
|
|
|
let Inst{5} = op;
|
|
|
|
let Inst{4} = 0b1;
|
|
|
|
let Inst{3-0} = imm{3-0};
|
|
|
|
|
|
|
|
|
|
|
|
let DecoderMethod = "DecodeMVEModImmInstruction";
|
2019-10-15 21:12:51 +08:00
|
|
|
let validForTailPredication = 1;
|
[ARM] Add MVE vector bit-operations (register inputs).
This includes all the obvious bitwise operations (AND, OR, BIC, ORN,
MVN) in register-to-register forms, and the immediate forms of
AND/OR/BIC/ORN; byte-order reverse instructions; and the VMOVs that
access a single lane of a vector.
Some of those VMOVs (specifically, the ones that access a 32-bit lane)
share an encoding with existing instructions that were disassembled as
accessing half of a d-register (e.g. `vmov.32 r0, d1[0]`), but in
8.1-M they're now written as accessing a quarter of a q-register (e.g.
`vmov.32 r0, q0[2]`). The older syntax is still accepted by the
assembler.
Reviewers: dmgreen, samparker, SjoerdMeijer, t.p.northover
Subscribers: javed.absar, kristof.beyls, hiraditya, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D62673
llvm-svn: 363838
2019-06-20 00:43:53 +08:00
|
|
|
}
|
|
|
|
|
2019-07-15 19:22:05 +08:00
|
|
|
// Immediate vmov / vmvn records. All of them are flagged isReMaterializable;
// only the vmov forms are additionally flagged isAsCheapAsAMove.
// Where a cmode bit is written as '?', the record body fills the matching
// Inst bit(s) from $imm instead of from a fixed cmode value.
let isReMaterializable = 1 in {
|
|
|
|
let isAsCheapAsAMove = 1 in {
|
|
|
|
def MVE_VMOVimmi8 : MVE_mod_imm<"vmov", "i8", {1,1,1,0}, 0b0, (ins nImmSplatI8:$imm)>;
|
|
|
|
def MVE_VMOVimmi16 : MVE_mod_imm<"vmov", "i16", {1,0,?,0}, 0b0, (ins nImmSplatI16:$imm)> {
|
|
|
|
// Inst{9} is the '?' cmode bit above; it comes from the immediate operand.
let Inst{9} = imm{9};
|
|
|
|
}
|
|
|
|
def MVE_VMOVimmi32 : MVE_mod_imm<"vmov", "i32", {?,?,?,?}, 0b0, (ins nImmVMOVI32:$imm)> {
|
|
|
|
// The whole cmode field comes from the immediate operand.
let Inst{11-8} = imm{11-8};
|
|
|
|
}
|
|
|
|
// i64 uses the same cmode as i8 but with op = 1.
def MVE_VMOVimmi64 : MVE_mod_imm<"vmov", "i64", {1,1,1,0}, 0b1, (ins nImmSplatI64:$imm)>;
|
|
|
|
def MVE_VMOVimmf32 : MVE_mod_imm<"vmov", "f32", {1,1,1,1}, 0b0, (ins nImmVMOVF32:$imm)>;
|
|
|
|
} // let isAsCheapAsAMove = 1
|
[ARM] Add MVE vector bit-operations (register inputs).
This includes all the obvious bitwise operations (AND, OR, BIC, ORN,
MVN) in register-to-register forms, and the immediate forms of
AND/OR/BIC/ORN; byte-order reverse instructions; and the VMOVs that
access a single lane of a vector.
Some of those VMOVs (specifically, the ones that access a 32-bit lane)
share an encoding with existing instructions that were disassembled as
accessing half of a d-register (e.g. `vmov.32 r0, d1[0]`), but in
8.1-M they're now written as accessing a quarter of a q-register (e.g.
`vmov.32 r0, q0[2]`). The older syntax is still accepted by the
assembler.
Reviewers: dmgreen, samparker, SjoerdMeijer, t.p.northover
Subscribers: javed.absar, kristof.beyls, hiraditya, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D62673
llvm-svn: 363838
2019-06-20 00:43:53 +08:00
|
|
|
|
|
2019-07-15 19:22:05 +08:00
|
|
|
// vmvn forms: same cmode layout as the matching vmov forms, with op = 1.
def MVE_VMVNimmi16 : MVE_mod_imm<"vmvn", "i16", {1,0,?,0}, 0b1, (ins nImmSplatI16:$imm)> {
|
|
|
|
let Inst{9} = imm{9};
|
|
|
|
}
|
|
|
|
def MVE_VMVNimmi32 : MVE_mod_imm<"vmvn", "i32", {?,?,?,?}, 0b1, (ins nImmVMOVI32:$imm)> {
|
|
|
|
let Inst{11-8} = imm{11-8};
|
[ARM] Add MVE vector bit-operations (register inputs).
This includes all the obvious bitwise operations (AND, OR, BIC, ORN,
MVN) in register-to-register forms, and the immediate forms of
AND/OR/BIC/ORN; byte-order reverse instructions; and the VMOVs that
access a single lane of a vector.
Some of those VMOVs (specifically, the ones that access a 32-bit lane)
share an encoding with existing instructions that were disassembled as
accessing half of a d-register (e.g. `vmov.32 r0, d1[0]`), but in
8.1-M they're now written as accessing a quarter of a q-register (e.g.
`vmov.32 r0, q0[2]`). The older syntax is still accepted by the
assembler.
Reviewers: dmgreen, samparker, SjoerdMeijer, t.p.northover
Subscribers: javed.absar, kristof.beyls, hiraditya, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D62673
llvm-svn: 363838
2019-06-20 00:43:53 +08:00
|
|
|
}
|
2019-07-15 19:22:05 +08:00
|
|
|
} // let isReMaterializable = 1
|
[ARM] Add MVE vector bit-operations (register inputs).
This includes all the obvious bitwise operations (AND, OR, BIC, ORN,
MVN) in register-to-register forms, and the immediate forms of
AND/OR/BIC/ORN; byte-order reverse instructions; and the VMOVs that
access a single lane of a vector.
Some of those VMOVs (specifically, the ones that access a 32-bit lane)
share an encoding with existing instructions that were disassembled as
accessing half of a d-register (e.g. `vmov.32 r0, d1[0]`), but in
8.1-M they're now written as accessing a quarter of a q-register (e.g.
`vmov.32 r0, q0[2]`). The older syntax is still accepted by the
assembler.
Reviewers: dmgreen, samparker, SjoerdMeijer, t.p.northover
Subscribers: javed.absar, kristof.beyls, hiraditya, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D62673
llvm-svn: 363838
2019-06-20 00:43:53 +08:00
|
|
|
|
2019-07-15 19:22:05 +08:00
|
|
|
// Selection patterns (gated on HasMVEInt) mapping the ARMvmovImm /
// ARMvmvnImm / ARMvmovFPImm nodes, each carrying a target immediate, onto
// the immediate vmov/vmvn records. The result type picks the record; the
// immediate is passed through using that record's own operand class.
let Predicates = [HasMVEInt] in {
|
|
|
|
def : Pat<(v16i8 (ARMvmovImm timm:$simm)),
|
|
|
|
(v16i8 (MVE_VMOVimmi8 nImmSplatI8:$simm))>;
|
|
|
|
def : Pat<(v8i16 (ARMvmovImm timm:$simm)),
|
|
|
|
(v8i16 (MVE_VMOVimmi16 nImmSplatI16:$simm))>;
|
|
|
|
def : Pat<(v4i32 (ARMvmovImm timm:$simm)),
|
|
|
|
(v4i32 (MVE_VMOVimmi32 nImmVMOVI32:$simm))>;
|
|
|
|
|
|
|
|
|
|
|
|
// vmvn: only 16- and 32-bit lane patterns are provided here.
def : Pat<(v8i16 (ARMvmvnImm timm:$simm)),
|
|
|
|
(v8i16 (MVE_VMVNimmi16 nImmSplatI16:$simm))>;
|
|
|
|
def : Pat<(v4i32 (ARMvmvnImm timm:$simm)),
|
|
|
|
(v4i32 (MVE_VMVNimmi32 nImmVMOVI32:$simm))>;
|
|
|
|
|
|
|
|
|
|
|
|
def : Pat<(v4f32 (ARMvmovFPImm timm:$simm)),
|
|
|
|
(v4f32 (MVE_VMOVimmf32 nImmVMOVF32:$simm))>;
|
[ARM] Add MVE vector bit-operations (register inputs).
This includes all the obvious bitwise operations (AND, OR, BIC, ORN,
MVN) in register-to-register forms, and the immediate forms of
AND/OR/BIC/ORN; byte-order reverse instructions; and the VMOVs that
access a single lane of a vector.
Some of those VMOVs (specifically, the ones that access a 32-bit lane)
share an encoding with existing instructions that were disassembled as
accessing half of a d-register (e.g. `vmov.32 r0, d1[0]`), but in
8.1-M they're now written as accessing a quarter of a q-register (e.g.
`vmov.32 r0, q0[2]`). The older syntax is still accepted by the
assembler.
Reviewers: dmgreen, samparker, SjoerdMeijer, t.p.northover
Subscribers: javed.absar, kristof.beyls, hiraditya, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D62673
llvm-svn: 363838
2019-06-20 00:43:53 +08:00
|
|
|
}
|
|
|
|
|
2019-07-15 19:22:05 +08:00
|
|
|
// Encoding class for the vmaxa/vmina records.
//   iname, suffix - mnemonic and type suffix.
//   size          - written to Inst{19-18}.
//   bit_12        - written to Inst{12}; the records below pass 0 for vmaxa
//                   and 1 for vmina.
//   pattern       - optional selection patterns.
// The destination is also an input: $Qd is tied to $Qd_src by the
// "$Qd = $Qd_src" constraint, while the assembly string only shows $Qd, $Qm.
class MVE_VMINMAXA<string iname, string suffix, bits<2> size,
|
|
|
|
bit bit_12, list<dag> pattern=[]>
|
|
|
|
: MVE_p<(outs MQPR:$Qd), (ins MQPR:$Qd_src, MQPR:$Qm),
|
|
|
|
NoItinerary, iname, suffix, "$Qd, $Qm", vpred_n, "$Qd = $Qd_src",
|
|
|
|
pattern> {
|
|
|
|
bits<4> Qd;
|
|
|
|
bits<4> Qm;
|
|
|
|
|
|
|
|
|
|
|
|
let Inst{28} = 0b0;
|
|
|
|
let Inst{25-23} = 0b100;
|
|
|
|
// Qd is split: top bit in Inst{22}, low three bits in Inst{15-13}.
let Inst{22} = Qd{3};
|
|
|
|
let Inst{21-20} = 0b11;
|
|
|
|
let Inst{19-18} = size;
|
|
|
|
let Inst{17-16} = 0b11;
|
|
|
|
let Inst{15-13} = Qd{2-0};
|
|
|
|
let Inst{12} = bit_12;
|
|
|
|
let Inst{11-6} = 0b111010;
|
|
|
|
// Qm is split the same way: top bit in Inst{5}, low bits in Inst{3-1}.
let Inst{5} = Qm{3};
|
|
|
|
let Inst{4} = 0b0;
|
|
|
|
let Inst{3-1} = Qm{2-0};
|
|
|
|
let Inst{0} = 0b1;
|
[ARM] Add MVE vector bit-operations (register inputs).
This includes all the obvious bitwise operations (AND, OR, BIC, ORN,
MVN) in register-to-register forms, and the immediate forms of
AND/OR/BIC/ORN; byte-order reverse instructions; and the VMOVs that
access a single lane of a vector.
Some of those VMOVs (specifically, the ones that access a 32-bit lane)
share an encoding with existing instructions that were disassembled as
accessing half of a d-register (e.g. `vmov.32 r0, d1[0]`), but in
8.1-M they're now written as accessing a quarter of a q-register (e.g.
`vmov.32 r0, q0[2]`). The older syntax is still accepted by the
assembler.
Reviewers: dmgreen, samparker, SjoerdMeijer, t.p.northover
Subscribers: javed.absar, kristof.beyls, hiraditya, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D62673
llvm-svn: 363838
2019-06-20 00:43:53 +08:00
|
|
|
}
|
|
|
|
|
2019-07-15 19:22:05 +08:00
|
|
|
// vmaxa records: bit_12 = 0, size 0b00/0b01/0b10 for s8/s16/s32.
def MVE_VMAXAs8 : MVE_VMINMAXA<"vmaxa", "s8", 0b00, 0b0>;
|
|
|
|
def MVE_VMAXAs16 : MVE_VMINMAXA<"vmaxa", "s16", 0b01, 0b0>;
|
|
|
|
def MVE_VMAXAs32 : MVE_VMINMAXA<"vmaxa", "s32", 0b10, 0b0>;
|
[ARM] Add MVE vector bit-operations (register inputs).
This includes all the obvious bitwise operations (AND, OR, BIC, ORN,
MVN) in register-to-register forms, and the immediate forms of
AND/OR/BIC/ORN; byte-order reverse instructions; and the VMOVs that
access a single lane of a vector.
Some of those VMOVs (specifically, the ones that access a 32-bit lane)
share an encoding with existing instructions that were disassembled as
accessing half of a d-register (e.g. `vmov.32 r0, d1[0]`), but in
8.1-M they're now written as accessing a quarter of a q-register (e.g.
`vmov.32 r0, q0[2]`). The older syntax is still accepted by the
assembler.
Reviewers: dmgreen, samparker, SjoerdMeijer, t.p.northover
Subscribers: javed.absar, kristof.beyls, hiraditya, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D62673
llvm-svn: 363838
2019-06-20 00:43:53 +08:00
|
|
|
|
2019-07-15 19:22:05 +08:00
|
|
|
// vmina records: bit_12 = 1, size 0b00/0b01/0b10 for s8/s16/s32.
def MVE_VMINAs8 : MVE_VMINMAXA<"vmina", "s8", 0b00, 0b1>;
|
|
|
|
def MVE_VMINAs16 : MVE_VMINMAXA<"vmina", "s16", 0b01, 0b1>;
|
|
|
|
def MVE_VMINAs32 : MVE_VMINMAXA<"vmina", "s32", 0b10, 0b1>;
|
[ARM] MVE: allow soft-float ABI to pass vector types.
Passing a vector type over the soft-float ABI involves it being split
into four GPRs, so the first thing that has to happen at the start of
the function is to recombine those into a vector register. The ABI
types all vectors as v2f64, so we need to support BUILD_VECTOR for
that type, which I do in this patch by allowing it to be expanded in
terms of INSERT_VECTOR_ELT, and writing an ISel pattern for that in
turn. Similarly, I provide a rule for EXTRACT_VECTOR_ELT so that a
returned vector can be marshalled back into GPRs.
While I'm here, I've also added ISD::UNDEF to the list of operations
we turn back on in `setAllExpand`, because I noticed that otherwise it
gets expanded into a BUILD_VECTOR with explicit zero inputs, leading
to pointless machine instructions to zero out a vector register that's
about to have every lane overwritten in any case.
Reviewers: dmgreen, ostannard
Subscribers: javed.absar, kristof.beyls, hiraditya, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D63937
llvm-svn: 364910
2019-07-02 19:26:11 +08:00
|
|
|
|
2019-07-15 19:22:05 +08:00
|
|
|
// end of MVE Integer instructions
|
2019-06-28 15:08:42 +08:00
|
|
|
|
2019-07-15 19:22:05 +08:00
|
|
|
// start of mve_imm_shift instructions
|
2019-06-28 15:08:42 +08:00
|
|
|
|
2019-07-15 19:22:05 +08:00
|
|
|
// "vshlc": takes a vector register, a general-purpose register and an
// immediate, and produces both a vector and a general-purpose result.
// Both outputs are tied to the corresponding inputs by the constraint string
// ($RdmDest = $RdmSrc, $Qd = $QdSrc), so the assembly form names only
// $QdSrc, $RdmSrc and $imm.
def MVE_VSHLC : MVE_p<(outs rGPR:$RdmDest, MQPR:$Qd),
|
|
|
|
(ins MQPR:$QdSrc, rGPR:$RdmSrc, long_shift:$imm),
|
|
|
|
NoItinerary, "vshlc", "", "$QdSrc, $RdmSrc, $imm",
|
|
|
|
vpred_n, "$RdmDest = $RdmSrc,$Qd = $QdSrc"> {
|
|
|
|
bits<5> imm;
|
|
|
|
bits<4> Qd;
|
|
|
|
bits<4> RdmDest;
|
2019-06-28 15:08:42 +08:00
|
|
|
|
|
2019-07-15 19:22:05 +08:00
|
|
|
let Inst{28} = 0b0;
|
|
|
|
let Inst{25-23} = 0b101;
|
|
|
|
let Inst{22} = Qd{3};
|
|
|
|
let Inst{21} = 0b1;
|
|
|
|
// All five bits of the immediate operand are encoded directly.
let Inst{20-16} = imm{4-0};
|
|
|
|
let Inst{15-13} = Qd{2-0};
|
|
|
|
let Inst{12-4} = 0b011111100;
|
|
|
|
// The general-purpose register occupies the low nibble.
let Inst{3-0} = RdmDest{3-0};
|
|
|
|
}
|
2019-06-28 15:08:42 +08:00
|
|
|
|
2019-07-15 19:22:05 +08:00
|
|
|
// Base class for the immediate-shift family below. It forwards every
// template argument to MVE_p (with NoItinerary) and encodes only the two
// vector register fields; subclasses supply all opcode and immediate bits.
//   Qd -> Inst{22} (top bit) and Inst{15-13} (low three bits)
//   Qm -> Inst{5}  (top bit) and Inst{3-1}   (low three bits)
class MVE_shift_imm<dag oops, dag iops, string iname, string suffix,
|
|
|
|
string ops, vpred_ops vpred, string cstr,
|
|
|
|
list<dag> pattern=[]>
|
|
|
|
: MVE_p<oops, iops, NoItinerary, iname, suffix, ops, vpred, cstr, pattern> {
|
|
|
|
bits<4> Qd;
|
|
|
|
bits<4> Qm;
|
2019-06-28 15:08:42 +08:00
|
|
|
|
|
2019-07-15 19:22:05 +08:00
|
|
|
let Inst{22} = Qd{3};
|
|
|
|
let Inst{15-13} = Qd{2-0};
|
|
|
|
let Inst{5} = Qm{3};
|
|
|
|
let Inst{3-1} = Qm{2-0};
|
|
|
|
}
|
2019-06-28 15:08:42 +08:00
|
|
|
|
2019-07-15 19:22:05 +08:00
|
|
|
// Encoding class for the vmovl records: one vector input ($Qm), one vector
// output ($Qd), no immediate operand.
//   sz - written to Inst{20-19}.
//   U  - written to Inst{28}; the defm lines that use this class pass 0 for
//        the "s" suffixes and 1 for the "u" suffixes.
// Inst{12} is deliberately not set here; MVE_VMOVL_shift_half sets it per
// record.
class MVE_VMOVL<string iname, string suffix, bits<2> sz, bit U,
|
|
|
|
list<dag> pattern=[]>
|
|
|
|
: MVE_shift_imm<(outs MQPR:$Qd), (ins MQPR:$Qm),
|
|
|
|
iname, suffix, "$Qd, $Qm", vpred_r, "",
|
|
|
|
pattern> {
|
|
|
|
let Inst{28} = U;
|
|
|
|
let Inst{25-23} = 0b101;
|
|
|
|
let Inst{21} = 0b1;
|
|
|
|
let Inst{20-19} = sz{1-0};
|
|
|
|
let Inst{18-16} = 0b000;
|
|
|
|
let Inst{11-6} = 0b111101;
|
|
|
|
let Inst{4} = 0b0;
|
|
|
|
let Inst{0} = 0b0;
|
|
|
|
}
|
|
|
|
|
|
|
|
// Produces the two half-selecting variants of a vmovl instruction:
//   <name>bh - mnemonic iname + "b", Inst{12} = 0
//   <name>th - mnemonic iname + "t", Inst{12} = 1
// All template arguments are forwarded unchanged to MVE_VMOVL.
multiclass MVE_VMOVL_shift_half<string iname, string suffix, bits<2> sz, bit U,
|
|
|
|
list<dag> pattern=[]> {
|
|
|
|
def bh : MVE_VMOVL<!strconcat(iname, "b"), suffix, sz, U, pattern> {
|
|
|
|
let Inst{12} = 0b0;
|
|
|
|
}
|
|
|
|
def th : MVE_VMOVL<!strconcat(iname, "t"), suffix, sz, U, pattern> {
|
|
|
|
let Inst{12} = 0b1;
|
|
|
|
}
|
2019-06-28 15:08:42 +08:00
|
|
|
}
|
|
|
|
|
2019-07-15 19:22:05 +08:00
|
|
|
// vmovlb/vmovlt for 8- and 16-bit source lanes: sz is 0b01 for the 8-bit
// suffixes and 0b10 for the 16-bit ones; U is 0 for signed, 1 for unsigned.
// Each defm yields a ...bh and a ...th record.
defm MVE_VMOVLs8 : MVE_VMOVL_shift_half<"vmovl", "s8", 0b01, 0b0>;
|
|
|
|
defm MVE_VMOVLu8 : MVE_VMOVL_shift_half<"vmovl", "u8", 0b01, 0b1>;
|
|
|
|
defm MVE_VMOVLs16 : MVE_VMOVL_shift_half<"vmovl", "s16", 0b10, 0b0>;
|
|
|
|
defm MVE_VMOVLu16 : MVE_VMOVL_shift_half<"vmovl", "u16", 0b10, 0b1>;
|
2019-06-20 23:16:56 +08:00
|
|
|
|
2019-07-15 19:22:05 +08:00
|
|
|
// In-register sign/zero extension selected onto the bottom-half vmovl
// records (gated on HasMVEInt).
let Predicates = [HasMVEInt] in {
|
|
|
|
// sext_inreg 16 -> 32
def : Pat<(sext_inreg (v4i32 MQPR:$src), v4i16),
|
|
|
|
(MVE_VMOVLs16bh MQPR:$src)>;
|
|
|
|
// sext_inreg 8 -> 16
def : Pat<(sext_inreg (v8i16 MQPR:$src), v8i8),
|
|
|
|
(MVE_VMOVLs8bh MQPR:$src)>;
|
|
|
|
// sext_inreg 8 -> 32 is done in two steps: 8 -> 16, then 16 -> 32.
def : Pat<(sext_inreg (v4i32 MQPR:$src), v4i8),
|
|
|
|
(MVE_VMOVLs16bh (MVE_VMOVLs8bh MQPR:$src))>;
|
2019-06-20 23:16:56 +08:00
|
|
|
|
|
2019-07-15 19:22:05 +08:00
|
|
|
// zext_inreg 16 -> 32
// Zero extension appears as an AND with an immediate mask vector.
// NOTE(review): 0xCFF / 0x8FF are presumably the encoded modified-immediate
// forms of the per-lane masks 0x0000FFFF and 0x00FF -- confirm.
|
|
|
|
def : Pat<(and (v4i32 MQPR:$src), (v4i32 (ARMvmovImm (i32 0xCFF)))),
|
|
|
|
(MVE_VMOVLu16bh MQPR:$src)>;
|
|
|
|
// zext_inreg 8 -> 16
|
|
|
|
def : Pat<(and (v8i16 MQPR:$src), (v8i16 (ARMvmovImm (i32 0x8FF)))),
|
|
|
|
(MVE_VMOVLu8bh MQPR:$src)>;
|
2019-06-20 23:16:56 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
|
2019-07-15 19:22:05 +08:00
|
|
|
// Common encoding for the immediate vshllb/vshllt records.
//   U      - written to Inst{28}.
//   th     - written to Inst{12}; the records pass 0 for the "b" mnemonics
//            and 1 for the "t" mnemonics.
//   immops - operand dag for the shift amount; it is appended to
//            (ins MQPR:$Qm) with !con and must name the operand $imm.
// The immediate bits themselves (Inst{20-16}) are placed by the subclasses.
class MVE_VSHLL_imm<string iname, string suffix, bit U, bit th,
|
|
|
|
dag immops, list<dag> pattern=[]>
|
|
|
|
: MVE_shift_imm<(outs MQPR:$Qd), !con((ins MQPR:$Qm), immops),
|
|
|
|
iname, suffix, "$Qd, $Qm, $imm", vpred_r, "", pattern> {
|
|
|
|
let Inst{28} = U;
|
|
|
|
let Inst{25-23} = 0b101;
|
|
|
|
let Inst{21} = 0b1;
|
|
|
|
let Inst{12} = th;
|
|
|
|
let Inst{11-6} = 0b111101;
|
|
|
|
let Inst{4} = 0b0;
|
2019-06-20 23:16:56 +08:00
|
|
|
let Inst{0} = 0b0;
|
|
|
|
}
|
|
|
|
|
2019-07-15 19:22:05 +08:00
|
|
|
// The immediate VSHLL instructions accept shift counts from 1 up to
|
|
|
|
// the lane width (8 or 16), but the full-width shifts have an
|
|
|
|
// entirely separate encoding, given below with 'lw' in the name.
|
2019-06-20 23:16:56 +08:00
|
|
|
|
2019-07-15 19:22:05 +08:00
|
|
|
// Immediate vshll on 8-bit lanes: the shift operand uses the
// mve_shift_imm1_7 operand class and is a 3-bit field in Inst{18-16}, with
// Inst{20-19} fixed to 0b01.
class MVE_VSHLL_imm8<string iname, string suffix,
|
|
|
|
bit U, bit th, list<dag> pattern=[]>
|
|
|
|
: MVE_VSHLL_imm<iname, suffix, U, th, (ins mve_shift_imm1_7:$imm), pattern> {
|
|
|
|
bits<3> imm;
|
|
|
|
let Inst{20-19} = 0b01;
|
|
|
|
let Inst{18-16} = imm;
|
2019-06-28 19:44:03 +08:00
|
|
|
}
|
|
|
|
|
2019-07-15 19:22:05 +08:00
|
|
|
// Immediate vshll on 16-bit lanes: the shift operand uses the
// mve_shift_imm1_15 operand class and is a 4-bit field in Inst{19-16}, with
// Inst{20} fixed to 1.
class MVE_VSHLL_imm16<string iname, string suffix,
|
|
|
|
bit U, bit th, list<dag> pattern=[]>
|
|
|
|
: MVE_VSHLL_imm<iname, suffix, U, th, (ins mve_shift_imm1_15:$imm), pattern> {
|
|
|
|
bits<4> imm;
|
|
|
|
let Inst{20} = 0b1;
|
|
|
|
let Inst{19-16} = imm;
|
|
|
|
}
|
2019-06-20 23:16:56 +08:00
|
|
|
|
2019-07-15 19:22:05 +08:00
|
|
|
// Immediate vshllb/vshllt records. Template arguments after the suffix are
// (U, th): U = 0 for signed / 1 for unsigned, th = 0 for "b" / 1 for "t".
def MVE_VSHLL_imms8bh : MVE_VSHLL_imm8 <"vshllb", "s8", 0b0, 0b0>;
|
|
|
|
def MVE_VSHLL_imms8th : MVE_VSHLL_imm8 <"vshllt", "s8", 0b0, 0b1>;
|
|
|
|
def MVE_VSHLL_immu8bh : MVE_VSHLL_imm8 <"vshllb", "u8", 0b1, 0b0>;
|
|
|
|
def MVE_VSHLL_immu8th : MVE_VSHLL_imm8 <"vshllt", "u8", 0b1, 0b1>;
|
|
|
|
def MVE_VSHLL_imms16bh : MVE_VSHLL_imm16<"vshllb", "s16", 0b0, 0b0>;
|
|
|
|
def MVE_VSHLL_imms16th : MVE_VSHLL_imm16<"vshllt", "s16", 0b0, 0b1>;
|
|
|
|
def MVE_VSHLL_immu16bh : MVE_VSHLL_imm16<"vshllb", "u16", 0b1, 0b0>;
|
|
|
|
def MVE_VSHLL_immu16th : MVE_VSHLL_imm16<"vshllt", "u16", 0b1, 0b1>;
|
|
|
|
|
|
|
|
// Encoding for the vshll forms whose shift amount is fixed rather than an
// operand: the input dag holds only $Qm, and the caller supplies the whole
// assembly operand string via `ops`.
//   size - written to Inst{19-18}.
//   U    - written to Inst{28}.
// Inst{12} is left for MVE_VSHLL_lw to set per record.
class MVE_VSHLL_by_lane_width<string iname, string suffix, bits<2> size,
|
|
|
|
bit U, string ops, list<dag> pattern=[]>
|
|
|
|
: MVE_shift_imm<(outs MQPR:$Qd), (ins MQPR:$Qm),
|
|
|
|
iname, suffix, ops, vpred_r, "", pattern> {
|
|
|
|
let Inst{28} = U;
|
|
|
|
let Inst{25-23} = 0b100;
|
|
|
|
let Inst{21-20} = 0b11;
|
|
|
|
let Inst{19-18} = size{1-0};
|
|
|
|
let Inst{17-16} = 0b01;
|
|
|
|
let Inst{11-6} = 0b111000;
|
2019-06-20 23:16:56 +08:00
|
|
|
let Inst{4} = 0b0;
|
2019-07-15 19:22:05 +08:00
|
|
|
let Inst{0} = 0b1;
|
2019-06-20 23:16:56 +08:00
|
|
|
}
|
|
|
|
|
2019-07-15 19:22:05 +08:00
|
|
|
// Produces the two half-selecting variants of a lane-width vshll:
//   <name>bh - mnemonic iname # "b", Inst{12} = 0
//   <name>th - mnemonic iname # "t", Inst{12} = 1
// Everything else is forwarded to MVE_VSHLL_by_lane_width.
multiclass MVE_VSHLL_lw<string iname, string suffix, bits<2> sz, bit U,
|
|
|
|
string ops, list<dag> pattern=[]> {
|
|
|
|
def bh : MVE_VSHLL_by_lane_width<iname#"b", suffix, sz, U, ops, pattern> {
|
|
|
|
let Inst{12} = 0b0;
|
|
|
|
}
|
|
|
|
def th : MVE_VSHLL_by_lane_width<iname#"t", suffix, sz, U, ops, pattern> {
|
|
|
|
let Inst{12} = 0b1;
|
|
|
|
}
|
|
|
|
}
|
2019-06-20 23:16:56 +08:00
|
|
|
|
2019-07-15 19:22:05 +08:00
|
|
|
// Lane-width vshll records. The shift count is a literal in the assembly
// operand string (#8 or #16) rather than an operand; sz is 0b00 for 8-bit
// and 0b01 for 16-bit lanes, U is 0 for signed and 1 for unsigned.
defm MVE_VSHLL_lws8 : MVE_VSHLL_lw<"vshll", "s8", 0b00, 0b0, "$Qd, $Qm, #8">;
|
|
|
|
defm MVE_VSHLL_lws16 : MVE_VSHLL_lw<"vshll", "s16", 0b01, 0b0, "$Qd, $Qm, #16">;
|
|
|
|
defm MVE_VSHLL_lwu8 : MVE_VSHLL_lw<"vshll", "u8", 0b00, 0b1, "$Qd, $Qm, #8">;
|
|
|
|
defm MVE_VSHLL_lwu16 : MVE_VSHLL_lw<"vshll", "u16", 0b01, 0b1, "$Qd, $Qm, #16">;
|
2019-06-20 23:16:56 +08:00
|
|
|
|
2019-07-15 19:22:05 +08:00
|
|
|
// Encoding class for the vshrn/vrshrn records.
//   bit_12, bit_28 - written to Inst{12} and Inst{28}. Note the argument
//                    order here is (bit_12, bit_28).
//   immops         - operand dag for the shift amount, appended to
//                    (ins MQPR:$QdSrc, MQPR:$Qm); must name the operand $imm.
// The destination is tied to $QdSrc ("$Qd = $QdSrc"). The 5-bit immediate
// fills Inst{20-16}; each record then overrides the top bit(s) of that field
// with a fixed value.
class MVE_VxSHRN<string iname, string suffix, bit bit_12, bit bit_28,
|
|
|
|
dag immops, list<dag> pattern=[]>
|
|
|
|
: MVE_shift_imm<(outs MQPR:$Qd), !con((ins MQPR:$QdSrc, MQPR:$Qm), immops),
|
|
|
|
iname, suffix, "$Qd, $Qm, $imm", vpred_n, "$Qd = $QdSrc",
|
|
|
|
pattern> {
|
|
|
|
bits<5> imm;
|
2019-06-20 23:16:56 +08:00
|
|
|
|
|
2019-07-15 19:22:05 +08:00
|
|
|
let Inst{28} = bit_28;
|
|
|
|
let Inst{25-23} = 0b101;
|
|
|
|
let Inst{21} = 0b0;
|
|
|
|
let Inst{20-16} = imm{4-0};
|
|
|
|
let Inst{12} = bit_12;
|
|
|
|
let Inst{11-6} = 0b111111;
|
2019-06-20 23:16:56 +08:00
|
|
|
let Inst{4} = 0b0;
|
2019-07-15 19:22:05 +08:00
|
|
|
let Inst{0} = 0b1;
|
2019-06-20 23:16:56 +08:00
|
|
|
}
|
|
|
|
|
2019-07-15 19:22:05 +08:00
|
|
|
// vrshrnb/vrshrnt records: bit_28 = 1 throughout; bit_12 is 0 for the "b"
// mnemonics and 1 for the "t" mnemonics. The i16 forms take a shr_imm8
// operand and fix Inst{20-19} = 0b01; the i32 forms take a shr_imm16 operand
// and fix Inst{20} = 1.
def MVE_VRSHRNi16bh : MVE_VxSHRN<
|
|
|
|
"vrshrnb", "i16", 0b0, 0b1, (ins shr_imm8:$imm)> {
|
|
|
|
let Inst{20-19} = 0b01;
|
|
|
|
}
|
|
|
|
def MVE_VRSHRNi16th : MVE_VxSHRN<
|
|
|
|
"vrshrnt", "i16", 0b1, 0b1,(ins shr_imm8:$imm)> {
|
|
|
|
let Inst{20-19} = 0b01;
|
|
|
|
}
|
|
|
|
def MVE_VRSHRNi32bh : MVE_VxSHRN<
|
|
|
|
"vrshrnb", "i32", 0b0, 0b1, (ins shr_imm16:$imm)> {
|
|
|
|
let Inst{20} = 0b1;
|
|
|
|
}
|
|
|
|
def MVE_VRSHRNi32th : MVE_VxSHRN<
|
|
|
|
"vrshrnt", "i32", 0b1, 0b1, (ins shr_imm16:$imm)> {
|
|
|
|
let Inst{20} = 0b1;
|
|
|
|
}
|
2019-06-20 23:16:56 +08:00
|
|
|
|
2019-07-15 19:22:05 +08:00
|
|
|
// vshrnb/vshrnt records: identical to the vrshrn records except that
// bit_28 = 0. bit_12 is 0 for "b" and 1 for "t"; the i16 forms fix
// Inst{20-19} = 0b01 and the i32 forms fix Inst{20} = 1.
def MVE_VSHRNi16bh : MVE_VxSHRN<
|
|
|
|
"vshrnb", "i16", 0b0, 0b0, (ins shr_imm8:$imm)> {
|
|
|
|
let Inst{20-19} = 0b01;
|
|
|
|
}
|
|
|
|
def MVE_VSHRNi16th : MVE_VxSHRN<
|
|
|
|
"vshrnt", "i16", 0b1, 0b0, (ins shr_imm8:$imm)> {
|
|
|
|
let Inst{20-19} = 0b01;
|
|
|
|
}
|
|
|
|
def MVE_VSHRNi32bh : MVE_VxSHRN<
|
|
|
|
"vshrnb", "i32", 0b0, 0b0, (ins shr_imm16:$imm)> {
|
|
|
|
let Inst{20} = 0b1;
|
|
|
|
}
|
|
|
|
def MVE_VSHRNi32th : MVE_VxSHRN<
|
|
|
|
"vshrnt", "i32", 0b1, 0b0, (ins shr_imm16:$imm)> {
|
|
|
|
let Inst{20} = 0b1;
|
|
|
|
}
|
2019-06-20 23:16:56 +08:00
|
|
|
|
2019-07-15 19:22:05 +08:00
|
|
|
// Encoding class for the vqshrun/vqrshrun records.
//   bit_28, bit_12 - written to Inst{28} and Inst{12}. Note the argument
//                    order here is (bit_28, bit_12), the reverse of
//                    MVE_VxSHRN.
//   immops         - operand dag for the shift amount, appended to
//                    (ins MQPR:$QdSrc, MQPR:$Qm); must name the operand $imm.
// The destination is tied to $QdSrc. The encoding matches MVE_VxSHRN except
// that Inst{0} is 0 here.
class MVE_VxQRSHRUN<string iname, string suffix, bit bit_28, bit bit_12, dag immops,
|
|
|
|
list<dag> pattern=[]>
|
|
|
|
: MVE_shift_imm<(outs MQPR:$Qd), !con((ins MQPR:$QdSrc, MQPR:$Qm), immops),
|
|
|
|
iname, suffix, "$Qd, $Qm, $imm", vpred_n, "$Qd = $QdSrc",
|
|
|
|
pattern> {
|
|
|
|
bits<5> imm;
|
|
|
|
|
|
|
|
|
|
|
|
let Inst{28} = bit_28;
|
|
|
|
let Inst{25-23} = 0b101;
|
|
|
|
let Inst{21} = 0b0;
|
|
|
|
let Inst{20-16} = imm{4-0};
|
|
|
|
let Inst{12} = bit_12;
|
|
|
|
let Inst{11-6} = 0b111111;
|
|
|
|
let Inst{4} = 0b0;
|
|
|
|
let Inst{0} = 0b0;
|
2019-06-28 15:21:11 +08:00
|
|
|
}
|
|
|
|
|
2019-07-15 19:22:05 +08:00
|
|
|
// vqrshrunb/vqrshrunt records: bit_28 = 1 throughout; bit_12 is 0 for "b"
// and 1 for "t". The s16 forms take shr_imm8 and fix Inst{20-19} = 0b01; the
// s32 forms take shr_imm16 and fix Inst{20} = 1.
def MVE_VQRSHRUNs16bh : MVE_VxQRSHRUN<
|
|
|
|
"vqrshrunb", "s16", 0b1, 0b0, (ins shr_imm8:$imm)> {
|
|
|
|
let Inst{20-19} = 0b01;
|
|
|
|
}
|
|
|
|
def MVE_VQRSHRUNs16th : MVE_VxQRSHRUN<
|
|
|
|
"vqrshrunt", "s16", 0b1, 0b1, (ins shr_imm8:$imm)> {
|
|
|
|
let Inst{20-19} = 0b01;
|
|
|
|
}
|
|
|
|
def MVE_VQRSHRUNs32bh : MVE_VxQRSHRUN<
|
|
|
|
"vqrshrunb", "s32", 0b1, 0b0, (ins shr_imm16:$imm)> {
|
|
|
|
let Inst{20} = 0b1;
|
|
|
|
}
|
|
|
|
def MVE_VQRSHRUNs32th : MVE_VxQRSHRUN<
|
|
|
|
"vqrshrunt", "s32", 0b1, 0b1, (ins shr_imm16:$imm)> {
|
|
|
|
let Inst{20} = 0b1;
|
|
|
|
}
|
2019-06-20 23:16:56 +08:00
|
|
|
|
2019-07-15 19:22:05 +08:00
|
|
|
// vqshrunb/vqshrunt records: same as the vqrshrun records but with
// bit_28 = 0. bit_12 is 0 for "b" and 1 for "t"; s16 forms fix
// Inst{20-19} = 0b01 and s32 forms fix Inst{20} = 1.
def MVE_VQSHRUNs16bh : MVE_VxQRSHRUN<
|
|
|
|
"vqshrunb", "s16", 0b0, 0b0, (ins shr_imm8:$imm)> {
|
|
|
|
let Inst{20-19} = 0b01;
|
2019-06-28 15:21:11 +08:00
|
|
|
}
|
2019-07-15 19:22:05 +08:00
|
|
|
def MVE_VQSHRUNs16th : MVE_VxQRSHRUN<
|
|
|
|
"vqshrunt", "s16", 0b0, 0b1, (ins shr_imm8:$imm)> {
|
|
|
|
let Inst{20-19} = 0b01;
|
|
|
|
}
|
|
|
|
def MVE_VQSHRUNs32bh : MVE_VxQRSHRUN<
|
|
|
|
"vqshrunb", "s32", 0b0, 0b0, (ins shr_imm16:$imm)> {
|
|
|
|
let Inst{20} = 0b1;
|
|
|
|
}
|
|
|
|
def MVE_VQSHRUNs32th : MVE_VxQRSHRUN<
|
|
|
|
"vqshrunt", "s32", 0b0, 0b1, (ins shr_imm16:$imm)> {
|
|
|
|
let Inst{20} = 0b1;
|
|
|
|
}
|
|
|
|
|
|
|
|
// Encoding class for the vqshrn/vqrshrn records.
//   bit_0, bit_12 - written to Inst{0} and Inst{12}.
//   immops        - operand dag for the shift amount, appended to
//                   (ins MQPR:$QdSrc, MQPR:$Qm); must name the operand $imm.
// The destination is tied to $QdSrc. Inst{28} is not assigned here: each
// record in MVE_VxQRSHRN_types sets it (0 for the "s" suffixes, 1 for "u").
class MVE_VxQRSHRN<string iname, string suffix, bit bit_0, bit bit_12,
|
|
|
|
dag immops, list<dag> pattern=[]>
|
|
|
|
: MVE_shift_imm<(outs MQPR:$Qd), !con((ins MQPR:$QdSrc, MQPR:$Qm), immops),
|
|
|
|
iname, suffix, "$Qd, $Qm, $imm", vpred_n, "$Qd = $QdSrc",
|
|
|
|
pattern> {
|
|
|
|
bits<5> imm;
|
2019-06-28 15:21:11 +08:00
|
|
|
|
|
2019-07-15 19:22:05 +08:00
|
|
|
let Inst{25-23} = 0b101;
|
|
|
|
let Inst{21} = 0b0;
|
|
|
|
let Inst{20-16} = imm{4-0};
|
|
|
|
let Inst{12} = bit_12;
|
|
|
|
let Inst{11-6} = 0b111101;
|
|
|
|
let Inst{4} = 0b0;
|
|
|
|
let Inst{0} = bit_0;
|
|
|
|
}
|
2019-06-20 23:16:56 +08:00
|
|
|
|
2019-07-15 19:22:05 +08:00
|
|
|
// Expands one mnemonic into its four typed records (s16, u16, s32, u32).
//   Inst{28}    - 0 for the signed suffixes, 1 for the unsigned ones.
//   16-bit ones - shr_imm8 operand, Inst{20-19} fixed to 0b01.
//   32-bit ones - shr_imm16 operand, Inst{20} fixed to 1.
// bit_0 and bit_12 are forwarded unchanged to MVE_VxQRSHRN.
multiclass MVE_VxQRSHRN_types<string iname, bit bit_0, bit bit_12> {
|
|
|
|
def s16 : MVE_VxQRSHRN<iname, "s16", bit_0, bit_12, (ins shr_imm8:$imm)> {
|
|
|
|
let Inst{28} = 0b0;
|
|
|
|
let Inst{20-19} = 0b01;
|
|
|
|
}
|
|
|
|
def u16 : MVE_VxQRSHRN<iname, "u16", bit_0, bit_12, (ins shr_imm8:$imm)> {
|
|
|
|
let Inst{28} = 0b1;
|
|
|
|
let Inst{20-19} = 0b01;
|
|
|
|
}
|
|
|
|
def s32 : MVE_VxQRSHRN<iname, "s32", bit_0, bit_12, (ins shr_imm16:$imm)> {
|
|
|
|
let Inst{28} = 0b0;
|
|
|
|
let Inst{20} = 0b1;
|
|
|
|
}
|
|
|
|
def u32 : MVE_VxQRSHRN<iname, "u32", bit_0, bit_12, (ins shr_imm16:$imm)> {
|
|
|
|
let Inst{28} = 0b1;
|
|
|
|
let Inst{20} = 0b1;
|
|
|
|
}
|
2019-06-20 23:16:56 +08:00
|
|
|
}
|
|
|
|
|
2019-07-15 19:22:05 +08:00
|
|
|
// Arguments after the mnemonic are (bit_0, bit_12): bit_0 is 1 for the
// vqrshrn mnemonics and 0 for the vqshrn ones; bit_12 is 0 for "b" and 1
// for "t". Each defm expands to s16/u16/s32/u32 records.
defm MVE_VQRSHRNbh : MVE_VxQRSHRN_types<"vqrshrnb", 0b1, 0b0>;
|
|
|
|
defm MVE_VQRSHRNth : MVE_VxQRSHRN_types<"vqrshrnt", 0b1, 0b1>;
|
|
|
|
defm MVE_VQSHRNbh : MVE_VxQRSHRN_types<"vqshrnb", 0b0, 0b0>;
|
|
|
|
defm MVE_VQSHRNth : MVE_VxQRSHRN_types<"vqshrnt", 0b0, 0b1>;
|
2019-06-20 23:16:56 +08:00
|
|
|
|
2019-07-15 19:22:05 +08:00
|
|
|
// end of mve_imm_shift instructions
|
2019-06-20 23:16:56 +08:00
|
|
|
|
2019-07-15 19:22:05 +08:00
|
|
|
// start of mve_shift instructions
|
2019-06-20 23:16:56 +08:00
|
|
|
|
2019-07-15 19:22:05 +08:00
|
|
|
// Encoding class for shifts whose per-lane counts come from a second vector
// register: operands are $Qd (out), $Qm and $Qn (in), printed in that order.
//   U            - written to Inst{28}.
//   size         - written to Inst{21-20}.
//   bit_4, bit_8 - written to Inst{4} and Inst{8}; together they select the
//                  vshl/vqshl/vrshl/vqrshl variant.
// No selection patterns are attached here (the pattern list is empty). The
// instruction is marked validForTailPredication.
class MVE_shift_by_vec<string iname, string suffix, bit U,
|
|
|
|
bits<2> size, bit bit_4, bit bit_8>
|
|
|
|
: MVE_p<(outs MQPR:$Qd), (ins MQPR:$Qm, MQPR:$Qn), NoItinerary,
|
|
|
|
iname, suffix, "$Qd, $Qm, $Qn", vpred_r, "", []> {
|
|
|
|
// Shift instructions which take a vector of shift counts
|
|
|
|
bits<4> Qd;
|
|
|
|
bits<4> Qm;
|
|
|
|
bits<4> Qn;
|
2019-06-20 23:16:56 +08:00
|
|
|
|
|
|
|
|
|
let Inst{28} = U;
|
2019-07-15 19:22:05 +08:00
|
|
|
let Inst{25-24} = 0b11;
|
|
|
|
let Inst{23} = 0b0;
|
|
|
|
let Inst{22} = Qd{3};
|
|
|
|
let Inst{21-20} = size;
|
|
|
|
// Qn is split: low three bits in Inst{19-17}, top bit in Inst{7}.
let Inst{19-17} = Qn{2-0};
|
2019-06-20 23:16:56 +08:00
|
|
|
let Inst{16} = 0b0;
|
2019-07-15 19:22:05 +08:00
|
|
|
let Inst{15-13} = Qd{2-0};
|
|
|
|
let Inst{12-9} = 0b0010;
|
|
|
|
let Inst{8} = bit_8;
|
|
|
|
let Inst{7} = Qn{3};
|
|
|
|
let Inst{6} = 0b1;
|
|
|
|
let Inst{5} = Qm{3};
|
|
|
|
let Inst{4} = bit_4;
|
|
|
|
let Inst{3-1} = Qm{2-0};
|
2019-06-20 23:16:56 +08:00
|
|
|
let Inst{0} = 0b0;
|
2019-10-15 21:12:51 +08:00
|
|
|
let validForTailPredication = 1;
|
2019-06-20 23:16:56 +08:00
|
|
|
}
|
|
|
|
|
2019-07-15 19:22:05 +08:00
|
|
|
// Expands one shift-by-vector mnemonic into six typed records:
// s8/s16/s32 with U = 0 and u8/u16/u32 with U = 1, using size
// 0b00/0b01/0b10 for 8/16/32-bit lanes. bit_4 and bit_8 are forwarded.
multiclass mve_shift_by_vec_multi<string iname, bit bit_4, bit bit_8> {
|
|
|
|
def s8 : MVE_shift_by_vec<iname, "s8", 0b0, 0b00, bit_4, bit_8>;
|
|
|
|
def s16 : MVE_shift_by_vec<iname, "s16", 0b0, 0b01, bit_4, bit_8>;
|
|
|
|
def s32 : MVE_shift_by_vec<iname, "s32", 0b0, 0b10, bit_4, bit_8>;
|
|
|
|
def u8 : MVE_shift_by_vec<iname, "u8", 0b1, 0b00, bit_4, bit_8>;
|
|
|
|
def u16 : MVE_shift_by_vec<iname, "u16", 0b1, 0b01, bit_4, bit_8>;
|
|
|
|
def u32 : MVE_shift_by_vec<iname, "u32", 0b1, 0b10, bit_4, bit_8>;
|
2019-06-20 23:16:56 +08:00
|
|
|
}
|
|
|
|
|
2019-07-15 19:22:05 +08:00
|
|
|
// Shift-by-vector families. The two bits are (bit_4, bit_8) of the encoding:
// the "q" (vqshl/vqrshl) mnemonics set bit_4, the "r" (vrshl/vqrshl)
// mnemonics set bit_8.
defm MVE_VSHL_by_vec   : mve_shift_by_vec_multi<"vshl",   0b0, 0b0>;
defm MVE_VQSHL_by_vec  : mve_shift_by_vec_multi<"vqshl",  0b1, 0b0>;
defm MVE_VQRSHL_by_vec : mve_shift_by_vec_multi<"vqrshl", 0b1, 0b1>;
defm MVE_VRSHL_by_vec  : mve_shift_by_vec_multi<"vrshl",  0b0, 0b1>;
|
2019-06-20 23:16:56 +08:00
|
|
|
|
2019-07-15 19:35:39 +08:00
|
|
|
// Instruction selection for vector-by-vector shifts: ARMvshlu nodes select
// the unsigned VSHL forms and ARMvshls nodes the signed forms, for each of
// the three integer vector types.
let Predicates = [HasMVEInt] in {
  // Unsigned.
  def : Pat<(v4i32 (ARMvshlu (v4i32 MQPR:$Qm), (v4i32 MQPR:$Qn))),
            (v4i32 (MVE_VSHL_by_vecu32 (v4i32 MQPR:$Qm), (v4i32 MQPR:$Qn)))>;
  def : Pat<(v8i16 (ARMvshlu (v8i16 MQPR:$Qm), (v8i16 MQPR:$Qn))),
            (v8i16 (MVE_VSHL_by_vecu16 (v8i16 MQPR:$Qm), (v8i16 MQPR:$Qn)))>;
  def : Pat<(v16i8 (ARMvshlu (v16i8 MQPR:$Qm), (v16i8 MQPR:$Qn))),
            (v16i8 (MVE_VSHL_by_vecu8 (v16i8 MQPR:$Qm), (v16i8 MQPR:$Qn)))>;

  // Signed.
  def : Pat<(v4i32 (ARMvshls (v4i32 MQPR:$Qm), (v4i32 MQPR:$Qn))),
            (v4i32 (MVE_VSHL_by_vecs32 (v4i32 MQPR:$Qm), (v4i32 MQPR:$Qn)))>;
  def : Pat<(v8i16 (ARMvshls (v8i16 MQPR:$Qm), (v8i16 MQPR:$Qn))),
            (v8i16 (MVE_VSHL_by_vecs16 (v8i16 MQPR:$Qm), (v8i16 MQPR:$Qn)))>;
  def : Pat<(v16i8 (ARMvshls (v16i8 MQPR:$Qm), (v16i8 MQPR:$Qn))),
            (v16i8 (MVE_VSHL_by_vecs8 (v16i8 MQPR:$Qm), (v16i8 MQPR:$Qn)))>;
}
|
|
|
|
|
2019-07-15 19:22:05 +08:00
|
|
|
// Common base for the vector shift-by-immediate instructions. It declares
// the Qd and Qm register fields and fixes the encoding bits shared by its
// subclasses; operands, assembly string, predication kind and constraint
// string are all supplied by the subclass and forwarded to MVE_p.
class MVE_shift_with_imm<string iname, string suffix, dag oops, dag iops,
                         string ops, vpred_ops vpred, string cstr,
                         list<dag> pattern=[]>
  : MVE_p<oops, iops, NoItinerary, iname, suffix, ops, vpred, cstr, pattern> {
  bits<4> Qd;
  bits<4> Qm;

  // Fixed opcode bits.
  let Inst{23} = 0b1;
  let Inst{12-11} = 0b00;
  let Inst{7-6} = 0b01;
  let Inst{4} = 0b1;
  let Inst{0} = 0b0;

  // Register operands: bit 3 of each register number is encoded separately
  // from bits 2-0.
  let Inst{22} = Qd{3};
  let Inst{15-13} = Qd{2-0};
  let Inst{5} = Qm{3};
  let Inst{3-1} = Qm{2-0};

  let validForTailPredication = 1;
}
|
|
|
|
|
2019-07-15 19:22:05 +08:00
|
|
|
// Shift-and-insert by immediate (instantiated below as vsri and vsli). The
// destination is also read: $Qd_src is an input tied to $Qd. `imm` is the
// (ins ...) dag holding the immediate operand, appended after the register
// inputs; bit_8 is written to Inst{8}.
class MVE_VSxI_imm<string iname, string suffix, bit bit_8, dag imm>
  : MVE_shift_with_imm<iname, suffix, (outs MQPR:$Qd),
                       !con((ins MQPR:$Qd_src, MQPR:$Qm), imm),
                       "$Qd, $Qm, $imm", vpred_n, "$Qd = $Qd_src"> {
  bits<6> imm;

  // Immediate field; instantiations override its top bits with a size marker.
  let Inst{21-16} = imm;

  // Fixed opcode bits and the variant selector.
  let Inst{28} = 0b1;
  let Inst{25-24} = 0b11;
  let Inst{10-9} = 0b10;
  let Inst{8} = bit_8;

  let validForTailPredication = 1;
}
|
2019-06-20 23:16:56 +08:00
|
|
|
|
2019-07-15 19:22:05 +08:00
|
|
|
// vsri / vsli for each element width. The class places the immediate in
// Inst{21-16}; each def then overrides the top bits of that field with a
// fixed per-width marker (0b001 / 0b01 / 0b1 for 8 / 16 / 32).
def MVE_VSRIimm8  : MVE_VSxI_imm<"vsri", "8", 0b0, (ins shr_imm8:$imm)> {
  let Inst{21-19} = 0b001;
}
def MVE_VSRIimm16 : MVE_VSxI_imm<"vsri", "16", 0b0, (ins shr_imm16:$imm)> {
  let Inst{21-20} = 0b01;
}
def MVE_VSRIimm32 : MVE_VSxI_imm<"vsri", "32", 0b0, (ins shr_imm32:$imm)> {
  let Inst{21} = 0b1;
}

def MVE_VSLIimm8  : MVE_VSxI_imm<"vsli", "8", 0b1, (ins imm0_7:$imm)> {
  let Inst{21-19} = 0b001;
}
def MVE_VSLIimm16 : MVE_VSxI_imm<"vsli", "16", 0b1, (ins imm0_15:$imm)> {
  let Inst{21-20} = 0b01;
}
def MVE_VSLIimm32 : MVE_VSxI_imm<"vsli", "32", 0b1, (ins imm0_31:$imm)> {
  let Inst{21} = 0b1;
}
|
|
|
|
|
2019-07-15 19:22:05 +08:00
|
|
|
// vqshl by immediate. `imm` is the (ins ...) dag for the immediate operand.
// Inst{28} is left for each instantiation to set.
class MVE_VQSHL_imm<string suffix, dag imm>
  : MVE_shift_with_imm<"vqshl", suffix, (outs MQPR:$Qd),
                       !con((ins MQPR:$Qm), imm), "$Qd, $Qm, $imm",
                       vpred_r, ""> {
  bits<6> imm;

  let Inst{21-16} = imm;

  // Fixed opcode bits.
  let Inst{25-24} = 0b11;
  let Inst{10-8} = 0b111;
}
|
2019-06-20 23:16:56 +08:00
|
|
|
|
2019-07-15 19:22:05 +08:00
|
|
|
// vqshl by immediate, signed (Inst{28} = 0) and unsigned (Inst{28} = 1), for
// each element width.
// NOTE(review): these records derive from MVE_VQSHL_imm and assemble as
// "vqshl", yet their names begin with MVE_VSLIimm. The names are left as-is
// because other code may refer to them; consider renaming in a coordinated
// change.
def MVE_VSLIimms8 : MVE_VQSHL_imm<"s8", (ins imm0_7:$imm)> {
  let Inst{28} = 0b0;
  let Inst{21-19} = 0b001;
}
def MVE_VSLIimmu8 : MVE_VQSHL_imm<"u8", (ins imm0_7:$imm)> {
  let Inst{28} = 0b1;
  let Inst{21-19} = 0b001;
}

def MVE_VSLIimms16 : MVE_VQSHL_imm<"s16", (ins imm0_15:$imm)> {
  let Inst{28} = 0b0;
  let Inst{21-20} = 0b01;
}
def MVE_VSLIimmu16 : MVE_VQSHL_imm<"u16", (ins imm0_15:$imm)> {
  let Inst{28} = 0b1;
  let Inst{21-20} = 0b01;
}

def MVE_VSLIimms32 : MVE_VQSHL_imm<"s32", (ins imm0_31:$imm)> {
  let Inst{28} = 0b0;
  let Inst{21} = 0b1;
}
def MVE_VSLIimmu32 : MVE_VQSHL_imm<"u32", (ins imm0_31:$imm)> {
  let Inst{28} = 0b1;
  let Inst{21} = 0b1;
}
|
2019-06-20 23:16:56 +08:00
|
|
|
|
2019-07-15 19:22:05 +08:00
|
|
|
// vqshlu by immediate. `imm` is the (ins ...) dag for the immediate operand.
class MVE_VQSHLU_imm<string suffix, dag imm>
  : MVE_shift_with_imm<"vqshlu", suffix, (outs MQPR:$Qd),
                       !con((ins MQPR:$Qm), imm), "$Qd, $Qm, $imm",
                       vpred_r, ""> {
  bits<6> imm;

  let Inst{21-16} = imm;

  // Fixed opcode bits.
  let Inst{28} = 0b1;
  let Inst{25-24} = 0b11;
  let Inst{10-8} = 0b110;
}
|
|
|
|
|
2019-07-15 19:22:05 +08:00
|
|
|
// vqshlu for each element width; each def overrides the top bits of the
// immediate field with a fixed per-width marker.
def MVE_VQSHLU_imms8  : MVE_VQSHLU_imm<"s8", (ins imm0_7:$imm)> {
  let Inst{21-19} = 0b001;
}
def MVE_VQSHLU_imms16 : MVE_VQSHLU_imm<"s16", (ins imm0_15:$imm)> {
  let Inst{21-20} = 0b01;
}
def MVE_VQSHLU_imms32 : MVE_VQSHLU_imm<"s32", (ins imm0_31:$imm)> {
  let Inst{21} = 0b1;
}
|
2019-06-20 23:16:56 +08:00
|
|
|
|
2019-07-15 19:22:05 +08:00
|
|
|
// vrshr by immediate. `imm` is the (ins ...) dag for the immediate operand.
// Inst{28} is left for each instantiation to set.
class MVE_VRSHR_imm<string suffix, dag imm>
  : MVE_shift_with_imm<"vrshr", suffix, (outs MQPR:$Qd),
                       !con((ins MQPR:$Qm), imm), "$Qd, $Qm, $imm",
                       vpred_r, ""> {
  bits<6> imm;

  let Inst{21-16} = imm;

  // Fixed opcode bits.
  let Inst{25-24} = 0b11;
  let Inst{10-8} = 0b010;
}
|
|
|
|
|
2019-07-15 19:22:05 +08:00
|
|
|
// vrshr, signed (Inst{28} = 0) and unsigned (Inst{28} = 1), for each element
// width; each def also overrides the top bits of the immediate field with a
// fixed per-width marker.
def MVE_VRSHR_imms8 : MVE_VRSHR_imm<"s8", (ins shr_imm8:$imm)> {
  let Inst{28} = 0b0;
  let Inst{21-19} = 0b001;
}
def MVE_VRSHR_immu8 : MVE_VRSHR_imm<"u8", (ins shr_imm8:$imm)> {
  let Inst{28} = 0b1;
  let Inst{21-19} = 0b001;
}

def MVE_VRSHR_imms16 : MVE_VRSHR_imm<"s16", (ins shr_imm16:$imm)> {
  let Inst{28} = 0b0;
  let Inst{21-20} = 0b01;
}
def MVE_VRSHR_immu16 : MVE_VRSHR_imm<"u16", (ins shr_imm16:$imm)> {
  let Inst{28} = 0b1;
  let Inst{21-20} = 0b01;
}

def MVE_VRSHR_imms32 : MVE_VRSHR_imm<"s32", (ins shr_imm32:$imm)> {
  let Inst{28} = 0b0;
  let Inst{21} = 0b1;
}
def MVE_VRSHR_immu32 : MVE_VRSHR_imm<"u32", (ins shr_imm32:$imm)> {
  let Inst{28} = 0b1;
  let Inst{21} = 0b1;
}
|
|
|
|
|
2019-07-15 19:22:05 +08:00
|
|
|
// vshr by immediate. `imm` is the (ins ...) dag for the immediate operand.
// Inst{28} is left for each instantiation to set.
class MVE_VSHR_imm<string suffix, dag imm>
  : MVE_shift_with_imm<"vshr", suffix, (outs MQPR:$Qd),
                       !con((ins MQPR:$Qm), imm), "$Qd, $Qm, $imm",
                       vpred_r, ""> {
  bits<6> imm;

  let Inst{21-16} = imm;

  // Fixed opcode bits.
  let Inst{25-24} = 0b11;
  let Inst{10-8} = 0b000;
}
|
2019-07-15 19:22:05 +08:00
|
|
|
|
|
|
|
// vshr, signed (Inst{28} = 0) and unsigned (Inst{28} = 1), for each element
// width; each def also overrides the top bits of the immediate field with a
// fixed per-width marker.
def MVE_VSHR_imms8 : MVE_VSHR_imm<"s8", (ins shr_imm8:$imm)> {
  let Inst{28} = 0b0;
  let Inst{21-19} = 0b001;
}
def MVE_VSHR_immu8 : MVE_VSHR_imm<"u8", (ins shr_imm8:$imm)> {
  let Inst{28} = 0b1;
  let Inst{21-19} = 0b001;
}

def MVE_VSHR_imms16 : MVE_VSHR_imm<"s16", (ins shr_imm16:$imm)> {
  let Inst{28} = 0b0;
  let Inst{21-20} = 0b01;
}
def MVE_VSHR_immu16 : MVE_VSHR_imm<"u16", (ins shr_imm16:$imm)> {
  let Inst{28} = 0b1;
  let Inst{21-20} = 0b01;
}

def MVE_VSHR_imms32 : MVE_VSHR_imm<"s32", (ins shr_imm32:$imm)> {
  let Inst{28} = 0b0;
  let Inst{21} = 0b1;
}
def MVE_VSHR_immu32 : MVE_VSHR_imm<"u32", (ins shr_imm32:$imm)> {
  let Inst{28} = 0b1;
  let Inst{21} = 0b1;
}
|
|
|
|
|
2019-07-15 19:22:05 +08:00
|
|
|
// vshl by immediate. `imm` is the (ins ...) dag for the immediate operand.
class MVE_VSHL_imm<string suffix, dag imm>
  : MVE_shift_with_imm<"vshl", suffix, (outs MQPR:$Qd),
                       !con((ins MQPR:$Qm), imm), "$Qd, $Qm, $imm",
                       vpred_r, ""> {
  bits<6> imm;

  let Inst{21-16} = imm;

  // Fixed opcode bits.
  let Inst{28} = 0b0;
  let Inst{25-24} = 0b11;
  let Inst{10-8} = 0b101;
}
|
|
|
|
|
2019-07-15 19:22:05 +08:00
|
|
|
// vshl for each element width; each def overrides the top bits of the
// immediate field with a fixed per-width marker.
def MVE_VSHL_immi8  : MVE_VSHL_imm<"i8", (ins imm0_7:$imm)> {
  let Inst{21-19} = 0b001;
}
def MVE_VSHL_immi16 : MVE_VSHL_imm<"i16", (ins imm0_15:$imm)> {
  let Inst{21-20} = 0b01;
}
def MVE_VSHL_immi32 : MVE_VSHL_imm<"i32", (ins imm0_31:$imm)> {
  let Inst{21} = 0b1;
}
|
|
|
|
|
2019-07-15 19:35:39 +08:00
|
|
|
// Instruction selection for shifts by an immediate:
//   ARMvshlImm  -> vshl.iN
//   ARMvshruImm -> vshr.uN
//   ARMvshrsImm -> vshr.sN
// NOTE(review): the right-shift patterns match the immediate with imm0_N,
// whereas the VSHR instruction definitions declare shr_immN operands --
// confirm that the matched range is the intended one.
let Predicates = [HasMVEInt] in {
  // Left shift.
  def : Pat<(v4i32 (ARMvshlImm (v4i32 MQPR:$src), imm0_31:$imm)),
            (v4i32 (MVE_VSHL_immi32 (v4i32 MQPR:$src), imm0_31:$imm))>;
  def : Pat<(v8i16 (ARMvshlImm (v8i16 MQPR:$src), imm0_15:$imm)),
            (v8i16 (MVE_VSHL_immi16 (v8i16 MQPR:$src), imm0_15:$imm))>;
  def : Pat<(v16i8 (ARMvshlImm (v16i8 MQPR:$src), imm0_7:$imm)),
            (v16i8 (MVE_VSHL_immi8 (v16i8 MQPR:$src), imm0_7:$imm))>;

  // Unsigned right shift.
  def : Pat<(v4i32 (ARMvshruImm (v4i32 MQPR:$src), imm0_31:$imm)),
            (v4i32 (MVE_VSHR_immu32 (v4i32 MQPR:$src), imm0_31:$imm))>;
  def : Pat<(v8i16 (ARMvshruImm (v8i16 MQPR:$src), imm0_15:$imm)),
            (v8i16 (MVE_VSHR_immu16 (v8i16 MQPR:$src), imm0_15:$imm))>;
  def : Pat<(v16i8 (ARMvshruImm (v16i8 MQPR:$src), imm0_7:$imm)),
            (v16i8 (MVE_VSHR_immu8 (v16i8 MQPR:$src), imm0_7:$imm))>;

  // Signed right shift.
  def : Pat<(v4i32 (ARMvshrsImm (v4i32 MQPR:$src), imm0_31:$imm)),
            (v4i32 (MVE_VSHR_imms32 (v4i32 MQPR:$src), imm0_31:$imm))>;
  def : Pat<(v8i16 (ARMvshrsImm (v8i16 MQPR:$src), imm0_15:$imm)),
            (v8i16 (MVE_VSHR_imms16 (v8i16 MQPR:$src), imm0_15:$imm))>;
  def : Pat<(v16i8 (ARMvshrsImm (v16i8 MQPR:$src), imm0_7:$imm)),
            (v16i8 (MVE_VSHR_imms8 (v16i8 MQPR:$src), imm0_7:$imm))>;
}
|
|
|
|
|
2019-07-15 19:22:05 +08:00
|
|
|
// end of mve_shift instructions
|
2019-06-20 23:16:56 +08:00
|
|
|
|
[ARM] Add a batch of MVE floating-point instructions.
Summary:
This includes floating-point basic arithmetic (add/sub/multiply),
complex add/multiply, unary negation and absolute value, rounding to
integer value, and conversion to/from integer formats.
Reviewers: dmgreen, samparker, SjoerdMeijer, t.p.northover
Subscribers: javed.absar, kristof.beyls, hiraditya, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D62675
llvm-svn: 364013
2019-06-21 17:35:07 +08:00
|
|
|
// start of MVE Floating Point instructions
|
|
|
|
|
|
|
|
// Common base for the MVE floating-point instructions in this section. It
// declares the Qm register field and fixes the encoding bits shared by its
// subclasses; everything else is forwarded unchanged to MVE_f.
class MVE_float<string iname, string suffix, dag oops, dag iops, string ops,
                vpred_ops vpred, string cstr, list<dag> pattern=[]>
  : MVE_f<oops, iops, NoItinerary, iname, suffix, ops, vpred, cstr, pattern> {
  bits<4> Qm;

  // Fixed opcode bits.
  let Inst{12} = 0b0;
  let Inst{6} = 0b1;
  let Inst{0} = 0b0;

  // Qm: bit 3 of the register number is encoded separately from bits 2-0.
  let Inst{5} = Qm{3};
  let Inst{3-1} = Qm{2-0};
}
|
|
|
|
|
|
|
|
// vrint<rmode>: round to an integral value, kept in floating-point format.
//   rmode - letter appended to "vrint" to form the mnemonic.
//   op    - 3-bit selector written to Inst{9-7}.
//   size  - written to Inst{19-18}.
class MVE_VRINT<string rmode, bits<3> op, string suffix, bits<2> size,
                list<dag> pattern=[]>
  : MVE_float<!strconcat("vrint", rmode), suffix, (outs MQPR:$Qd),
              (ins MQPR:$Qm), "$Qd, $Qm", vpred_r, "", pattern> {
  bits<4> Qd;

  // Fixed opcode bits.
  let Inst{28} = 0b1;
  let Inst{25-23} = 0b111;
  let Inst{21-20} = 0b11;
  let Inst{17-16} = 0b10;
  let Inst{11-10} = 0b01;
  let Inst{4} = 0b0;

  // Variant selectors supplied by the instantiation.
  let Inst{19-18} = size;
  let Inst{9-7} = op{2-0};

  // Qd: bit 3 of the register number is encoded separately from bits 2-0.
  let Inst{22} = Qd{3};
  let Inst{15-13} = Qd{2-0};

  let validForTailPredication = 1;
}
|
|
|
|
|
|
|
|
// Expands one element type into the six vrint variants; each def name is the
// upper-case form of the mnemonic's rounding-mode letter.
multiclass MVE_VRINT_ops<string suffix, bits<2> size, list<dag> pattern=[]> {
  def N : MVE_VRINT<"n", 0b000, suffix, size, pattern>;
  def X : MVE_VRINT<"x", 0b001, suffix, size, pattern>;
  def A : MVE_VRINT<"a", 0b010, suffix, size, pattern>;
  def Z : MVE_VRINT<"z", 0b011, suffix, size, pattern>;
  def M : MVE_VRINT<"m", 0b101, suffix, size, pattern>;
  def P : MVE_VRINT<"p", 0b111, suffix, size, pattern>;
}
|
|
|
|
|
|
|
|
// vrint families for half precision (size 0b01) and single precision
// (size 0b10).
defm MVE_VRINTf16 : MVE_VRINT_ops<"f16", 0b01>;
defm MVE_VRINTf32 : MVE_VRINT_ops<"f32", 0b10>;
|
|
|
|
|
2019-07-13 22:38:53 +08:00
|
|
|
// Instruction selection for the rounding nodes:
//   frint -> vrintx, fround -> vrinta, ftrunc -> vrintz,
//   ffloor -> vrintm, fceil -> vrintp.
let Predicates = [HasMVEFloat] in {
  def : Pat<(v4f32 (frint (v4f32 MQPR:$src))),
            (v4f32 (MVE_VRINTf32X (v4f32 MQPR:$src)))>;
  def : Pat<(v8f16 (frint (v8f16 MQPR:$src))),
            (v8f16 (MVE_VRINTf16X (v8f16 MQPR:$src)))>;

  def : Pat<(v4f32 (fround (v4f32 MQPR:$src))),
            (v4f32 (MVE_VRINTf32A (v4f32 MQPR:$src)))>;
  def : Pat<(v8f16 (fround (v8f16 MQPR:$src))),
            (v8f16 (MVE_VRINTf16A (v8f16 MQPR:$src)))>;

  def : Pat<(v4f32 (ftrunc (v4f32 MQPR:$src))),
            (v4f32 (MVE_VRINTf32Z (v4f32 MQPR:$src)))>;
  def : Pat<(v8f16 (ftrunc (v8f16 MQPR:$src))),
            (v8f16 (MVE_VRINTf16Z (v8f16 MQPR:$src)))>;

  def : Pat<(v4f32 (ffloor (v4f32 MQPR:$src))),
            (v4f32 (MVE_VRINTf32M (v4f32 MQPR:$src)))>;
  def : Pat<(v8f16 (ffloor (v8f16 MQPR:$src))),
            (v8f16 (MVE_VRINTf16M (v8f16 MQPR:$src)))>;

  def : Pat<(v4f32 (fceil (v4f32 MQPR:$src))),
            (v4f32 (MVE_VRINTf32P (v4f32 MQPR:$src)))>;
  def : Pat<(v8f16 (fceil (v8f16 MQPR:$src))),
            (v8f16 (MVE_VRINTf16P (v8f16 MQPR:$src)))>;
}
|
|
|
|
|
[ARM] Add a batch of MVE floating-point instructions.
Summary:
This includes floating-point basic arithmetic (add/sub/multiply),
complex add/multiply, unary negation and absolute value, rounding to
integer value, and conversion to/from integer formats.
Reviewers: dmgreen, samparker, SjoerdMeijer, t.p.northover
Subscribers: javed.absar, kristof.beyls, hiraditya, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D62675
llvm-svn: 364013
2019-06-21 17:35:07 +08:00
|
|
|
// Intermediate base for floating-point arithmetic instructions: on top of
// MVE_float it writes `size` to Inst{20} and clears Inst{16}.
class MVEFloatArithNeon<string iname, string suffix, bit size,
                        dag oops, dag iops, string ops,
                        vpred_ops vpred, string cstr, list<dag> pattern=[]>
  : MVE_float<iname, suffix, oops, iops, ops, vpred, cstr, pattern> {
  let Inst{16} = 0b0;
  let Inst{20} = size;
}
|
|
|
|
|
2019-11-25 22:10:59 +08:00
|
|
|
// Floating-point vector multiply, "$Qd, $Qn, $Qm". `size` is forwarded to
// MVEFloatArithNeon.
class MVE_VMUL_fp<string iname, string suffix, bit size, list<dag> pattern=[]>
  : MVEFloatArithNeon<iname, suffix, size, (outs MQPR:$Qd),
                      (ins MQPR:$Qn, MQPR:$Qm), "$Qd, $Qn, $Qm", vpred_r, "",
                      pattern> {
  bits<4> Qd;
  bits<4> Qn;

  // Fixed opcode bits.
  let Inst{28} = 0b1;
  let Inst{25-23} = 0b110;
  let Inst{21} = 0b0;
  let Inst{12-8} = 0b01101;
  let Inst{4} = 0b1;

  // Register operands: bit 3 of each register number is encoded separately
  // from bits 2-0.
  let Inst{22} = Qd{3};
  let Inst{15-13} = Qd{2-0};
  let Inst{7} = Qn{3};
  let Inst{19-17} = Qn{2-0};

  let validForTailPredication = 1;
}
|
|
|
|
|
2019-11-25 22:10:59 +08:00
|
|
|
// Defines one floating-point multiply instruction plus its two selection
// patterns.
//   VTI       - vector type info; provides Suffix, Size, Vec and Pred.
//   unpred_op - SDNode selected to the unpredicated instruction.
//   pred_int  - intrinsic selected to the predicated instruction.
// NOTE: bit_21 is accepted but not used anywhere in this multiclass
// (MVE_VMUL_fp hard-codes Inst{21}); it is kept so existing instantiations
// keep working.
multiclass MVE_VMULT_fp_m<string iname, bit bit_21, MVEVectorVTInfo VTI,
                          SDNode unpred_op, Intrinsic pred_int> {
  def "" : MVE_VMUL_fp<iname, VTI.Suffix, VTI.Size{0}>;

  let Predicates = [HasMVEFloat] in {
    // Unpredicated: operands are passed through in order, so the first
    // source lands in the instruction's $Qn slot and the second in $Qm.
    def : Pat<(VTI.Vec (unpred_op (VTI.Vec MQPR:$Qn), (VTI.Vec MQPR:$Qm))),
              (VTI.Vec (!cast<Instruction>(NAME)
                            (VTI.Vec MQPR:$Qn), (VTI.Vec MQPR:$Qm)))>;

    // Predicated: the mask and the value for inactive lanes follow the two
    // sources. The literal (i32 1) is presumably the "then" predication
    // code -- confirm against the vpred operand definition.
    def : Pat<(VTI.Vec (pred_int (VTI.Vec MQPR:$Qn), (VTI.Vec MQPR:$Qm),
                                 (VTI.Pred VCCR:$mask),
                                 (VTI.Vec MQPR:$inactive))),
              (VTI.Vec (!cast<Instruction>(NAME)
                            (VTI.Vec MQPR:$Qn), (VTI.Vec MQPR:$Qm),
                            (i32 1), (VTI.Pred VCCR:$mask),
                            (VTI.Vec MQPR:$inactive)))>;
  }
}
|
|
|
|
|
2019-11-25 22:10:59 +08:00
|
|
|
// vmul: binds MVE_VMULT_fp_m to the fmul node and the predicated-multiply
// intrinsic, then instantiates it for both floating-point vector types.
multiclass MVE_VMUL_fp_m<MVEVectorVTInfo VTI>
  : MVE_VMULT_fp_m<"vmul", 0, VTI, fmul, int_arm_mve_mul_predicated>;

defm MVE_VMULf32 : MVE_VMUL_fp_m<MVE_v4f32>;
defm MVE_VMULf16 : MVE_VMUL_fp_m<MVE_v8f16>;
|
|
|
|
|
[ARM] Add a batch of MVE floating-point instructions.
Summary:
This includes floating-point basic arithmetic (add/sub/multiply),
complex add/multiply, unary negation and absolute value, rounding to
integer value, and conversion to/from integer formats.
Reviewers: dmgreen, samparker, SjoerdMeijer, t.p.northover
Subscribers: javed.absar, kristof.beyls, hiraditya, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D62675
llvm-svn: 364013
2019-06-21 17:35:07 +08:00
|
|
|
// vcmla: "$Qd, $Qn, $Qm, $rot". The destination is also read: $Qd_src is an
// input tied to $Qd. The 2-bit rotation operand is written to Inst{24-23}.
class MVE_VCMLA<string suffix, bit size, list<dag> pattern=[]>
  : MVEFloatArithNeon<"vcmla", suffix, size, (outs MQPR:$Qd),
                      (ins MQPR:$Qd_src, MQPR:$Qn, MQPR:$Qm,
                           complexrotateop:$rot),
                      "$Qd, $Qn, $Qm, $rot", vpred_n, "$Qd = $Qd_src",
                      pattern> {
  bits<4> Qd;
  bits<4> Qn;
  bits<2> rot;

  // Fixed opcode bits.
  let Inst{28} = 0b1;
  let Inst{25} = 0b0;
  let Inst{21} = 0b1;
  let Inst{12-8} = 0b01000;
  let Inst{4} = 0b0;

  // Rotation operand.
  let Inst{24-23} = rot;

  // Register operands: bit 3 of each register number is encoded separately
  // from bits 2-0.
  let Inst{22} = Qd{3};
  let Inst{15-13} = Qd{2-0};
  let Inst{7} = Qn{3};
  let Inst{19-17} = Qn{2-0};
}
|
|
|
|
|
|
|
|
// vcmla for half precision (size bit 0) and single precision (size bit 1).
def MVE_VCMLAf16 : MVE_VCMLA<"f16", 0b0>;
def MVE_VCMLAf32 : MVE_VCMLA<"f32", 0b1>;
|
|
|
|
|
|
|
|
// Shared encoding for the floating-point vadd, vsub, vfma and vfms
// instructions, "$Qd, $Qn, $Qm".
//   size                 - forwarded to MVEFloatArithNeon.
//   bit_4, bit_8, bit_21 - written to Inst{4}, Inst{8} and Inst{21}.
//   iops                 - extra inputs placed before $Qn and $Qm (the
//                          fused forms pass the tied accumulator here).
//   vpred, cstr          - predication kind and constraint string.
class MVE_VADDSUBFMA_fp<string iname, string suffix, bit size, bit bit_4,
                        bit bit_8, bit bit_21, dag iops=(ins),
                        vpred_ops vpred=vpred_r, string cstr="",
                        list<dag> pattern=[]>
  : MVEFloatArithNeon<iname, suffix, size, (outs MQPR:$Qd),
                      !con(iops, (ins MQPR:$Qn, MQPR:$Qm)), "$Qd, $Qn, $Qm",
                      vpred, cstr, pattern> {
  bits<4> Qd;
  bits<4> Qn;

  // Fixed opcode bits.
  let Inst{28} = 0b0;
  let Inst{25-23} = 0b110;
  let Inst{11-9} = 0b110;

  // Variant selectors supplied by the instantiation.
  let Inst{21} = bit_21;
  let Inst{8} = bit_8;
  let Inst{4} = bit_4;

  // Register operands: bit 3 of each register number is encoded separately
  // from bits 2-0.
  let Inst{22} = Qd{3};
  let Inst{15-13} = Qd{2-0};
  let Inst{7} = Qn{3};
  let Inst{19-17} = Qn{2-0};
}
|
|
|
|
|
|
|
|
// Fused multiply-accumulate (vfma) and multiply-subtract (vfms). The
// accumulator $Qd_src is an extra input tied to $Qd. Template arguments after
// the suffix are (size, bit_4, bit_8, bit_21); vfms differs from vfma only
// in bit_21.
def MVE_VFMAf32 : MVE_VADDSUBFMA_fp<"vfma", "f32", 0b0, 0b1, 0b0, 0b0,
                                    (ins MQPR:$Qd_src), vpred_n,
                                    "$Qd = $Qd_src">;
def MVE_VFMAf16 : MVE_VADDSUBFMA_fp<"vfma", "f16", 0b1, 0b1, 0b0, 0b0,
                                    (ins MQPR:$Qd_src), vpred_n,
                                    "$Qd = $Qd_src">;

def MVE_VFMSf32 : MVE_VADDSUBFMA_fp<"vfms", "f32", 0b0, 0b1, 0b0, 0b1,
                                    (ins MQPR:$Qd_src), vpred_n,
                                    "$Qd = $Qd_src">;
def MVE_VFMSf16 : MVE_VADDSUBFMA_fp<"vfms", "f16", 0b1, 0b1, 0b0, 0b1,
                                    (ins MQPR:$Qd_src), vpred_n,
                                    "$Qd = $Qd_src">;
|
|
|
|
|
2019-08-08 16:21:01 +08:00
|
|
|
// Instruction selection for fused multiply-add. The fma node carries
// (multiplicand, multiplier, addend); the instructions take the addend first
// because it is the tied accumulator operand. An fneg on the first
// multiplicand selects vfms instead of vfma.
let Predicates = [HasMVEFloat] in {
  def : Pat<(v8f16 (fma (v8f16 MQPR:$lhs), (v8f16 MQPR:$rhs),
                        (v8f16 MQPR:$acc))),
            (v8f16 (MVE_VFMAf16 $acc, $lhs, $rhs))>;
  def : Pat<(v4f32 (fma (v4f32 MQPR:$lhs), (v4f32 MQPR:$rhs),
                        (v4f32 MQPR:$acc))),
            (v4f32 (MVE_VFMAf32 $acc, $lhs, $rhs))>;

  def : Pat<(v8f16 (fma (fneg (v8f16 MQPR:$lhs)), (v8f16 MQPR:$rhs),
                        (v8f16 MQPR:$acc))),
            (v8f16 (MVE_VFMSf16 $acc, $lhs, $rhs))>;
  def : Pat<(v4f32 (fma (fneg (v4f32 MQPR:$lhs)), (v4f32 MQPR:$rhs),
                        (v4f32 MQPR:$acc))),
            (v4f32 (MVE_VFMSf32 $acc, $lhs, $rhs))>;
}
|
|
|
|
|
[ARM] Begin adding IR intrinsics for MVE instructions.
This commit, together with the next few, will add a representative
sample of the kind of IR intrinsics that we'll need in order to
implement the user-facing ACLE intrinsics for MVE. Supporting all of
them will take more work; the intention of this initial series of
commits is to implement an intrinsic or two from lots of different
categories, as examples and proofs of concept.
This initial commit introduces a small number of IR intrinsics for
instructions simple enough that they can use Tablegen ISel patterns:
the predicated versions of the VADD and VSUB instructions (both
integer and FP), VMIN and VMAX, and the float->half VCVT instruction
(predicated and unpredicated).
When using VPT-predicated instructions in automatic code generation,
it will be convenient to specify the predicate value as a vector of
the appropriate number of i1. To make it easy to specify all sizes of
an instruction in one go and give each one the matching predicate
vector type, I've added a system of Tablegen informational records
describing MVE's vector types: each one gives the underlying LLVM IR
ValueType (which may not be the same if the MVE vector is of
explicitly signed or unsigned integers) and an appropriate vNi1 to use
as the predicate vector.
(Also, those info records include the usual encoding for the types, so
that as we add associations between each instruction encoding and one
of the new `MVEVectorVTInfo` records, we can remove some of the
existing template parameters and replace them with references to the
vector type info's fields.)
The user-facing ACLE intrinsics will receive a predicate mask as a
16-bit integer, so I've also provided a pair of intrinsics i2v and
v2i, to convert between an integer and a vector of i1 by just changing
the register class.
Reviewers: dmgreen, miyuki, ostannard
Subscribers: javed.absar, kristof.beyls, hiraditya, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D67158
2019-10-08 00:00:51 +08:00
|
|
|
// Defines one floating-point add/sub instruction plus its two selection
// patterns.
//   bit_21    - written to Inst{21} by MVE_VADDSUBFMA_fp.
//   VTI       - vector type info; provides Suffix, Size, Vec and Pred.
//   unpred_op - SDNode selected to the unpredicated instruction.
//   pred_int  - intrinsic selected to the predicated instruction.
multiclass MVE_VADDSUB_fp_m<string iname, bit bit_21, MVEVectorVTInfo VTI,
                            SDNode unpred_op, Intrinsic pred_int> {
  // bit_4 = 0 and bit_8 = 1 for these forms.
  def "" : MVE_VADDSUBFMA_fp<iname, VTI.Suffix, VTI.Size{0}, 0, 1, bit_21> {
    let validForTailPredication = 1;
  }

  let Predicates = [HasMVEFloat] in {
    // Unpredicated: operands are passed through in order, so the first
    // source lands in the instruction's $Qn slot and the second in $Qm.
    def : Pat<(VTI.Vec (unpred_op (VTI.Vec MQPR:$Qn), (VTI.Vec MQPR:$Qm))),
              (VTI.Vec (!cast<Instruction>(NAME)
                            (VTI.Vec MQPR:$Qn), (VTI.Vec MQPR:$Qm)))>;

    // Predicated: the mask and the value for inactive lanes follow the two
    // sources. The literal (i32 1) is presumably the "then" predication
    // code -- confirm against the vpred operand definition.
    def : Pat<(VTI.Vec (pred_int (VTI.Vec MQPR:$Qn), (VTI.Vec MQPR:$Qm),
                                 (VTI.Pred VCCR:$mask),
                                 (VTI.Vec MQPR:$inactive))),
              (VTI.Vec (!cast<Instruction>(NAME)
                            (VTI.Vec MQPR:$Qn), (VTI.Vec MQPR:$Qm),
                            (i32 1), (VTI.Pred VCCR:$mask),
                            (VTI.Vec MQPR:$inactive)))>;
  }
}
|
|
|
|
|
[ARM] Begin adding IR intrinsics for MVE instructions.
This commit, together with the next few, will add a representative
sample of the kind of IR intrinsics that we'll need in order to
implement the user-facing ACLE intrinsics for MVE. Supporting all of
them will take more work; the intention of this initial series of
commits is to implement an intrinsic or two from lots of different
categories, as examples and proofs of concept.
This initial commit introduces a small number of IR intrinsics for
instructions simple enough that they can use Tablegen ISel patterns:
the predicated versions of the VADD and VSUB instructions (both
integer and FP), VMIN and VMAX, and the float->half VCVT instruction
(predicated and unpredicated).
When using VPT-predicated instructions in automatic code generation,
it will be convenient to specify the predicate value as a vector of
the appropriate number of i1. To make it easy to specify all sizes of
an instruction in one go and give each one the matching predicate
vector type, I've added a system of Tablegen informational records
describing MVE's vector types: each one gives the underlying LLVM IR
ValueType (which may not be the same if the MVE vector is of
explicitly signed or unsigned integers) and an appropriate vNi1 to use
as the predicate vector.
(Also, those info records include the usual encoding for the types, so
that as we add associations between each instruction encoding and one
of the new `MVEVectorVTInfo` records, we can remove some of the
existing template parameters and replace them with references to the
vector type info's fields.)
The user-facing ACLE intrinsics will receive a predicate mask as a
16-bit integer, so I've also provided a pair of intrinsics i2v and
v2i, to convert between an integer and a vector of i1 by just changing
the register class.
Reviewers: dmgreen, miyuki, ostannard
Subscribers: javed.absar, kristof.beyls, hiraditya, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D67158
2019-10-08 00:00:51 +08:00
|
|
|
// vadd / vsub: bind MVE_VADDSUB_fp_m to the matching SDNode and predicated
// intrinsic. The two differ in the encoding only through bit_21 (0 for vadd,
// 1 for vsub).
multiclass MVE_VADD_fp_m<MVEVectorVTInfo VTI>
  : MVE_VADDSUB_fp_m<"vadd", 0, VTI, fadd, int_arm_mve_add_predicated>;
multiclass MVE_VSUB_fp_m<MVEVectorVTInfo VTI>
  : MVE_VADDSUB_fp_m<"vsub", 1, VTI, fsub, int_arm_mve_sub_predicated>;
|
2019-10-15 21:12:51 +08:00
|
|
|
|
[ARM] Begin adding IR intrinsics for MVE instructions.
This commit, together with the next few, will add a representative
sample of the kind of IR intrinsics that we'll need in order to
implement the user-facing ACLE intrinsics for MVE. Supporting all of
them will take more work; the intention of this initial series of
commits is to implement an intrinsic or two from lots of different
categories, as examples and proofs of concept.
This initial commit introduces a small number of IR intrinsics for
instructions simple enough that they can use Tablegen ISel patterns:
the predicated versions of the VADD and VSUB instructions (both
integer and FP), VMIN and VMAX, and the float->half VCVT instruction
(predicated and unpredicated).
When using VPT-predicated instructions in automatic code generation,
it will be convenient to specify the predicate value as a vector of
the appropriate number of i1. To make it easy to specify all sizes of
an instruction in one go and give each one the matching predicate
vector type, I've added a system of Tablegen informational records
describing MVE's vector types: each one gives the underlying LLVM IR
ValueType (which may not be the same if the MVE vector is of
explicitly signed or unsigned integers) and an appropriate vNi1 to use
as the predicate vector.
(Also, those info records include the usual encoding for the types, so
that as we add associations between each instruction encoding and one
of the new `MVEVectorVTInfo` records, we can remove some of the
existing template parameters and replace them with references to the
vector type info's fields.)
The user-facing ACLE intrinsics will receive a predicate mask as a
16-bit integer, so I've also provided a pair of intrinsics i2v and
v2i, to convert between an integer and a vector of i1 by just changing
the register class.
Reviewers: dmgreen, miyuki, ostannard
Subscribers: javed.absar, kristof.beyls, hiraditya, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D67158
2019-10-08 00:00:51 +08:00
|
|
|
// Floating-point vadd for both vector types.
defm MVE_VADDf32 : MVE_VADD_fp_m<MVE_v4f32>;
defm MVE_VADDf16 : MVE_VADD_fp_m<MVE_v8f16>;
|
[ARM] Add a batch of MVE floating-point instructions.
Summary:
This includes floating-point basic arithmetic (add/sub/multiply),
complex add/multiply, unary negation and absolute value, rounding to
integer value, and conversion to/from integer formats.
Reviewers: dmgreen, samparker, SjoerdMeijer, t.p.northover
Subscribers: javed.absar, kristof.beyls, hiraditya, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D62675
llvm-svn: 364013
2019-06-21 17:35:07 +08:00
|
|
|
|
[ARM] Begin adding IR intrinsics for MVE instructions.
This commit, together with the next few, will add a representative
sample of the kind of IR intrinsics that we'll need in order to
implement the user-facing ACLE intrinsics for MVE. Supporting all of
them will take more work; the intention of this initial series of
commits is to implement an intrinsic or two from lots of different
categories, as examples and proofs of concept.
This initial commit introduces a small number of IR intrinsics for
instructions simple enough that they can use Tablegen ISel patterns:
the predicated versions of the VADD and VSUB instructions (both
integer and FP), VMIN and VMAX, and the float->half VCVT instruction
(predicated and unpredicated).
When using VPT-predicated instructions in automatic code generation,
it will be convenient to specify the predicate value as a vector of
the appropriate number of i1. To make it easy to specify all sizes of
an instruction in one go and give each one the matching predicate
vector type, I've added a system of Tablegen informational records
describing MVE's vector types: each one gives the underlying LLVM IR
ValueType (which may not be the same if the MVE vector is of
explicitly signed or unsigned integers) and an appropriate vNi1 to use
as the predicate vector.
(Also, those info records include the usual encoding for the types, so
that as we add associations between each instruction encoding and one
of the new `MVEVectorVTInfo` records, we can remove some of the
existing template parameters and replace them with references to the
vector type info's fields.)
The user-facing ACLE intrinsics will receive a predicate mask as a
16-bit integer, so I've also provided a pair of intrinsics i2v and
v2i, to convert between an integer and a vector of i1 by just changing
the register class.
Reviewers: dmgreen, miyuki, ostannard
Subscribers: javed.absar, kristof.beyls, hiraditya, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D67158
2019-10-08 00:00:51 +08:00
|
|
|
// Floating-point vsub for both vector types.
defm MVE_VSUBf32 : MVE_VSUB_fp_m<MVE_v4f32>;
defm MVE_VSUBf16 : MVE_VSUB_fp_m<MVE_v8f16>;
|
2019-06-28 15:41:09 +08:00
|
|
|
|
2019-09-13 19:20:17 +08:00
|
|
|
// Encoding class for the MVE floating-point "vcadd" instruction.
//   suffix  - assembly type suffix (e.g. "f16", "f32").
//   size    - element-size bit, forwarded to the MVEFloatArithNeon base class.
//   cstr    - extra operand constraint string (empty by default).
//   pattern - ISel patterns attached to the instruction (none by default).
// Operands: $Qd is the output; $Qn, $Qm and a rotation operand $rot (operand
// type complexrotateopodd) are the inputs. Only Qd, Qn and rot are placed into
// the encoding here; Qm and size are presumably encoded by the base class.
class MVE_VCADD<string suffix, bit size, string cstr="", list<dag> pattern=[]>
  : MVEFloatArithNeon<"vcadd", suffix, size, (outs MQPR:$Qd),
                      (ins MQPR:$Qn, MQPR:$Qm, complexrotateopodd:$rot),
                      "$Qd, $Qn, $Qm, $rot", vpred_r, cstr, pattern> {
  bits<4> Qd;
  bits<4> Qn;
  bit rot;

  let Inst{28} = 0b1;
  let Inst{25} = 0b0;
  let Inst{24} = rot;          // single-bit rotation selector
  let Inst{23} = 0b1;
  let Inst{22} = Qd{3};        // top bit of Qd is split from the low three
  let Inst{21} = 0b0;
  let Inst{19-17} = Qn{2-0};
  let Inst{15-13} = Qd{2-0};
  let Inst{12-8} = 0b01000;
  let Inst{7} = Qn{3};         // top bit of Qn is split from the low three
  let Inst{4} = 0b0;
}
|
|
|
|
|
|
|
|
// Concrete VCADD instructions. The f32 form additionally passes the
// "@earlyclobber $Qd" constraint; the f16 form has no extra constraint.
def MVE_VCADDf16 : MVE_VCADD<"f16", 0b0>;
def MVE_VCADDf32 : MVE_VCADD<"f32", 0b1, "@earlyclobber $Qd">;
|
[ARM] Add a batch of MVE floating-point instructions.
Summary:
This includes floating-point basic arithmetic (add/sub/multiply),
complex add/multiply, unary negation and absolute value, rounding to
integer value, and conversion to/from integer formats.
Reviewers: dmgreen, samparker, SjoerdMeijer, t.p.northover
Subscribers: javed.absar, kristof.beyls, hiraditya, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D62675
llvm-svn: 364013
2019-06-21 17:35:07 +08:00
|
|
|
|
|
|
|
// Encoding class for the MVE floating-point "vabd" instruction.
//   suffix - assembly type suffix.
//   size   - element-size bit, placed in Inst{20}.
// Operands: $Qd output; $Qn and $Qm inputs. Qm is not assigned in this class
// (presumably encoded by a base class). No ISel pattern is attached here.
class MVE_VABD_fp<string suffix, bit size>
  : MVE_float<"vabd", suffix, (outs MQPR:$Qd), (ins MQPR:$Qn, MQPR:$Qm),
              "$Qd, $Qn, $Qm", vpred_r, ""> {
  bits<4> Qd;
  bits<4> Qn;

  let Inst{28} = 0b1;
  let Inst{25-23} = 0b110;
  let Inst{22} = Qd{3};
  let Inst{21} = 0b1;
  let Inst{20} = size;
  let Inst{19-17} = Qn{2-0};
  let Inst{16} = 0b0;
  let Inst{15-13} = Qd{2-0};
  let Inst{11-8} = 0b1101;
  let Inst{7} = Qn{3};
  let Inst{4} = 0b0;
  let validForTailPredication = 1;
}
|
|
|
|
|
2019-11-14 00:57:28 +08:00
|
|
|
// Defines one floating-point VABD instruction plus its two ISel patterns.
//   VTI        - vector type info record; supplies Suffix, Size, Vec and Pred.
//   unpred_int - intrinsic matched by the unpredicated pattern.
//   pred_int   - intrinsic matched by the predicated pattern.
// The instruction record takes the bare defm name (def ""), which is why the
// patterns look it up with !cast<Instruction>(NAME).
multiclass MVE_VABDT_fp_m<MVEVectorVTInfo VTI,
                          Intrinsic unpred_int, Intrinsic pred_int> {
  // Only bit 0 of VTI.Size is passed as the instruction's size bit.
  def "" : MVE_VABD_fp<VTI.Suffix, VTI.Size{0}>;

  let Predicates = [HasMVEFloat] in {
    // Unpredicated: intrinsic operands map positionally onto the instruction.
    def : Pat<(VTI.Vec (unpred_int (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn))),
              (VTI.Vec (!cast<Instruction>(NAME)
                        (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn)))>;
    // Predicated: the intrinsic also carries a mask and an "inactive" vector;
    // these are passed after a literal (i32 1) (presumably the "then"
    // predication code -- confirm against the vpred_r operand definition).
    def : Pat<(VTI.Vec (pred_int (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn),
                        (VTI.Pred VCCR:$mask), (VTI.Vec MQPR:$inactive))),
              (VTI.Vec (!cast<Instruction>(NAME)
                        (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn),
                        (i32 1), (VTI.Pred VCCR:$mask),
                        (VTI.Vec MQPR:$inactive)))>;
  }
}
|
|
|
|
|
|
|
|
// Convenience wrapper: MVE_VABDT_fp_m bound to the int_arm_mve_vabd
// (unpredicated) and int_arm_mve_abd_predicated (predicated) intrinsics.
multiclass MVE_VABD_fp_m<MVEVectorVTInfo VTI>
  : MVE_VABDT_fp_m<VTI, int_arm_mve_vabd, int_arm_mve_abd_predicated>;
|
|
|
|
|
|
|
|
// Instantiate floating-point VABD (instruction + patterns) for both float
// vector types.
defm MVE_VABDf32 : MVE_VABD_fp_m<MVE_v4f32>;
defm MVE_VABDf16 : MVE_VABD_fp_m<MVE_v8f16>;
|
[ARM] Add a batch of MVE floating-point instructions.
Summary:
This includes floating-point basic arithmetic (add/sub/multiply),
complex add/multiply, unary negation and absolute value, rounding to
integer value, and conversion to/from integer formats.
Reviewers: dmgreen, samparker, SjoerdMeijer, t.p.northover
Subscribers: javed.absar, kristof.beyls, hiraditya, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D62675
llvm-svn: 364013
2019-06-21 17:35:07 +08:00
|
|
|
|
|
|
|
// Base encoding class for "vcvt" with an immediate operand ($imm6).
//   suffix           - assembly type suffix (e.g. "f16.s16").
//   fsi              - placed in Inst{9}.
//   U                - placed in Inst{28}.
//   op               - placed in Inst{8}.
//   imm_operand_type - Operand record used for $imm6.
//   pattern          - ISel patterns (none by default).
// Only imm6{3-0} is encoded here (Inst{19-16}); Inst{20} is left unset so the
// f16/f32 subclasses can fill it in.
class MVE_VCVT_fix<string suffix, bit fsi, bit U, bit op,
                   Operand imm_operand_type, list<dag> pattern=[]>
  : MVE_float<"vcvt", suffix,
              (outs MQPR:$Qd), (ins MQPR:$Qm, imm_operand_type:$imm6),
              "$Qd, $Qm, $imm6", vpred_r, "", pattern> {
  bits<4> Qd;
  bits<6> imm6;

  let Inst{28} = U;
  let Inst{25-23} = 0b111;
  let Inst{22} = Qd{3};
  let Inst{21} = 0b1;
  let Inst{19-16} = imm6{3-0};
  let Inst{15-13} = Qd{2-0};
  let Inst{11-10} = 0b11;
  let Inst{9} = fsi;
  let Inst{8} = op;
  let Inst{7} = 0b0;
  let Inst{4} = 0b1;

  let DecoderMethod = "DecodeMVEVCVTt1fp";
  let validForTailPredication = 1;
}
|
|
|
|
|
|
|
|
// Assembler operand class for the VCVT immediate. Bits is spliced into the
// predicate method's template arguments ("isImmediate<1,Bits>"), into the
// diagnostic text ("... between 1 and Bits") and into the class name.
class MVE_VCVT_imm_asmop<int Bits> : AsmOperandClass {
  let PredicateMethod = "isImmediate<1," # Bits # ">";
  let DiagnosticString =
      "MVE fixed-point immediate operand must be between 1 and " # Bits;
  let Name = "MVEVcvtImm" # Bits;
  let RenderMethod = "addImmOperands";
}
|
|
|
|
// i32 operand type for the VCVT immediate. Parsing uses the matching
// MVE_VCVT_imm_asmop<Bits>; encoding and decoding are delegated to the named
// C++ methods.
class MVE_VCVT_imm<int Bits>: Operand<i32> {
  let ParserMatchClass = MVE_VCVT_imm_asmop<Bits>;
  let EncoderMethod = "getNEONVcvtImm32OpValue";
  let DecoderMethod = "DecodeVCVTImmOperand";
}
|
|
|
|
|
|
|
|
// 32-bit variant of MVE_VCVT_fix: fsi = 1, immediate operand MVE_VCVT_imm<32>,
// and Inst{20} carries bit 4 of the immediate.
class MVE_VCVT_fix_f32<string suffix, bit U, bit op>
  : MVE_VCVT_fix<suffix, 0b1, U, op, MVE_VCVT_imm<32>> {
  let Inst{20} = imm6{4};
}
|
|
|
|
// 16-bit variant of MVE_VCVT_fix: fsi = 0, immediate operand MVE_VCVT_imm<16>,
// and Inst{20} is fixed to 1 rather than taken from the immediate.
class MVE_VCVT_fix_f16<string suffix, bit U, bit op>
  : MVE_VCVT_fix<suffix, 0b0, U, op, MVE_VCVT_imm<16>> {
  let Inst{20} = 0b1;
}
|
|
|
|
|
|
|
|
// Concrete immediate-form VCVT instructions. Template arguments are
// <suffix, U, op>: in these records U is 0 for the s16/s32 suffixes and 1 for
// u16/u32; op is 0 when the destination type is float and 1 when it is integer.
def MVE_VCVTf16s16_fix : MVE_VCVT_fix_f16<"f16.s16", 0b0, 0b0>;
def MVE_VCVTs16f16_fix : MVE_VCVT_fix_f16<"s16.f16", 0b0, 0b1>;
def MVE_VCVTf16u16_fix : MVE_VCVT_fix_f16<"f16.u16", 0b1, 0b0>;
def MVE_VCVTu16f16_fix : MVE_VCVT_fix_f16<"u16.f16", 0b1, 0b1>;
def MVE_VCVTf32s32_fix : MVE_VCVT_fix_f32<"f32.s32", 0b0, 0b0>;
def MVE_VCVTs32f32_fix : MVE_VCVT_fix_f32<"s32.f32", 0b0, 0b1>;
def MVE_VCVTf32u32_fix : MVE_VCVT_fix_f32<"f32.u32", 0b1, 0b0>;
def MVE_VCVTu32f32_fix : MVE_VCVT_fix_f32<"u32.f32", 0b1, 0b1>;
|
|
|
|
|
|
|
|
// Encoding class for VCVT with a rounding-mode letter appended to the
// mnemonic ("vcvt" + anpm).
//   suffix  - assembly type suffix (e.g. "s16.f16").
//   size    - 2-bit size field, placed in Inst{19-18}.
//   op      - placed in Inst{7}.
//   anpm    - string appended to "vcvt" to form the mnemonic.
//   rm      - 2-bit field placed in Inst{9-8}.
//   pattern - ISel patterns (none by default).
class MVE_VCVT_fp_int_anpm<string suffix, bits<2> size, bit op, string anpm,
                           bits<2> rm, list<dag> pattern=[]>
  : MVE_float<!strconcat("vcvt", anpm), suffix, (outs MQPR:$Qd),
              (ins MQPR:$Qm), "$Qd, $Qm", vpred_r, "", pattern> {
  bits<4> Qd;

  let Inst{28} = 0b1;
  let Inst{25-23} = 0b111;
  let Inst{22} = Qd{3};
  let Inst{21-20} = 0b11;
  let Inst{19-18} = size;
  let Inst{17-16} = 0b11;
  let Inst{15-13} = Qd{2-0};
  let Inst{12-10} = 0b000;
  let Inst{9-8} = rm;
  let Inst{7} = op;
  let Inst{4} = 0b0;
  let validForTailPredication = 1;
}
|
|
|
|
|
|
|
|
// Emits the four rounding-mode variants of one VCVT: records suffixed a, n, p
// and m, with rm = 0b00, 0b01, 0b10 and 0b11 respectively.
// NOTE: the `pattern` parameter is accepted but not forwarded to any of the
// four records.
multiclass MVE_VCVT_fp_int_anpm_multi<string suffix, bits<2> size, bit op,
                                      list<dag> pattern=[]> {
  def a : MVE_VCVT_fp_int_anpm<suffix, size, op, "a", 0b00>;
  def n : MVE_VCVT_fp_int_anpm<suffix, size, op, "n", 0b01>;
  def p : MVE_VCVT_fp_int_anpm<suffix, size, op, "p", 0b10>;
  def m : MVE_VCVT_fp_int_anpm<suffix, size, op, "m", 0b11>;
}
|
|
|
|
|
|
|
|
// This defines instructions such as MVE_VCVTu16f16a, with an explicit
// rounding-mode suffix on the mnemonic. The MVE_VCVT_fp_int class further
// down defines the bare-mnemonic forms, whose records carry a trailing 'z' or
// 'n' instead (e.g. MVE_VCVTu16f16z, with implied rounding toward zero).
defm MVE_VCVTs16f16 : MVE_VCVT_fp_int_anpm_multi<"s16.f16", 0b01, 0b0>;
defm MVE_VCVTu16f16 : MVE_VCVT_fp_int_anpm_multi<"u16.f16", 0b01, 0b1>;
defm MVE_VCVTs32f32 : MVE_VCVT_fp_int_anpm_multi<"s32.f32", 0b10, 0b0>;
defm MVE_VCVTu32f32 : MVE_VCVT_fp_int_anpm_multi<"u32.f32", 0b10, 0b1>;
|
|
|
|
|
|
|
|
// Encoding class for the plain "vcvt" between float and integer vectors
// (no immediate, no rounding-mode letter on the mnemonic).
//   suffix  - assembly type suffix (e.g. "s16.f16").
//   size    - 2-bit size field, placed in Inst{19-18}.
//   op      - 2-bit field placed in Inst{8-7}.
//   pattern - ISel patterns (none by default).
class MVE_VCVT_fp_int<string suffix, bits<2> size, bits<2> op,
                      list<dag> pattern=[]>
  : MVE_float<"vcvt", suffix, (outs MQPR:$Qd),
              (ins MQPR:$Qm), "$Qd, $Qm", vpred_r, "", pattern> {
  bits<4> Qd;

  let Inst{28} = 0b1;
  let Inst{25-23} = 0b111;
  let Inst{22} = Qd{3};
  let Inst{21-20} = 0b11;
  let Inst{19-18} = size;
  let Inst{17-16} = 0b11;
  let Inst{15-13} = Qd{2-0};
  let Inst{12-9} = 0b0011;
  let Inst{8-7} = op;
  let Inst{4} = 0b0;
  let validForTailPredication = 1;
}
|
|
|
|
|
|
|
|
// The unsuffixed VCVT for float->int implicitly rounds toward zero,
// which is reflected here by the 'z' at the end of the LLVM instruction names.
def MVE_VCVTs16f16z : MVE_VCVT_fp_int<"s16.f16", 0b01, 0b10>;
def MVE_VCVTu16f16z : MVE_VCVT_fp_int<"u16.f16", 0b01, 0b11>;
def MVE_VCVTs32f32z : MVE_VCVT_fp_int<"s32.f32", 0b10, 0b10>;
def MVE_VCVTu32f32z : MVE_VCVT_fp_int<"u32.f32", 0b10, 0b11>;
// Whereas VCVT for int->float rounds to nearest (hence the trailing 'n').
def MVE_VCVTf16s16n : MVE_VCVT_fp_int<"f16.s16", 0b01, 0b00>;
def MVE_VCVTf16u16n : MVE_VCVT_fp_int<"f16.u16", 0b01, 0b01>;
def MVE_VCVTf32s32n : MVE_VCVT_fp_int<"f32.s32", 0b10, 0b00>;
def MVE_VCVTf32u32n : MVE_VCVT_fp_int<"f32.u32", 0b10, 0b01>;
|
|
|
|
|
2019-07-05 17:34:30 +08:00
|
|
|
// ISel patterns mapping the generic conversion nodes onto the plain VCVT
// instructions: fp_to_sint/fp_to_uint select the 'z' records and
// sint_to_fp/uint_to_fp select the 'n' records, for both the 32-bit
// (v4f32 <-> v4i32) and 16-bit (v8f16 <-> v8i16) vector types.
let Predicates = [HasMVEFloat] in {
  def : Pat<(v4i32 (fp_to_sint (v4f32 MQPR:$src))),
            (v4i32 (MVE_VCVTs32f32z (v4f32 MQPR:$src)))>;
  def : Pat<(v4i32 (fp_to_uint (v4f32 MQPR:$src))),
            (v4i32 (MVE_VCVTu32f32z (v4f32 MQPR:$src)))>;
  def : Pat<(v8i16 (fp_to_sint (v8f16 MQPR:$src))),
            (v8i16 (MVE_VCVTs16f16z (v8f16 MQPR:$src)))>;
  def : Pat<(v8i16 (fp_to_uint (v8f16 MQPR:$src))),
            (v8i16 (MVE_VCVTu16f16z (v8f16 MQPR:$src)))>;
  def : Pat<(v4f32 (sint_to_fp (v4i32 MQPR:$src))),
            (v4f32 (MVE_VCVTf32s32n (v4i32 MQPR:$src)))>;
  def : Pat<(v4f32 (uint_to_fp (v4i32 MQPR:$src))),
            (v4f32 (MVE_VCVTf32u32n (v4i32 MQPR:$src)))>;
  def : Pat<(v8f16 (sint_to_fp (v8i16 MQPR:$src))),
            (v8f16 (MVE_VCVTf16s16n (v8i16 MQPR:$src)))>;
  def : Pat<(v8f16 (uint_to_fp (v8i16 MQPR:$src))),
            (v8f16 (MVE_VCVTf16u16n (v8i16 MQPR:$src)))>;
}
|
|
|
|
|
[ARM] Add a batch of MVE floating-point instructions.
Summary:
This includes floating-point basic arithmetic (add/sub/multiply),
complex add/multiply, unary negation and absolute value, rounding to
integer value, and conversion to/from integer formats.
Reviewers: dmgreen, samparker, SjoerdMeijer, t.p.northover
Subscribers: javed.absar, kristof.beyls, hiraditya, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D62675
llvm-svn: 364013
2019-06-21 17:35:07 +08:00
|
|
|
// Shared encoding class for the floating-point VABS and VNEG instructions.
//   iname   - mnemonic ("vabs" or "vneg" in the records that use this class).
//   suffix  - assembly type suffix.
//   size    - 2-bit size field, placed in Inst{19-18}.
//   negate  - placed in Inst{7}; the only encoding bit that differs between
//             the VABS (0) and VNEG (1) records.
//   pattern - ISel patterns (none by default).
class MVE_VABSNEG_fp<string iname, string suffix, bits<2> size, bit negate,
                     list<dag> pattern=[]>
  : MVE_float<iname, suffix, (outs MQPR:$Qd),
              (ins MQPR:$Qm), "$Qd, $Qm", vpred_r, "", pattern> {
  bits<4> Qd;

  let Inst{28} = 0b1;
  let Inst{25-23} = 0b111;
  let Inst{22} = Qd{3};
  let Inst{21-20} = 0b11;
  let Inst{19-18} = size;
  let Inst{17-16} = 0b01;
  let Inst{15-13} = Qd{2-0};
  let Inst{11-8} = 0b0111;
  let Inst{7} = negate;
  let Inst{4} = 0b0;
  let validForTailPredication = 1;
}
|
|
|
|
|
|
|
|
// Floating-point VABS instructions (negate = 0) and the patterns selecting
// them for the generic fabs node.
def MVE_VABSf16 : MVE_VABSNEG_fp<"vabs", "f16", 0b01, 0b0>;
def MVE_VABSf32 : MVE_VABSNEG_fp<"vabs", "f32", 0b10, 0b0>;

let Predicates = [HasMVEFloat] in {
  def : Pat<(v8f16 (fabs MQPR:$src)),
            (MVE_VABSf16 MQPR:$src)>;
  def : Pat<(v4f32 (fabs MQPR:$src)),
            (MVE_VABSf32 MQPR:$src)>;
}
|
|
|
|
|
[ARM] Add a batch of MVE floating-point instructions.
Summary:
This includes floating-point basic arithmetic (add/sub/multiply),
complex add/multiply, unary negation and absolute value, rounding to
integer value, and conversion to/from integer formats.
Reviewers: dmgreen, samparker, SjoerdMeijer, t.p.northover
Subscribers: javed.absar, kristof.beyls, hiraditya, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D62675
llvm-svn: 364013
2019-06-21 17:35:07 +08:00
|
|
|
// Floating-point VNEG instructions (negate = 1) and the patterns selecting
// them for the generic fneg node.
def MVE_VNEGf16 : MVE_VABSNEG_fp<"vneg", "f16", 0b01, 0b1>;
def MVE_VNEGf32 : MVE_VABSNEG_fp<"vneg", "f32", 0b10, 0b1>;

let Predicates = [HasMVEFloat] in {
  def : Pat<(v8f16 (fneg MQPR:$src)),
            (MVE_VNEGf16 MQPR:$src)>;
  def : Pat<(v4f32 (fneg MQPR:$src)),
            (MVE_VNEGf32 MQPR:$src)>;
}
|
|
|
|
|
[ARM] Add a batch of MVE floating-point instructions.
Summary:
This includes floating-point basic arithmetic (add/sub/multiply),
complex add/multiply, unary negation and absolute value, rounding to
integer value, and conversion to/from integer formats.
Reviewers: dmgreen, samparker, SjoerdMeijer, t.p.northover
Subscribers: javed.absar, kristof.beyls, hiraditya, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D62675
llvm-svn: 364013
2019-06-21 17:35:07 +08:00
|
|
|
// Shared encoding class for the VMAXNMA / VMINNMA instructions.
//   iname   - mnemonic.
//   suffix  - assembly type suffix.
//   size    - placed in Inst{28}.
//   bit_12  - placed in Inst{12}; distinguishes the max (0) and min (1)
//             records that use this class.
//   pattern - ISel patterns (none by default).
// The destination is also a source: $Qd_src is tied to $Qd through the
// "$Qd = $Qd_src" constraint, and only "$Qd, $Qm" appear in the asm string.
class MVE_VMAXMINNMA<string iname, string suffix, bit size, bit bit_12,
                     list<dag> pattern=[]>
  : MVE_f<(outs MQPR:$Qd), (ins MQPR:$Qd_src, MQPR:$Qm),
          NoItinerary, iname, suffix, "$Qd, $Qm", vpred_n, "$Qd = $Qd_src",
          pattern> {
  bits<4> Qd;
  bits<4> Qm;

  let Inst{28} = size;
  let Inst{25-23} = 0b100;
  let Inst{22} = Qd{3};
  let Inst{21-16} = 0b111111;
  let Inst{15-13} = Qd{2-0};
  let Inst{12} = bit_12;
  let Inst{11-6} = 0b111010;
  let Inst{5} = Qm{3};
  let Inst{4} = 0b0;
  let Inst{3-1} = Qm{2-0};
  let Inst{0} = 0b1;
}
|
|
|
|
|
|
|
|
// Concrete VMAXNMA / VMINNMA instructions. Arguments after the suffix are
// <size, bit_12>: size is 0 for f32 and 1 for f16; bit_12 is 0 for max and
// 1 for min.
def MVE_VMAXNMAf32 : MVE_VMAXMINNMA<"vmaxnma", "f32", 0b0, 0b0>;
def MVE_VMAXNMAf16 : MVE_VMAXMINNMA<"vmaxnma", "f16", 0b1, 0b0>;

def MVE_VMINNMAf32 : MVE_VMAXMINNMA<"vminnma", "f32", 0b0, 0b1>;
def MVE_VMINNMAf16 : MVE_VMAXMINNMA<"vminnma", "f16", 0b1, 0b1>;
|
|
|
|
|
|
|
|
// end of MVE Floating Point instructions
|
|
|
|
|
2019-06-21 19:14:51 +08:00
|
|
|
// start of MVE compares
|
|
|
|
|
|
|
|
// Base class for comparing two vector registers ("vcmp $fc, $Qn, $Qm").
//   suffix     - assembly type suffix.
//   bit_28     - placed in Inst{28}.
//   bits_21_20 - placed in Inst{21-20}.
//   predtype   - operand type of the condition-code operand $fc; its
//                DecoderMethod is also spliced into this class's decoder name.
//   pattern    - ISel patterns (none by default).
// The 3-bit condition code is scattered over the encoding:
// fc{2} -> Inst{12}, fc{1} -> Inst{0}, fc{0} -> Inst{7}. Subclasses may
// override some of those bits with constants.
class MVE_VCMPqq<string suffix, bit bit_28, bits<2> bits_21_20,
                 VCMPPredicateOperand predtype, list<dag> pattern=[]>
  : MVE_p<(outs VCCR:$P0), (ins MQPR:$Qn, MQPR:$Qm, predtype:$fc),
          NoItinerary, "vcmp", suffix, "$fc, $Qn, $Qm", vpred_n, "", pattern> {
  bits<3> fc;
  bits<4> Qn;
  bits<4> Qm;

  let Inst{28} = bit_28;
  let Inst{25-22} = 0b1000;
  let Inst{21-20} = bits_21_20;
  let Inst{19-17} = Qn{2-0};
  let Inst{16-13} = 0b1000;
  let Inst{12} = fc{2};
  let Inst{11-8} = 0b1111;
  let Inst{7} = fc{0};
  let Inst{6} = 0b0;
  let Inst{5} = Qm{3};
  let Inst{4} = 0b0;
  let Inst{3-1} = Qm{2-0};
  let Inst{0} = fc{1};

  let Constraints = "";

  // We need a custom decoder method for these instructions because of
  // the output VCCR operand, which isn't encoded in the instruction
  // bits anywhere (there is only one choice for it) but has to be
  // included in the MC operands so that codegen will be able to track
  // its data flow between instructions, spill/reload it when
  // necessary, etc. There seems to be no way to get the Tablegen
  // decoder to emit an operand that isn't affected by any instruction
  // bit.
  let DecoderMethod = "DecodeMVEVCMP<false," # predtype.DecoderMethod # ">";
  let validForTailPredication = 1;
}
|
|
|
|
|
|
|
|
// Floating-point vector/vector compare: Inst{21-20} fixed to 0b11, `size`
// goes to Inst{28}, condition operand type pred_basic_fp. Requires
// HasMVEFloat.
class MVE_VCMPqqf<string suffix, bit size>
  : MVE_VCMPqq<suffix, size, 0b11, pred_basic_fp> {
  let Predicates = [HasMVEFloat];
}
|
|
|
|
|
|
|
|
// Vector/vector compare using the pred_basic_i condition operand. Inst{28} is
// 1 and `size` goes to Inst{21-20}. The fc{2} and fc{1} positions
// (Inst{12}, Inst{0}) are overridden with 0, leaving only fc{0} encoded.
class MVE_VCMPqqi<string suffix, bits<2> size>
  : MVE_VCMPqq<suffix, 0b1, size, pred_basic_i> {
  let Inst{12} = 0b0;
  let Inst{0} = 0b0;
}
|
|
|
|
|
|
|
|
// Vector/vector compare using the pred_basic_u condition operand. Inst{28} is
// 1 and `size` goes to Inst{21-20}. Inst{12} is forced to 0 and Inst{0} to 1,
// leaving only fc{0} encoded.
class MVE_VCMPqqu<string suffix, bits<2> size>
  : MVE_VCMPqq<suffix, 0b1, size, pred_basic_u> {
  let Inst{12} = 0b0;
  let Inst{0} = 0b1;
}
|
|
|
|
|
|
|
|
// Vector/vector compare using the pred_basic_s condition operand. Inst{28} is
// 1 and `size` goes to Inst{21-20}. Inst{12} is forced to 1; fc{1} and fc{0}
// are still encoded by the base class.
class MVE_VCMPqqs<string suffix, bits<2> size>
  : MVE_VCMPqq<suffix, 0b1, size, pred_basic_s> {
  let Inst{12} = 0b1;
}
|
|
|
|
|
|
|
|
// Concrete vector/vector compare instructions. For the float records the
// argument is the Inst{28} size bit (0 = f32, 1 = f16); for the integer
// records it is the 2-bit size field (0b00 = 8, 0b01 = 16, 0b10 = 32 bits).
def MVE_VCMPf32 : MVE_VCMPqqf<"f32", 0b0>;
def MVE_VCMPf16 : MVE_VCMPqqf<"f16", 0b1>;

def MVE_VCMPi8  : MVE_VCMPqqi<"i8",  0b00>;
def MVE_VCMPi16 : MVE_VCMPqqi<"i16", 0b01>;
def MVE_VCMPi32 : MVE_VCMPqqi<"i32", 0b10>;

def MVE_VCMPu8  : MVE_VCMPqqu<"u8",  0b00>;
def MVE_VCMPu16 : MVE_VCMPqqu<"u16", 0b01>;
def MVE_VCMPu32 : MVE_VCMPqqu<"u32", 0b10>;

def MVE_VCMPs8  : MVE_VCMPqqs<"s8",  0b00>;
def MVE_VCMPs16 : MVE_VCMPqqs<"s16", 0b01>;
def MVE_VCMPs32 : MVE_VCMPqqs<"s32", 0b10>;
|
|
|
|
|
|
|
|
// Base class for comparing a vector register with a scalar
// ("vcmp $fc, $Qn, $Rm"); the scalar operand has register class GPRwithZR.
//   suffix     - assembly type suffix.
//   bit_28     - placed in Inst{28}.
//   bits_21_20 - placed in Inst{21-20}.
//   predtype   - operand type of the condition-code operand $fc; its
//                DecoderMethod is also spliced into this class's decoder name.
//   pattern    - ISel patterns (none by default).
// Condition-code placement differs from MVE_VCMPqq in one bit:
// fc{2} -> Inst{12}, fc{1} -> Inst{5}, fc{0} -> Inst{7}; Inst{6} is 1 here.
class MVE_VCMPqr<string suffix, bit bit_28, bits<2> bits_21_20,
                 VCMPPredicateOperand predtype, list<dag> pattern=[]>
  : MVE_p<(outs VCCR:$P0), (ins MQPR:$Qn, GPRwithZR:$Rm, predtype:$fc),
          NoItinerary, "vcmp", suffix, "$fc, $Qn, $Rm", vpred_n, "", pattern> {
  bits<3> fc;
  bits<4> Qn;
  bits<4> Rm;

  let Inst{28} = bit_28;
  let Inst{25-22} = 0b1000;
  let Inst{21-20} = bits_21_20;
  let Inst{19-17} = Qn{2-0};
  let Inst{16-13} = 0b1000;
  let Inst{12} = fc{2};
  let Inst{11-8} = 0b1111;
  let Inst{7} = fc{0};
  let Inst{6} = 0b1;
  let Inst{5} = fc{1};
  let Inst{4} = 0b0;
  let Inst{3-0} = Rm{3-0};

  let Constraints = "";
  // Custom decoder method, for the same reason as MVE_VCMPqq
  let DecoderMethod = "DecodeMVEVCMP<true," # predtype.DecoderMethod # ">";
  let validForTailPredication = 1;
}
|
|
|
|
|
|
|
|
// Floating-point vector/scalar compare: Inst{21-20} fixed to 0b11, `size`
// goes to Inst{28}, condition operand type pred_basic_fp. Requires
// HasMVEFloat.
class MVE_VCMPqrf<string suffix, bit size>
  : MVE_VCMPqr<suffix, size, 0b11, pred_basic_fp> {
  let Predicates = [HasMVEFloat];
}
|
|
|
|
|
|
|
|
// Vector/scalar compare using the pred_basic_i condition operand. The fc{2}
// and fc{1} positions (Inst{12}, Inst{5}) are overridden with 0, leaving only
// fc{0} encoded.
class MVE_VCMPqri<string suffix, bits<2> size>
  : MVE_VCMPqr<suffix, 0b1, size, pred_basic_i> {
  let Inst{12} = 0b0;
  let Inst{5} = 0b0;
}
|
|
|
|
|
|
|
|
// Vector/scalar compare using the pred_basic_u condition operand. Inst{12} is
// forced to 0 and Inst{5} to 1, leaving only fc{0} encoded.
class MVE_VCMPqru<string suffix, bits<2> size>
  : MVE_VCMPqr<suffix, 0b1, size, pred_basic_u> {
  let Inst{12} = 0b0;
  let Inst{5} = 0b1;
}
|
|
|
|
|
|
|
|
// Vector/scalar compare using the pred_basic_s condition operand. Inst{12} is
// forced to 1; fc{1} and fc{0} are still encoded by the base class.
class MVE_VCMPqrs<string suffix, bits<2> size>
  : MVE_VCMPqr<suffix, 0b1, size, pred_basic_s> {
  let Inst{12} = 0b1;
}
|
|
|
|
|
|
|
|
// Concrete vector/scalar compare instructions; record names are the
// vector/vector names with an 'r' appended. Arguments follow the same scheme
// as the vector/vector records (size bit for float, 2-bit size for integer).
def MVE_VCMPf32r : MVE_VCMPqrf<"f32", 0b0>;
def MVE_VCMPf16r : MVE_VCMPqrf<"f16", 0b1>;

def MVE_VCMPi8r  : MVE_VCMPqri<"i8",  0b00>;
def MVE_VCMPi16r : MVE_VCMPqri<"i16", 0b01>;
def MVE_VCMPi32r : MVE_VCMPqri<"i32", 0b10>;

def MVE_VCMPu8r  : MVE_VCMPqru<"u8",  0b00>;
def MVE_VCMPu16r : MVE_VCMPqru<"u16", 0b01>;
def MVE_VCMPu32r : MVE_VCMPqru<"u32", 0b10>;

def MVE_VCMPs8r  : MVE_VCMPqrs<"s8",  0b00>;
def MVE_VCMPs16r : MVE_VCMPqrs<"s16", 0b01>;
def MVE_VCMPs32r : MVE_VCMPqrs<"s32", 0b10>;
|
|
|
|
|
2019-07-25 01:36:47 +08:00
|
|
|
// Integer compare-against-zero patterns for one condition code.
//   suffix - "i", "u" or "s"-style prefix used to build the instruction name
//            "MVE_VCMP" # suffix # <bits> # "r".
//   fc     - condition-code value matched on the ARMvcmpz node and passed
//            through to the instruction.
// ARMvcmpz is selected to the vector/scalar compare with ZR as the scalar.
multiclass unpred_vcmp_z<string suffix, int fc> {
  def i8  : Pat<(v16i1 (ARMvcmpz (v16i8 MQPR:$v1), (i32 fc))),
                (v16i1 (!cast<Instruction>("MVE_VCMP"#suffix#"8r") (v16i8 MQPR:$v1), ZR, fc))>;
  def i16 : Pat<(v8i1 (ARMvcmpz (v8i16 MQPR:$v1), (i32 fc))),
                (v8i1 (!cast<Instruction>("MVE_VCMP"#suffix#"16r") (v8i16 MQPR:$v1), ZR, fc))>;
  def i32 : Pat<(v4i1 (ARMvcmpz (v4i32 MQPR:$v1), (i32 fc))),
                (v4i1 (!cast<Instruction>("MVE_VCMP"#suffix#"32r") (v4i32 MQPR:$v1), ZR, fc))>;

  // (and $p1, (vcmpz ...)) folds the existing predicate into the compare by
  // passing `1, VCCR:$p1` as the instruction's trailing predication operands.
  def : Pat<(v16i1 (and (v16i1 VCCR:$p1), (v16i1 (ARMvcmpz (v16i8 MQPR:$v1), (i32 fc))))),
            (v16i1 (!cast<Instruction>("MVE_VCMP"#suffix#"8r") (v16i8 MQPR:$v1), ZR, fc, 1, VCCR:$p1))>;
  def : Pat<(v8i1 (and (v8i1 VCCR:$p1), (v8i1 (ARMvcmpz (v8i16 MQPR:$v1), (i32 fc))))),
            (v8i1 (!cast<Instruction>("MVE_VCMP"#suffix#"16r") (v8i16 MQPR:$v1), ZR, fc, 1, VCCR:$p1))>;
  def : Pat<(v4i1 (and (v4i1 VCCR:$p1), (v4i1 (ARMvcmpz (v4i32 MQPR:$v1), (i32 fc))))),
            (v4i1 (!cast<Instruction>("MVE_VCMP"#suffix#"32r") (v4i32 MQPR:$v1), ZR, fc, 1, VCCR:$p1))>;
}
|
|
|
|
|
2019-07-25 01:36:47 +08:00
|
|
|
// Integer compare patterns for one condition code, in four groups:
//   1. vector/vector                        -> MVE_VCMP<suffix><bits>
//   2. vector/(ARMvdup scalar)              -> MVE_VCMP<suffix><bits>r
//   3. group 1 and-ed with a predicate $p1  -> same instruction, predicated
//   4. group 2 and-ed with a predicate $p1  -> same instruction, predicated
//   suffix - prefix used to build the instruction name.
//   fc     - condition-code value matched on ARMvcmp and passed through.
multiclass unpred_vcmp_r<string suffix, int fc> {
  def i8  : Pat<(v16i1 (ARMvcmp (v16i8 MQPR:$v1), (v16i8 MQPR:$v2), (i32 fc))),
                (v16i1 (!cast<Instruction>("MVE_VCMP"#suffix#"8") (v16i8 MQPR:$v1), (v16i8 MQPR:$v2), fc))>;
  def i16 : Pat<(v8i1 (ARMvcmp (v8i16 MQPR:$v1), (v8i16 MQPR:$v2), (i32 fc))),
                (v8i1 (!cast<Instruction>("MVE_VCMP"#suffix#"16") (v8i16 MQPR:$v1), (v8i16 MQPR:$v2), fc))>;
  def i32 : Pat<(v4i1 (ARMvcmp (v4i32 MQPR:$v1), (v4i32 MQPR:$v2), (i32 fc))),
                (v4i1 (!cast<Instruction>("MVE_VCMP"#suffix#"32") (v4i32 MQPR:$v1), (v4i32 MQPR:$v2), fc))>;

  def i8r  : Pat<(v16i1 (ARMvcmp (v16i8 MQPR:$v1), (v16i8 (ARMvdup GPR:$v2)), (i32 fc))),
                 (v16i1 (!cast<Instruction>("MVE_VCMP"#suffix#"8r") (v16i8 MQPR:$v1), (i32 GPR:$v2), fc))>;
  def i16r : Pat<(v8i1 (ARMvcmp (v8i16 MQPR:$v1), (v8i16 (ARMvdup GPR:$v2)), (i32 fc))),
                 (v8i1 (!cast<Instruction>("MVE_VCMP"#suffix#"16r") (v8i16 MQPR:$v1), (i32 GPR:$v2), fc))>;
  def i32r : Pat<(v4i1 (ARMvcmp (v4i32 MQPR:$v1), (v4i32 (ARMvdup GPR:$v2)), (i32 fc))),
                 (v4i1 (!cast<Instruction>("MVE_VCMP"#suffix#"32r") (v4i32 MQPR:$v1), (i32 GPR:$v2), fc))>;

  def : Pat<(v16i1 (and (v16i1 VCCR:$p1), (v16i1 (ARMvcmp (v16i8 MQPR:$v1), (v16i8 MQPR:$v2), (i32 fc))))),
            (v16i1 (!cast<Instruction>("MVE_VCMP"#suffix#"8") (v16i8 MQPR:$v1), (v16i8 MQPR:$v2), fc, 1, VCCR:$p1))>;
  def : Pat<(v8i1 (and (v8i1 VCCR:$p1), (v8i1 (ARMvcmp (v8i16 MQPR:$v1), (v8i16 MQPR:$v2), (i32 fc))))),
            (v8i1 (!cast<Instruction>("MVE_VCMP"#suffix#"16") (v8i16 MQPR:$v1), (v8i16 MQPR:$v2), fc, 1, VCCR:$p1))>;
  def : Pat<(v4i1 (and (v4i1 VCCR:$p1), (v4i1 (ARMvcmp (v4i32 MQPR:$v1), (v4i32 MQPR:$v2), (i32 fc))))),
            (v4i1 (!cast<Instruction>("MVE_VCMP"#suffix#"32") (v4i32 MQPR:$v1), (v4i32 MQPR:$v2), fc, 1, VCCR:$p1))>;

  def : Pat<(v16i1 (and (v16i1 VCCR:$p1), (v16i1 (ARMvcmp (v16i8 MQPR:$v1), (v16i8 (ARMvdup GPR:$v2)), (i32 fc))))),
            (v16i1 (!cast<Instruction>("MVE_VCMP"#suffix#"8r") (v16i8 MQPR:$v1), (i32 GPR:$v2), fc, 1, VCCR:$p1))>;
  def : Pat<(v8i1 (and (v8i1 VCCR:$p1), (v8i1 (ARMvcmp (v8i16 MQPR:$v1), (v8i16 (ARMvdup GPR:$v2)), (i32 fc))))),
            (v8i1 (!cast<Instruction>("MVE_VCMP"#suffix#"16r") (v8i16 MQPR:$v1), (i32 GPR:$v2), fc, 1, VCCR:$p1))>;
  def : Pat<(v4i1 (and (v4i1 VCCR:$p1), (v4i1 (ARMvcmp (v4i32 MQPR:$v1), (v4i32 (ARMvdup GPR:$v2)), (i32 fc))))),
            (v4i1 (!cast<Instruction>("MVE_VCMP"#suffix#"32r") (v4i32 MQPR:$v1), (i32 GPR:$v2), fc, 1, VCCR:$p1))>;
}
|
|
|
|
|
[ARM,MVE] Add reversed isel patterns for MVE `vcmp qN,rN`
Summary:
As well as vector/vector compare instructions, MVE also has a family
of comparisons taking a vector and a scalar, which compare every lane
of the vector against the same value. We generate those at isel time
using isel patterns that match `(ARMvcmp vector, (ARMvdup scalar))`.
This commit adds corresponding patterns for the operand-reversed form
`(ARMvcmp (ARMvdup scalar), vector)`, with condition codes swapped as
necessary. That way, we can still generate the vector/scalar compare
instruction if the IR happens to have been rearranged to put the
operands the other way round, which can happen in some optimization
phases. Previously, a vcmp the other way round was handled by emitting
a `vdup` instruction to //explicitly// replicate the scalar input into
a vector, and then doing a vector/vector comparison.
I haven't added a new test, because it turned out that several
existing tests were already exhibiting that failure mode. So just
updating the expected output in the existing MVE codegen tests
demonstrates what's been improved.
Reviewers: ostannard, MarkMurrayARM, dmgreen
Reviewed By: dmgreen
Subscribers: kristof.beyls, hiraditya, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D70296
2019-11-15 22:05:02 +08:00
|
|
|
// Everything unpred_vcmp_r<suffix, fc> provides, plus operand-reversed
// vector/scalar patterns.
//   suffix     - prefix used to build the instruction name.
//   fc         - condition code matched on the ARMvcmp node.
//   fcReversed - condition code given to the instruction when the ARMvdup
//                appears as the FIRST ARMvcmp operand and the operands are
//                therefore swapped.
multiclass unpred_vcmp_r_reversible<string suffix, int fc, int fcReversed> {
  defm "": unpred_vcmp_r<suffix, fc>;

  // Additional patterns that match the vector/scalar comparisons the
  // opposite way round, with the ARMvdup in the first operand of the
  // ARMvcmp. These will usually need a different condition code
  // (except for the symmetric conditions EQ and NE). They're in a
  // separate multiclass because the unsigned CS and HI comparisons
  // don't have reversed forms.

  def : Pat<(v16i1 (ARMvcmp (v16i8 (ARMvdup GPR:$v1)), (v16i8 MQPR:$v2), (i32 fc))),
            (v16i1 (!cast<Instruction>("MVE_VCMP"#suffix#"8r") (v16i8 MQPR:$v2), (i32 GPR:$v1), fcReversed))>;
  def : Pat<(v8i1 (ARMvcmp (v8i16 (ARMvdup GPR:$v1)), (v8i16 MQPR:$v2), (i32 fc))),
            (v8i1 (!cast<Instruction>("MVE_VCMP"#suffix#"16r") (v8i16 MQPR:$v2), (i32 GPR:$v1), fcReversed))>;
  def : Pat<(v4i1 (ARMvcmp (v4i32 (ARMvdup GPR:$v1)), (v4i32 MQPR:$v2), (i32 fc))),
            (v4i1 (!cast<Instruction>("MVE_VCMP"#suffix#"32r") (v4i32 MQPR:$v2), (i32 GPR:$v1), fcReversed))>;

  // Same reversed forms, and-ed with an existing predicate $p1.
  def : Pat<(v16i1 (and (v16i1 VCCR:$p1), (v16i1 (ARMvcmp (v16i8 (ARMvdup GPR:$v1)), (v16i8 MQPR:$v2), (i32 fc))))),
            (v16i1 (!cast<Instruction>("MVE_VCMP"#suffix#"8r") (v16i8 MQPR:$v2), (i32 GPR:$v1), fcReversed, 1, VCCR:$p1))>;
  def : Pat<(v8i1 (and (v8i1 VCCR:$p1), (v8i1 (ARMvcmp (v8i16 (ARMvdup GPR:$v1)), (v8i16 MQPR:$v2), (i32 fc))))),
            (v8i1 (!cast<Instruction>("MVE_VCMP"#suffix#"16r") (v8i16 MQPR:$v2), (i32 GPR:$v1), fcReversed, 1, VCCR:$p1))>;
  def : Pat<(v4i1 (and (v4i1 VCCR:$p1), (v4i1 (ARMvcmp (v4i32 (ARMvdup GPR:$v1)), (v4i32 MQPR:$v2), (i32 fc))))),
            (v4i1 (!cast<Instruction>("MVE_VCMP"#suffix#"32r") (v4i32 MQPR:$v2), (i32 GPR:$v1), fcReversed, 1, VCCR:$p1))>;
}
|
|
|
|
|
2019-07-25 01:36:47 +08:00
|
|
|
// Floating-point compare-against-zero patterns for one condition code.
//   fc - condition-code value matched on the ARMvcmpz node and passed through
//        to the instruction.
// ARMvcmpz is selected to the vector/scalar compare with ZR as the scalar:
// MVE_VCMPf16r for v8f16 inputs and MVE_VCMPf32r for v4f32 inputs.
multiclass unpred_vcmpf_z<int fc> {
  def f16 : Pat<(v8i1 (ARMvcmpz (v8f16 MQPR:$v1), (i32 fc))),
                (v8i1 (MVE_VCMPf16r (v8f16 MQPR:$v1), ZR, fc))>;
  def f32 : Pat<(v4i1 (ARMvcmpz (v4f32 MQPR:$v1), (i32 fc))),
                (v4i1 (MVE_VCMPf32r (v4f32 MQPR:$v1), ZR, fc))>;

  // Predicated forms: (and $p1, (vcmpz ...)) folds $p1 into the compare.
  // The v8f16 pattern must select the f16 instruction, exactly like the
  // unpredicated f16 pattern above; selecting MVE_VCMPf32r here would compare
  // the eight half-precision lanes as four single-precision lanes.
  def : Pat<(v8i1 (and (v8i1 VCCR:$p1), (v8i1 (ARMvcmpz (v8f16 MQPR:$v1), (i32 fc))))),
            (v8i1 (MVE_VCMPf16r (v8f16 MQPR:$v1), ZR, fc, 1, VCCR:$p1))>;
  def : Pat<(v4i1 (and (v4i1 VCCR:$p1), (v4i1 (ARMvcmpz (v4f32 MQPR:$v1), (i32 fc))))),
            (v4i1 (MVE_VCMPf32r (v4f32 MQPR:$v1), ZR, fc, 1, VCCR:$p1))>;
}
|
|
|
|
|
[ARM,MVE] Add reversed isel patterns for MVE `vcmp qN,rN`
Summary:
As well as vector/vector compare instructions, MVE also has a family
of comparisons taking a vector and a scalar, which compare every lane
of the vector against the same value. We generate those at isel time
using isel patterns that match `(ARMvcmp vector, (ARMvdup scalar))`.
This commit adds corresponding patterns for the operand-reversed form
`(ARMvcmp (ARMvdup scalar), vector)`, with condition codes swapped as
necessary. That way, we can still generate the vector/scalar compare
instruction if the IR happens to have been rearranged to put the
operands the other way round, which can happen in some optimization
phases. Previously, a vcmp the other way round was handled by emitting
a `vdup` instruction to //explicitly// replicate the scalar input into
a vector, and then doing a vector/vector comparison.
I haven't added a new test, because it turned out that several
existing tests were already exhibiting that failure mode. So just
updating the expected output in the existing MVE codegen tests
demonstrates what's been improved.
Reviewers: ostannard, MarkMurrayARM, dmgreen
Reviewed By: dmgreen
Subscribers: kristof.beyls, hiraditya, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D70296
2019-11-15 22:05:02 +08:00
|
|
|
// Isel patterns for MVE floating-point ARMvcmp with condition code `fc`:
// vector/vector compares, vector/(ARMvdup scalar) compares, and the
// operand-reversed (ARMvdup scalar)/vector compares, which emit the same
// vector/scalar instruction with `fcReversed` as the condition. Each form
// also has a variant and'ed with an existing predicate $p1.
multiclass unpred_vcmpf_r<int fc, int fcReversed> {
|
2019-07-25 01:36:47 +08:00
|
|
|
def f16 : Pat<(v8i1 (ARMvcmp (v8f16 MQPR:$v1), (v8f16 MQPR:$v2), (i32 fc))),
|
2019-07-24 22:28:22 +08:00
|
|
|
(v8i1 (MVE_VCMPf16 (v8f16 MQPR:$v1), (v8f16 MQPR:$v2), fc))>;
|
2019-07-25 01:36:47 +08:00
|
|
|
def f32 : Pat<(v4i1 (ARMvcmp (v4f32 MQPR:$v1), (v4f32 MQPR:$v2), (i32 fc))),
|
2019-07-24 22:28:22 +08:00
|
|
|
(v4i1 (MVE_VCMPf32 (v4f32 MQPR:$v1), (v4f32 MQPR:$v2), fc))>;
|
2019-07-24 22:42:05 +08:00
|
|
|
|
2019-07-25 01:36:47 +08:00
|
|
|
def f16r : Pat<(v8i1 (ARMvcmp (v8f16 MQPR:$v1), (v8f16 (ARMvdup HPR:$v2)), (i32 fc))),
|
2019-07-25 00:58:41 +08:00
|
|
|
(v8i1 (MVE_VCMPf16r (v8f16 MQPR:$v1), (i32 (COPY_TO_REGCLASS (f16 HPR:$v2), rGPR)), fc))>;
|
2019-07-25 01:36:47 +08:00
|
|
|
def f32r : Pat<(v4i1 (ARMvcmp (v4f32 MQPR:$v1), (v4f32 (ARMvdup SPR:$v2)), (i32 fc))),
|
2019-07-25 00:58:41 +08:00
|
|
|
(v4i1 (MVE_VCMPf32r (v4f32 MQPR:$v1), (i32 (COPY_TO_REGCLASS (f32 SPR:$v2), rGPR)), fc))>;
|
|
|
|
|
[ARM,MVE] Add reversed isel patterns for MVE `vcmp qN,rN`
Summary:
As well as vector/vector compare instructions, MVE also has a family
of comparisons taking a vector and a scalar, which compare every lane
of the vector against the same value. We generate those at isel time
using isel patterns that match `(ARMvcmp vector, (ARMvdup scalar))`.
This commit adds corresponding patterns for the operand-reversed form
`(ARMvcmp (ARMvdup scalar), vector)`, with condition codes swapped as
necessary. That way, we can still generate the vector/scalar compare
instruction if the IR happens to have been rearranged to put the
operands the other way round, which can happen in some optimization
phases. Previously, a vcmp the other way round was handled by emitting
a `vdup` instruction to //explicitly// replicate the scalar input into
a vector, and then doing a vector/vector comparison.
I haven't added a new test, because it turned out that several
existing tests were already exhibiting that failure mode. So just
updating the expected output in the existing MVE codegen tests
demonstrates what's been improved.
Reviewers: ostannard, MarkMurrayARM, dmgreen
Reviewed By: dmgreen
Subscribers: kristof.beyls, hiraditya, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D70296
2019-11-15 22:05:02 +08:00
|
|
|
def : Pat<(v8i1 (ARMvcmp (v8f16 (ARMvdup HPR:$v1)), (v8f16 MQPR:$v2), (i32 fc))),
|
|
|
|
(v8i1 (MVE_VCMPf16r (v8f16 MQPR:$v2), (i32 (COPY_TO_REGCLASS (f16 HPR:$v1), rGPR)), fcReversed))>;
|
|
|
|
def : Pat<(v4i1 (ARMvcmp (v4f32 (ARMvdup SPR:$v1)), (v4f32 MQPR:$v2), (i32 fc))),
|
|
|
|
(v4i1 (MVE_VCMPf32r (v4f32 MQPR:$v2), (i32 (COPY_TO_REGCLASS (f32 SPR:$v1), rGPR)), fcReversed))>;
|
|
|
|
|
2019-07-25 01:36:47 +08:00
|
|
|
def : Pat<(v8i1 (and (v8i1 VCCR:$p1), (v8i1 (ARMvcmp (v8f16 MQPR:$v1), (v8f16 MQPR:$v2), (i32 fc))))),
|
2019-07-24 22:42:05 +08:00
|
|
|
(v8i1 (MVE_VCMPf16 (v8f16 MQPR:$v1), (v8f16 MQPR:$v2), fc, 1, VCCR:$p1))>;
|
2019-07-25 01:36:47 +08:00
|
|
|
def : Pat<(v4i1 (and (v4i1 VCCR:$p1), (v4i1 (ARMvcmp (v4f32 MQPR:$v1), (v4f32 MQPR:$v2), (i32 fc))))),
|
2019-07-24 22:42:05 +08:00
|
|
|
(v4i1 (MVE_VCMPf32 (v4f32 MQPR:$v1), (v4f32 MQPR:$v2), fc, 1, VCCR:$p1))>;
|
2019-07-25 01:08:09 +08:00
|
|
|
|
2019-07-25 01:36:47 +08:00
|
|
|
def : Pat<(v8i1 (and (v8i1 VCCR:$p1), (v8i1 (ARMvcmp (v8f16 MQPR:$v1), (v8f16 (ARMvdup HPR:$v2)), (i32 fc))))),
|
2019-07-25 01:08:09 +08:00
|
|
|
(v8i1 (MVE_VCMPf16r (v8f16 MQPR:$v1), (i32 (COPY_TO_REGCLASS (f16 HPR:$v2), rGPR)), fc, 1, VCCR:$p1))>;
|
2019-07-25 01:36:47 +08:00
|
|
|
def : Pat<(v4i1 (and (v4i1 VCCR:$p1), (v4i1 (ARMvcmp (v4f32 MQPR:$v1), (v4f32 (ARMvdup SPR:$v2)), (i32 fc))))),
|
2019-07-25 01:08:09 +08:00
|
|
|
(v4i1 (MVE_VCMPf32r (v4f32 MQPR:$v1), (i32 (COPY_TO_REGCLASS (f32 SPR:$v2), rGPR)), fc, 1, VCCR:$p1))>;
|
[ARM,MVE] Add reversed isel patterns for MVE `vcmp qN,rN`
Summary:
As well as vector/vector compare instructions, MVE also has a family
of comparisons taking a vector and a scalar, which compare every lane
of the vector against the same value. We generate those at isel time
using isel patterns that match `(ARMvcmp vector, (ARMvdup scalar))`.
This commit adds corresponding patterns for the operand-reversed form
`(ARMvcmp (ARMvdup scalar), vector)`, with condition codes swapped as
necessary. That way, we can still generate the vector/scalar compare
instruction if the IR happens to have been rearranged to put the
operands the other way round, which can happen in some optimization
phases. Previously, a vcmp the other way round was handled by emitting
a `vdup` instruction to //explicitly// replicate the scalar input into
a vector, and then doing a vector/vector comparison.
I haven't added a new test, because it turned out that several
existing tests were already exhibiting that failure mode. So just
updating the expected output in the existing MVE codegen tests
demonstrates what's been improved.
Reviewers: ostannard, MarkMurrayARM, dmgreen
Reviewed By: dmgreen
Subscribers: kristof.beyls, hiraditya, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D70296
2019-11-15 22:05:02 +08:00
|
|
|
|
|
|
|
def : Pat<(v8i1 (and (v8i1 VCCR:$p1), (v8i1 (ARMvcmp (v8f16 (ARMvdup HPR:$v1)), (v8f16 MQPR:$v2), (i32 fc))))),
|
|
|
|
(v8i1 (MVE_VCMPf16r (v8f16 MQPR:$v2), (i32 (COPY_TO_REGCLASS (f16 HPR:$v1), rGPR)), fcReversed, 1, VCCR:$p1))>;
|
|
|
|
def : Pat<(v4i1 (and (v4i1 VCCR:$p1), (v4i1 (ARMvcmp (v4f32 (ARMvdup SPR:$v1)), (v4f32 MQPR:$v2), (i32 fc))))),
|
|
|
|
(v4i1 (MVE_VCMPf32r (v4f32 MQPR:$v2), (i32 (COPY_TO_REGCLASS (f32 SPR:$v1), rGPR)), fcReversed, 1, VCCR:$p1))>;
|
2019-07-24 22:28:22 +08:00
|
|
|
}
|
|
|
|
|
2019-07-24 19:08:14 +08:00
|
|
|
let Predicates = [HasMVEInt] in {
|
2019-07-25 01:36:47 +08:00
|
|
|
defm MVE_VCEQZ : unpred_vcmp_z<"i", 0>;
|
|
|
|
defm MVE_VCNEZ : unpred_vcmp_z<"i", 1>;
|
|
|
|
defm MVE_VCGEZ : unpred_vcmp_z<"s", 10>;
|
|
|
|
defm MVE_VCLTZ : unpred_vcmp_z<"s", 11>;
|
|
|
|
defm MVE_VCGTZ : unpred_vcmp_z<"s", 12>;
|
|
|
|
defm MVE_VCLEZ : unpred_vcmp_z<"s", 13>;
|
|
|
|
defm MVE_VCGTUZ : unpred_vcmp_z<"u", 8>;
|
|
|
|
defm MVE_VCGEUZ : unpred_vcmp_z<"u", 2>;
|
|
|
|
|
[ARM,MVE] Add reversed isel patterns for MVE `vcmp qN,rN`
Summary:
As well as vector/vector compare instructions, MVE also has a family
of comparisons taking a vector and a scalar, which compare every lane
of the vector against the same value. We generate those at isel time
using isel patterns that match `(ARMvcmp vector, (ARMvdup scalar))`.
This commit adds corresponding patterns for the operand-reversed form
`(ARMvcmp (ARMvdup scalar), vector)`, with condition codes swapped as
necessary. That way, we can still generate the vector/scalar compare
instruction if the IR happens to have been rearranged to put the
operands the other way round, which can happen in some optimization
phases. Previously, a vcmp the other way round was handled by emitting
a `vdup` instruction to //explicitly// replicate the scalar input into
a vector, and then doing a vector/vector comparison.
I haven't added a new test, because it turned out that several
existing tests were already exhibiting that failure mode. So just
updating the expected output in the existing MVE codegen tests
demonstrates what's been improved.
Reviewers: ostannard, MarkMurrayARM, dmgreen
Reviewed By: dmgreen
Subscribers: kristof.beyls, hiraditya, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D70296
2019-11-15 22:05:02 +08:00
|
|
|
defm MVE_VCEQ : unpred_vcmp_r_reversible<"i", 0, 0>;
|
|
|
|
defm MVE_VCNE : unpred_vcmp_r_reversible<"i", 1, 1>;
|
|
|
|
defm MVE_VCGE : unpred_vcmp_r_reversible<"s", 10, 13>;
|
|
|
|
defm MVE_VCLT : unpred_vcmp_r_reversible<"s", 11, 12>;
|
|
|
|
defm MVE_VCGT : unpred_vcmp_r_reversible<"s", 12, 11>;
|
|
|
|
defm MVE_VCLE : unpred_vcmp_r_reversible<"s", 13, 10>;
|
2019-07-25 01:36:47 +08:00
|
|
|
defm MVE_VCGTU : unpred_vcmp_r<"u", 8>;
|
|
|
|
defm MVE_VCGEU : unpred_vcmp_r<"u", 2>;
|
2019-07-24 19:08:14 +08:00
|
|
|
}
|
|
|
|
|
2019-07-24 22:28:22 +08:00
|
|
|
let Predicates = [HasMVEFloat] in {
|
2019-07-25 01:36:47 +08:00
|
|
|
defm MVE_VFCEQZ : unpred_vcmpf_z<0>;
|
|
|
|
defm MVE_VFCNEZ : unpred_vcmpf_z<1>;
|
|
|
|
defm MVE_VFCGEZ : unpred_vcmpf_z<10>;
|
|
|
|
defm MVE_VFCLTZ : unpred_vcmpf_z<11>;
|
|
|
|
defm MVE_VFCGTZ : unpred_vcmpf_z<12>;
|
|
|
|
defm MVE_VFCLEZ : unpred_vcmpf_z<13>;
|
|
|
|
|
[ARM,MVE] Add reversed isel patterns for MVE `vcmp qN,rN`
Summary:
As well as vector/vector compare instructions, MVE also has a family
of comparisons taking a vector and a scalar, which compare every lane
of the vector against the same value. We generate those at isel time
using isel patterns that match `(ARMvcmp vector, (ARMvdup scalar))`.
This commit adds corresponding patterns for the operand-reversed form
`(ARMvcmp (ARMvdup scalar), vector)`, with condition codes swapped as
necessary. That way, we can still generate the vector/scalar compare
instruction if the IR happens to have been rearranged to put the
operands the other way round, which can happen in some optimization
phases. Previously, a vcmp the other way round was handled by emitting
a `vdup` instruction to //explicitly// replicate the scalar input into
a vector, and then doing a vector/vector comparison.
I haven't added a new test, because it turned out that several
existing tests were already exhibiting that failure mode. So just
updating the expected output in the existing MVE codegen tests
demonstrates what's been improved.
Reviewers: ostannard, MarkMurrayARM, dmgreen
Reviewed By: dmgreen
Subscribers: kristof.beyls, hiraditya, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D70296
2019-11-15 22:05:02 +08:00
|
|
|
defm MVE_VFCEQ : unpred_vcmpf_r<0, 0>;
|
|
|
|
defm MVE_VFCNE : unpred_vcmpf_r<1, 1>;
|
|
|
|
defm MVE_VFCGE : unpred_vcmpf_r<10, 13>;
|
|
|
|
defm MVE_VFCLT : unpred_vcmpf_r<11, 12>;
|
|
|
|
defm MVE_VFCGT : unpred_vcmpf_r<12, 11>;
|
|
|
|
defm MVE_VFCLE : unpred_vcmpf_r<13, 10>;
|
2019-07-24 22:28:22 +08:00
|
|
|
}
|
|
|
|
|
2019-07-24 22:17:54 +08:00
|
|
|
|
|
|
|
// Extra "worst case" and/or/xor patterns, going into and out of GPR
|
|
|
|
multiclass two_predops<SDPatternOperator opnode, Instruction insn> {
|
|
|
|
def v16i1 : Pat<(v16i1 (opnode (v16i1 VCCR:$p1), (v16i1 VCCR:$p2))),
|
|
|
|
(v16i1 (COPY_TO_REGCLASS
|
|
|
|
(insn (i32 (COPY_TO_REGCLASS (v16i1 VCCR:$p1), rGPR)),
|
|
|
|
(i32 (COPY_TO_REGCLASS (v16i1 VCCR:$p2), rGPR))),
|
|
|
|
VCCR))>;
|
|
|
|
def v8i1 : Pat<(v8i1 (opnode (v8i1 VCCR:$p1), (v8i1 VCCR:$p2))),
|
|
|
|
(v8i1 (COPY_TO_REGCLASS
|
|
|
|
(insn (i32 (COPY_TO_REGCLASS (v8i1 VCCR:$p1), rGPR)),
|
|
|
|
(i32 (COPY_TO_REGCLASS (v8i1 VCCR:$p2), rGPR))),
|
|
|
|
VCCR))>;
|
|
|
|
def v4i1 : Pat<(v4i1 (opnode (v4i1 VCCR:$p1), (v4i1 VCCR:$p2))),
|
|
|
|
(v4i1 (COPY_TO_REGCLASS
|
|
|
|
(insn (i32 (COPY_TO_REGCLASS (v4i1 VCCR:$p1), rGPR)),
|
|
|
|
(i32 (COPY_TO_REGCLASS (v4i1 VCCR:$p2), rGPR))),
|
|
|
|
VCCR))>;
|
|
|
|
}
|
|
|
|
|
|
|
|
let Predicates = [HasMVEInt] in {
|
|
|
|
defm POR : two_predops<or, t2ORRrr>;
|
|
|
|
defm PAND : two_predops<and, t2ANDrr>;
|
|
|
|
defm PEOR : two_predops<xor, t2EORrr>;
|
|
|
|
}
|
|
|
|
|
2019-07-24 19:51:36 +08:00
|
|
|
// Occasionally we need to cast between a i32 and a boolean vector, for
|
|
|
|
// example when moving between rGPR and VPR.P0 as part of predicate vector
|
|
|
|
// shuffles. We also sometimes need to cast between different predicate
|
|
|
|
// vector types (v4i1<>v8i1, etc.) also as part of lowering vector shuffles.
|
|
|
|
|
|
|
|
def predicate_cast : SDNode<"ARMISD::PREDICATE_CAST", SDTUnaryOp>;
|
|
|
|
|
|
|
|
let Predicates = [HasMVEInt] in {
|
|
|
|
foreach VT = [ v4i1, v8i1, v16i1 ] in {
|
|
|
|
def : Pat<(i32 (predicate_cast (VT VCCR:$src))),
|
|
|
|
(i32 (COPY_TO_REGCLASS (VT VCCR:$src), VCCR))>;
|
|
|
|
def : Pat<(VT (predicate_cast (i32 VCCR:$src))),
|
|
|
|
(VT (COPY_TO_REGCLASS (i32 VCCR:$src), VCCR))>;
|
|
|
|
|
|
|
|
foreach VT2 = [ v4i1, v8i1, v16i1 ] in
|
|
|
|
def : Pat<(VT (predicate_cast (VT2 VCCR:$src))),
|
|
|
|
(VT (COPY_TO_REGCLASS (VT2 VCCR:$src), VCCR))>;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2019-06-21 19:14:51 +08:00
|
|
|
// end of MVE compares
|
|
|
|
|
2019-06-21 20:13:59 +08:00
|
|
|
// start of MVE_qDest_qSrc
|
|
|
|
|
|
|
|
// Common base for MVE instructions with Q-register operands Qd and Qm.
// Fixes Inst{25-23}, Inst{11-9}, Inst{6} and Inst{4}, and places Qd in
// Inst{22} / Inst{15-13} and Qm in Inst{5} / Inst{3-1}.
class MVE_qDest_qSrc<string iname, string suffix, dag oops, dag iops,
|
|
|
|
string ops, vpred_ops vpred, string cstr,
|
|
|
|
list<dag> pattern=[]>
|
|
|
|
: MVE_p<oops, iops, NoItinerary, iname, suffix,
|
|
|
|
ops, vpred, cstr, pattern> {
|
|
|
|
bits<4> Qd;
|
|
|
|
bits<4> Qm;
|
|
|
|
|
|
|
|
let Inst{25-23} = 0b100;
|
|
|
|
let Inst{22} = Qd{3};
|
|
|
|
let Inst{15-13} = Qd{2-0};
|
|
|
|
let Inst{11-9} = 0b111;
|
|
|
|
let Inst{6} = 0b0;
|
|
|
|
let Inst{5} = Qm{3};
|
|
|
|
let Inst{4} = 0b0;
|
|
|
|
let Inst{3-1} = Qm{2-0};
|
|
|
|
}
|
|
|
|
|
|
|
|
// Shared encoding for the VQ{R}DML{A,S}DH{X} defs: `exch`, `round` and
// `subtract` go to Inst{12}, Inst{0} and Inst{28}. The destination is also an
// input ($Qd_src, tied to $Qd); `cstr` is appended to that tie constraint.
class MVE_VQxDMLxDH<string iname, bit exch, bit round, bit subtract,
|
2019-09-13 19:20:17 +08:00
|
|
|
string suffix, bits<2> size, string cstr="", list<dag> pattern=[]>
|
2019-06-21 20:13:59 +08:00
|
|
|
: MVE_qDest_qSrc<iname, suffix, (outs MQPR:$Qd),
|
[ARM] Fix MVE_VQxDMLxDH instruction class
Summary:
According to the ARMARM, the VQDMLADH, VQRDMLADH, VQDMLSDH and
VQRDMLSDH instructions handle their results as follows: "The base
variant writes the results into the lower element of each pair of
elements in the destination register, whereas the exchange variant
writes to the upper element in each pair". I.e., the initial content
of the output register affects the result, as usual, we model this
with an additional input.
Also, for 32-bit variants Qd is not allowed to be the same register as
Qm and Qn, we use @earlyclobber to indicate this.
This patch also changes vpred_r to vpred_n because the instructions
don't have an explicit 'inactive' operand.
Reviewers: dmgreen, ostannard, simon_tatham
Reviewed By: simon_tatham
Subscribers: javed.absar, kristof.beyls, hiraditya, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D64007
llvm-svn: 364796
2019-07-02 00:07:58 +08:00
|
|
|
(ins MQPR:$Qd_src, MQPR:$Qn, MQPR:$Qm), "$Qd, $Qn, $Qm",
|
2019-09-13 19:20:17 +08:00
|
|
|
vpred_n, "$Qd = $Qd_src"#cstr, pattern> {
|
2019-06-21 20:13:59 +08:00
|
|
|
bits<4> Qn;
|
|
|
|
|
|
|
|
let Inst{28} = subtract;
|
|
|
|
let Inst{21-20} = size;
|
|
|
|
let Inst{19-17} = Qn{2-0};
|
|
|
|
let Inst{16} = 0b0;
|
|
|
|
let Inst{12} = exch;
|
|
|
|
let Inst{8} = 0b0;
|
|
|
|
let Inst{7} = Qn{3};
|
|
|
|
let Inst{0} = round;
|
|
|
|
}
|
|
|
|
|
|
|
|
multiclass MVE_VQxDMLxDH_multi<string iname, bit exch,
|
|
|
|
bit round, bit subtract> {
|
2019-07-08 17:44:52 +08:00
|
|
|
def s8 : MVE_VQxDMLxDH<iname, exch, round, subtract, "s8", 0b00>;
|
|
|
|
def s16 : MVE_VQxDMLxDH<iname, exch, round, subtract, "s16", 0b01>;
|
2019-09-13 19:20:17 +08:00
|
|
|
def s32 : MVE_VQxDMLxDH<iname, exch, round, subtract, "s32", 0b10, ",@earlyclobber $Qd">;
|
2019-06-21 20:13:59 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
defm MVE_VQDMLADH : MVE_VQxDMLxDH_multi<"vqdmladh", 0b0, 0b0, 0b0>;
|
|
|
|
defm MVE_VQDMLADHX : MVE_VQxDMLxDH_multi<"vqdmladhx", 0b1, 0b0, 0b0>;
|
|
|
|
defm MVE_VQRDMLADH : MVE_VQxDMLxDH_multi<"vqrdmladh", 0b0, 0b1, 0b0>;
|
|
|
|
defm MVE_VQRDMLADHX : MVE_VQxDMLxDH_multi<"vqrdmladhx", 0b1, 0b1, 0b0>;
|
|
|
|
defm MVE_VQDMLSDH : MVE_VQxDMLxDH_multi<"vqdmlsdh", 0b0, 0b0, 0b1>;
|
|
|
|
defm MVE_VQDMLSDHX : MVE_VQxDMLxDH_multi<"vqdmlsdhx", 0b1, 0b0, 0b1>;
|
|
|
|
defm MVE_VQRDMLSDH : MVE_VQxDMLxDH_multi<"vqrdmlsdh", 0b0, 0b1, 0b1>;
|
|
|
|
defm MVE_VQRDMLSDHX : MVE_VQxDMLxDH_multi<"vqrdmlsdhx", 0b1, 0b1, 0b1>;
|
|
|
|
|
2019-09-13 19:20:17 +08:00
|
|
|
// VCMUL: `size` goes to Inst{28}; the two-bit rotation operand $rot is split
// across Inst{12} (rot{1}) and Inst{0} (rot{0}). Requires HasMVEFloat.
class MVE_VCMUL<string iname, string suffix, bit size, string cstr="", list<dag> pattern=[]>
|
2019-06-21 20:13:59 +08:00
|
|
|
: MVE_qDest_qSrc<iname, suffix, (outs MQPR:$Qd),
|
|
|
|
(ins MQPR:$Qn, MQPR:$Qm, complexrotateop:$rot),
|
2019-09-13 19:20:17 +08:00
|
|
|
"$Qd, $Qn, $Qm, $rot", vpred_r, cstr, pattern> {
|
2019-06-21 20:13:59 +08:00
|
|
|
bits<4> Qn;
|
|
|
|
bits<2> rot;
|
|
|
|
|
|
|
|
let Inst{28} = size;
|
|
|
|
let Inst{21-20} = 0b11;
|
|
|
|
let Inst{19-17} = Qn{2-0};
|
|
|
|
let Inst{16} = 0b0;
|
|
|
|
let Inst{12} = rot{1};
|
|
|
|
let Inst{8} = 0b0;
|
|
|
|
let Inst{7} = Qn{3};
|
|
|
|
let Inst{0} = rot{0};
|
|
|
|
|
|
|
|
let Predicates = [HasMVEFloat];
|
|
|
|
}
|
|
|
|
|
|
|
|
def MVE_VCMULf16 : MVE_VCMUL<"vcmul", "f16", 0b0>;
|
2019-09-13 19:20:17 +08:00
|
|
|
def MVE_VCMULf32 : MVE_VCMUL<"vcmul", "f32", 0b1, "@earlyclobber $Qd">;
|
2019-06-21 20:13:59 +08:00
|
|
|
|
|
|
|
// VMULL{B,T}: `bit_28` and `bits_21_20` go to Inst{28} and Inst{21-20};
// `T` goes to Inst{12} (0 for the "b" def, 1 for the "t" def in
// MVE_VMULL_multi). `cstr` is forwarded as the operand constraint string.
class MVE_VMULL<string iname, string suffix, bit bit_28, bits<2> bits_21_20,
|
2019-09-13 19:20:17 +08:00
|
|
|
bit T, string cstr, list<dag> pattern=[]>
|
2019-06-21 20:13:59 +08:00
|
|
|
: MVE_qDest_qSrc<iname, suffix, (outs MQPR:$Qd),
|
|
|
|
(ins MQPR:$Qn, MQPR:$Qm), "$Qd, $Qn, $Qm",
|
2019-09-13 19:20:17 +08:00
|
|
|
vpred_r, cstr, pattern> {
|
2019-06-21 20:13:59 +08:00
|
|
|
bits<4> Qd;
|
|
|
|
bits<4> Qn;
|
|
|
|
bits<4> Qm;
|
|
|
|
|
|
|
|
let Inst{28} = bit_28;
|
|
|
|
let Inst{21-20} = bits_21_20;
|
|
|
|
let Inst{19-17} = Qn{2-0};
|
|
|
|
let Inst{16} = 0b1;
|
|
|
|
let Inst{12} = T;
|
|
|
|
let Inst{8} = 0b0;
|
|
|
|
let Inst{7} = Qn{3};
|
|
|
|
let Inst{0} = 0b0;
|
|
|
|
}
|
|
|
|
|
|
|
|
multiclass MVE_VMULL_multi<string iname, string suffix,
|
2019-09-13 19:20:17 +08:00
|
|
|
bit bit_28, bits<2> bits_21_20, string cstr=""> {
|
|
|
|
def bh : MVE_VMULL<iname # "b", suffix, bit_28, bits_21_20, 0b0, cstr>;
|
|
|
|
def th : MVE_VMULL<iname # "t", suffix, bit_28, bits_21_20, 0b1, cstr>;
|
2019-06-21 20:13:59 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
// For integer multiplies, bits 21:20 encode size, and bit 28 signedness.
|
|
|
|
// For polynomial multiplies, bits 21:20 take the unused value 0b11, and
|
|
|
|
// bit 28 switches to encoding the size.
|
|
|
|
|
|
|
|
defm MVE_VMULLs8 : MVE_VMULL_multi<"vmull", "s8", 0b0, 0b00>;
|
|
|
|
defm MVE_VMULLs16 : MVE_VMULL_multi<"vmull", "s16", 0b0, 0b01>;
|
2019-09-13 19:20:17 +08:00
|
|
|
defm MVE_VMULLs32 : MVE_VMULL_multi<"vmull", "s32", 0b0, 0b10, "@earlyclobber $Qd">;
|
2019-06-21 20:13:59 +08:00
|
|
|
defm MVE_VMULLu8 : MVE_VMULL_multi<"vmull", "u8", 0b1, 0b00>;
|
|
|
|
defm MVE_VMULLu16 : MVE_VMULL_multi<"vmull", "u16", 0b1, 0b01>;
|
2019-09-13 19:20:17 +08:00
|
|
|
defm MVE_VMULLu32 : MVE_VMULL_multi<"vmull", "u32", 0b1, 0b10, "@earlyclobber $Qd">;
|
2019-06-21 20:13:59 +08:00
|
|
|
defm MVE_VMULLp8 : MVE_VMULL_multi<"vmull", "p8", 0b0, 0b11>;
|
|
|
|
defm MVE_VMULLp16 : MVE_VMULL_multi<"vmull", "p16", 0b1, 0b11>;
|
|
|
|
|
|
|
|
// Shared encoding for VMULH / VRMULH: `U` goes to Inst{28}, `size` to
// Inst{21-20} and `round` to Inst{12}.
class MVE_VxMULH<string iname, string suffix, bit U, bits<2> size,
|
|
|
|
bit round, list<dag> pattern=[]>
|
|
|
|
: MVE_qDest_qSrc<iname, suffix, (outs MQPR:$Qd),
|
|
|
|
(ins MQPR:$Qn, MQPR:$Qm), "$Qd, $Qn, $Qm",
|
|
|
|
vpred_r, "", pattern> {
|
|
|
|
bits<4> Qn;
|
|
|
|
|
|
|
|
let Inst{28} = U;
|
|
|
|
let Inst{21-20} = size;
|
|
|
|
let Inst{19-17} = Qn{2-0};
|
|
|
|
let Inst{16} = 0b1;
|
|
|
|
let Inst{12} = round;
|
|
|
|
let Inst{8} = 0b0;
|
|
|
|
let Inst{7} = Qn{3};
|
|
|
|
let Inst{0} = 0b1;
|
|
|
|
}
|
|
|
|
|
|
|
|
def MVE_VMULHs8 : MVE_VxMULH<"vmulh", "s8", 0b0, 0b00, 0b0>;
|
|
|
|
def MVE_VMULHs16 : MVE_VxMULH<"vmulh", "s16", 0b0, 0b01, 0b0>;
|
|
|
|
def MVE_VMULHs32 : MVE_VxMULH<"vmulh", "s32", 0b0, 0b10, 0b0>;
|
|
|
|
def MVE_VMULHu8 : MVE_VxMULH<"vmulh", "u8", 0b1, 0b00, 0b0>;
|
|
|
|
def MVE_VMULHu16 : MVE_VxMULH<"vmulh", "u16", 0b1, 0b01, 0b0>;
|
|
|
|
def MVE_VMULHu32 : MVE_VxMULH<"vmulh", "u32", 0b1, 0b10, 0b0>;
|
|
|
|
|
|
|
|
def MVE_VRMULHs8 : MVE_VxMULH<"vrmulh", "s8", 0b0, 0b00, 0b1>;
|
|
|
|
def MVE_VRMULHs16 : MVE_VxMULH<"vrmulh", "s16", 0b0, 0b01, 0b1>;
|
|
|
|
def MVE_VRMULHs32 : MVE_VxMULH<"vrmulh", "s32", 0b0, 0b10, 0b1>;
|
|
|
|
def MVE_VRMULHu8 : MVE_VxMULH<"vrmulh", "u8", 0b1, 0b00, 0b1>;
|
|
|
|
def MVE_VRMULHu16 : MVE_VxMULH<"vrmulh", "u16", 0b1, 0b01, 0b1>;
|
|
|
|
def MVE_VRMULHu32 : MVE_VxMULH<"vrmulh", "u32", 0b1, 0b10, 0b1>;
|
|
|
|
|
|
|
|
// Shared encoding for the VMOVN / VQMOVN / VQMOVUN defs. The destination is
// also an input ($Qd_src, tied to $Qd). Inst{7} is set to the inverse of
// `bit_17`; `T` goes to Inst{12}.
class MVE_VxMOVxN<string iname, string suffix, bit bit_28, bit bit_17,
|
|
|
|
bits<2> size, bit T, list<dag> pattern=[]>
|
|
|
|
: MVE_qDest_qSrc<iname, suffix, (outs MQPR:$Qd),
|
|
|
|
(ins MQPR:$Qd_src, MQPR:$Qm), "$Qd, $Qm",
|
|
|
|
vpred_n, "$Qd = $Qd_src", pattern> {
|
|
|
|
|
|
|
|
let Inst{28} = bit_28;
|
|
|
|
let Inst{21-20} = 0b11;
|
|
|
|
let Inst{19-18} = size;
|
|
|
|
let Inst{17} = bit_17;
|
|
|
|
let Inst{16} = 0b1;
|
|
|
|
let Inst{12} = T;
|
|
|
|
let Inst{8} = 0b0;
|
|
|
|
let Inst{7} = !if(!eq(bit_17, 0), 1, 0);
|
|
|
|
let Inst{0} = 0b1;
|
|
|
|
}
|
|
|
|
|
|
|
|
multiclass MVE_VxMOVxN_halves<string iname, string suffix,
|
|
|
|
bit bit_28, bit bit_17, bits<2> size> {
|
|
|
|
def bh : MVE_VxMOVxN<iname # "b", suffix, bit_28, bit_17, size, 0b0>;
|
|
|
|
def th : MVE_VxMOVxN<iname # "t", suffix, bit_28, bit_17, size, 0b1>;
|
|
|
|
}
|
|
|
|
|
|
|
|
defm MVE_VMOVNi16 : MVE_VxMOVxN_halves<"vmovn", "i16", 0b1, 0b0, 0b00>;
|
|
|
|
defm MVE_VMOVNi32 : MVE_VxMOVxN_halves<"vmovn", "i32", 0b1, 0b0, 0b01>;
|
|
|
|
defm MVE_VQMOVNs16 : MVE_VxMOVxN_halves<"vqmovn", "s16", 0b0, 0b1, 0b00>;
|
|
|
|
defm MVE_VQMOVNs32 : MVE_VxMOVxN_halves<"vqmovn", "s32", 0b0, 0b1, 0b01>;
|
|
|
|
defm MVE_VQMOVNu16 : MVE_VxMOVxN_halves<"vqmovn", "u16", 0b1, 0b1, 0b00>;
|
|
|
|
defm MVE_VQMOVNu32 : MVE_VxMOVxN_halves<"vqmovn", "u32", 0b1, 0b1, 0b01>;
|
|
|
|
defm MVE_VQMOVUNs16 : MVE_VxMOVxN_halves<"vqmovun", "s16", 0b0, 0b0, 0b00>;
|
|
|
|
defm MVE_VQMOVUNs32 : MVE_VxMOVxN_halves<"vqmovun", "s32", 0b0, 0b0, 0b01>;
|
|
|
|
|
[ARM] Selection for MVE VMOVN
This adds both VMOVNt and VMOVNb instruction selection from the appropriate
shuffles. We detect shuffle masks of the form:
0, N, 2, N+2, 4, N+4, ...
or
0, N+1, 2, N+3, 4, N+5, ...
ISel will also try the opposite patterns, with inputs reversed. These are
selected to VMOVNt and VMOVNb respectively.
Differential Revision: https://reviews.llvm.org/D68283
llvm-svn: 374781
2019-10-14 23:19:33 +08:00
|
|
|
def MVEvmovn : SDNode<"ARMISD::VMOVN", SDTARMVEXT>;
|
|
|
|
let Predicates = [HasMVEInt] in {
|
|
|
|
def : Pat<(v8i16 (MVEvmovn (v8i16 MQPR:$Qd_src), (v8i16 MQPR:$Qm), (i32 0))),
|
|
|
|
(v8i16 (MVE_VMOVNi32bh (v8i16 MQPR:$Qd_src), (v8i16 MQPR:$Qm)))>;
|
|
|
|
def : Pat<(v8i16 (MVEvmovn (v8i16 MQPR:$Qd_src), (v8i16 MQPR:$Qm), (i32 1))),
|
|
|
|
(v8i16 (MVE_VMOVNi32th (v8i16 MQPR:$Qd_src), (v8i16 MQPR:$Qm)))>;
|
|
|
|
def : Pat<(v16i8 (MVEvmovn (v16i8 MQPR:$Qd_src), (v16i8 MQPR:$Qm), (i32 0))),
|
|
|
|
(v16i8 (MVE_VMOVNi16bh (v16i8 MQPR:$Qd_src), (v16i8 MQPR:$Qm)))>;
|
|
|
|
def : Pat<(v16i8 (MVEvmovn (v16i8 MQPR:$Qd_src), (v16i8 MQPR:$Qm), (i32 1))),
|
|
|
|
(v16i8 (MVE_VMOVNi16th (v16i8 MQPR:$Qd_src), (v16i8 MQPR:$Qm)))>;
|
|
|
|
}
|
|
|
|
|
2019-06-21 20:13:59 +08:00
|
|
|
// Shared encoding for the f16<->f32 VCVT{B,T} defs: `op` goes to Inst{28}
// and `T` to Inst{12}. The destination is also an input ($Qd_src, tied to
// $Qd). Requires HasMVEFloat.
class MVE_VCVT_ff<string iname, string suffix, bit op, bit T,
|
|
|
|
list<dag> pattern=[]>
|
|
|
|
: MVE_qDest_qSrc<iname, suffix, (outs MQPR:$Qd), (ins MQPR:$Qd_src, MQPR:$Qm),
|
|
|
|
"$Qd, $Qm", vpred_n, "$Qd = $Qd_src", pattern> {
|
|
|
|
let Inst{28} = op;
|
|
|
|
let Inst{21-16} = 0b111111;
|
|
|
|
let Inst{12} = T;
|
|
|
|
let Inst{8-7} = 0b00;
|
|
|
|
let Inst{0} = 0b1;
|
|
|
|
|
|
|
|
let Predicates = [HasMVEFloat];
|
|
|
|
}
|
|
|
|
|
[ARM] Begin adding IR intrinsics for MVE instructions.
This commit, together with the next few, will add a representative
sample of the kind of IR intrinsics that we'll need in order to
implement the user-facing ACLE intrinsics for MVE. Supporting all of
them will take more work; the intention of this initial series of
commits is to implement an intrinsic or two from lots of different
categories, as examples and proofs of concept.
This initial commit introduces a small number of IR intrinsics for
instructions simple enough that they can use Tablegen ISel patterns:
the predicated versions of the VADD and VSUB instructions (both
integer and FP), VMIN and VMAX, and the float->half VCVT instruction
(predicated and unpredicated).
When using VPT-predicated instructions in automatic code generation,
it will be convenient to specify the predicate value as a vector of
the appropriate number of i1. To make it easy to specify all sizes of
an instruction in one go and give each one the matching predicate
vector type, I've added a system of Tablegen informational records
describing MVE's vector types: each one gives the underlying LLVM IR
ValueType (which may not be the same if the MVE vector is of
explicitly signed or unsigned integers) and an appropriate vNi1 to use
as the predicate vector.
(Also, those info records include the usual encoding for the types, so
that as we add associations between each instruction encoding and one
of the new `MVEVectorVTInfo` records, we can remove some of the
existing template parameters and replace them with references to the
vector type info's fields.)
The user-facing ACLE intrinsics will receive a predicate mask as a
16-bit integer, so I've also provided a pair of intrinsics i2v and
v2i, to convert between an integer and a vector of i1 by just changing
the register class.
Reviewers: dmgreen, miyuki, ostannard
Subscribers: javed.absar, kristof.beyls, hiraditya, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D67158
2019-10-08 00:00:51 +08:00
|
|
|
// Defines the "f16.f32" VCVT instruction (with `half` as its T bit) together
// with isel patterns mapping int_arm_mve_vcvt_narrow and
// int_arm_mve_vcvt_narrow_predicated, called with the same `half` value,
// onto that instruction.
multiclass MVE_VCVT_f2h_m<string iname, int half> {
|
|
|
|
def "": MVE_VCVT_ff<iname, "f16.f32", 0b0, half>;
|
|
|
|
|
|
|
|
let Predicates = [HasMVEFloat] in {
|
|
|
|
def : Pat<(v8f16 (int_arm_mve_vcvt_narrow
|
|
|
|
(v8f16 MQPR:$Qd_src), (v4f32 MQPR:$Qm), (i32 half))),
|
|
|
|
(v8f16 (!cast<Instruction>(NAME)
|
|
|
|
(v8f16 MQPR:$Qd_src), (v4f32 MQPR:$Qm)))>;
|
|
|
|
def : Pat<(v8f16 (int_arm_mve_vcvt_narrow_predicated
|
|
|
|
(v8f16 MQPR:$Qd_src), (v4f32 MQPR:$Qm), (i32 half),
|
|
|
|
(v4i1 VCCR:$mask))),
|
|
|
|
(v8f16 (!cast<Instruction>(NAME)
|
|
|
|
(v8f16 MQPR:$Qd_src), (v4f32 MQPR:$Qm),
|
|
|
|
(i32 1), (v4i1 VCCR:$mask)))>;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
multiclass MVE_VCVT_h2f_m<string iname, int half> {
|
|
|
|
def "": MVE_VCVT_ff<iname, "f32.f16", 0b1, half>;
|
2019-06-21 20:13:59 +08:00
|
|
|
}
|
|
|
|
|
[ARM] Begin adding IR intrinsics for MVE instructions.
This commit, together with the next few, will add a representative
sample of the kind of IR intrinsics that we'll need in order to
implement the user-facing ACLE intrinsics for MVE. Supporting all of
them will take more work; the intention of this initial series of
commits is to implement an intrinsic or two from lots of different
categories, as examples and proofs of concept.
This initial commit introduces a small number of IR intrinsics for
instructions simple enough that they can use Tablegen ISel patterns:
the predicated versions of the VADD and VSUB instructions (both
integer and FP), VMIN and VMAX, and the float->half VCVT instruction
(predicated and unpredicated).
When using VPT-predicated instructions in automatic code generation,
it will be convenient to specify the predicate value as a vector of
the appropriate number of i1. To make it easy to specify all sizes of
an instruction in one go and give each one the matching predicate
vector type, I've added a system of Tablegen informational records
describing MVE's vector types: each one gives the underlying LLVM IR
ValueType (which may not be the same if the MVE vector is of
explicitly signed or unsigned integers) and an appropriate vNi1 to use
as the predicate vector.
(Also, those info records include the usual encoding for the types, so
that as we add associations between each instruction encoding and one
of the new `MVEVectorVTInfo` records, we can remove some of the
existing template parameters and replace them with references to the
vector type info's fields.)
The user-facing ACLE intrinsics will receive a predicate mask as a
16-bit integer, so I've also provided a pair of intrinsics i2v and
v2i, to convert between an integer and a vector of i1 by just changing
the register class.
Reviewers: dmgreen, miyuki, ostannard
Subscribers: javed.absar, kristof.beyls, hiraditya, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D67158
2019-10-08 00:00:51 +08:00
|
|
|
defm MVE_VCVTf16f32bh : MVE_VCVT_f2h_m<"vcvtb", 0b0>;
|
|
|
|
defm MVE_VCVTf16f32th : MVE_VCVT_f2h_m<"vcvtt", 0b1>;
|
|
|
|
defm MVE_VCVTf32f16bh : MVE_VCVT_h2f_m<"vcvtb", 0b0>;
|
|
|
|
defm MVE_VCVTf32f16th : MVE_VCVT_h2f_m<"vcvtt", 0b1>;
|
2019-06-21 20:13:59 +08:00
|
|
|
|
|
|
|
// Shared encoding for VCADD / VHCADD: `halve` goes to Inst{28} (the defs
// below pass 1 for vcadd and 0 for vhcadd), `size` to Inst{21-20}, and the
// one-bit rotation operand $rot to Inst{12}.
class MVE_VxCADD<string iname, string suffix, bits<2> size, bit halve,
|
2019-09-13 19:20:17 +08:00
|
|
|
string cstr="", list<dag> pattern=[]>
|
2019-06-21 20:13:59 +08:00
|
|
|
: MVE_qDest_qSrc<iname, suffix, (outs MQPR:$Qd),
|
|
|
|
(ins MQPR:$Qn, MQPR:$Qm, complexrotateopodd:$rot),
|
2019-09-13 19:20:17 +08:00
|
|
|
"$Qd, $Qn, $Qm, $rot", vpred_r, cstr, pattern> {
|
2019-06-21 20:13:59 +08:00
|
|
|
bits<4> Qn;
|
|
|
|
bit rot;
|
|
|
|
|
|
|
|
let Inst{28} = halve;
|
|
|
|
let Inst{21-20} = size;
|
|
|
|
let Inst{19-17} = Qn{2-0};
|
|
|
|
let Inst{16} = 0b0;
|
|
|
|
let Inst{12} = rot;
|
|
|
|
let Inst{8} = 0b1;
|
|
|
|
let Inst{7} = Qn{3};
|
|
|
|
let Inst{0} = 0b0;
|
|
|
|
}
|
|
|
|
|
|
|
|
def MVE_VCADDi8 : MVE_VxCADD<"vcadd", "i8", 0b00, 0b1>;
|
|
|
|
def MVE_VCADDi16 : MVE_VxCADD<"vcadd", "i16", 0b01, 0b1>;
|
2019-09-13 19:20:17 +08:00
|
|
|
def MVE_VCADDi32 : MVE_VxCADD<"vcadd", "i32", 0b10, 0b1, "@earlyclobber $Qd">;
|
2019-06-21 20:13:59 +08:00
|
|
|
|
|
|
|
def MVE_VHCADDs8 : MVE_VxCADD<"vhcadd", "s8", 0b00, 0b0>;
|
|
|
|
def MVE_VHCADDs16 : MVE_VxCADD<"vhcadd", "s16", 0b01, 0b0>;
|
2019-09-13 19:20:17 +08:00
|
|
|
def MVE_VHCADDs32 : MVE_VxCADD<"vhcadd", "s32", 0b10, 0b0, "@earlyclobber $Qd">;
|
2019-06-21 20:13:59 +08:00
|
|
|
|
|
|
|
// Shared encoding for VADC / VADCI / VSBC / VSBCI: `I` goes to Inst{12} and
// `subtract` to Inst{28}. `carryin` is a dag of extra input operands appended
// to (ins $Qn, $Qm); the instruction also defines cl_FPSCR_NZCV:$carryout.
class MVE_VADCSBC<string iname, bit I, bit subtract,
|
|
|
|
dag carryin, list<dag> pattern=[]>
|
|
|
|
: MVE_qDest_qSrc<iname, "i32", (outs MQPR:$Qd, cl_FPSCR_NZCV:$carryout),
|
|
|
|
!con((ins MQPR:$Qn, MQPR:$Qm), carryin),
|
|
|
|
"$Qd, $Qn, $Qm", vpred_r, "", pattern> {
|
|
|
|
bits<4> Qn;
|
|
|
|
|
|
|
|
let Inst{28} = subtract;
|
|
|
|
let Inst{21-20} = 0b11;
|
|
|
|
let Inst{19-17} = Qn{2-0};
|
|
|
|
let Inst{16} = 0b0;
|
|
|
|
let Inst{12} = I;
|
|
|
|
let Inst{8} = 0b1;
|
|
|
|
let Inst{7} = Qn{3};
|
|
|
|
let Inst{0} = 0b0;
|
|
|
|
|
|
|
|
// Custom decoder method in order to add the FPSCR operand(s), which
|
|
|
|
// Tablegen won't do right
|
|
|
|
let DecoderMethod = "DecodeMVEVADCInstruction";
|
|
|
|
}
|
|
|
|
|
|
|
|
def MVE_VADC : MVE_VADCSBC<"vadc", 0b0, 0b0, (ins cl_FPSCR_NZCV:$carryin)>;
|
|
|
|
def MVE_VADCI : MVE_VADCSBC<"vadci", 0b1, 0b0, (ins)>;
|
|
|
|
|
|
|
|
def MVE_VSBC : MVE_VADCSBC<"vsbc", 0b0, 0b1, (ins cl_FPSCR_NZCV:$carryin)>;
|
|
|
|
def MVE_VSBCI : MVE_VADCSBC<"vsbci", 0b1, 0b1, (ins)>;
|
|
|
|
|
|
|
|
// Vector-by-vector VQDMULL encoding.
//   size - placed in Inst{28}.
//   T    - placed in Inst{12}.
//   cstr - constraint string forwarded unchanged to MVE_qDest_qSrc.
class MVE_VQDMULL<string iname, string suffix, bit size, bit T,
                  string cstr="", list<dag> pattern=[]>
  : MVE_qDest_qSrc<iname, suffix, (outs MQPR:$Qd),
                   (ins MQPR:$Qn, MQPR:$Qm), "$Qd, $Qn, $Qm",
                   vpred_r, cstr, pattern> {
  bits<4> Qn;

  // Parameterised bits.
  let Inst{28} = size;
  let Inst{12} = T;

  // Qn is split across bit 7 (high bit) and bits 19-17 (low bits).
  let Inst{19-17} = Qn{2-0};
  let Inst{7} = Qn{3};

  // Fixed bits.
  let Inst{21-20} = 0b11;
  let Inst{16} = 0b0;
  let Inst{8} = 0b1;
  let Inst{0} = 0b1;
}
|
|
|
|
|
2019-09-13 19:20:17 +08:00
|
|
|
// Emits the "vqdmullb" (T = 0, suffix bh) and "vqdmullt" (T = 1, suffix th)
// records for one element type.
multiclass MVE_VQDMULL_halves<string suffix, bit size, string cstr=""> {
  def bh : MVE_VQDMULL<"vqdmullb", suffix, size, 0b0, cstr>;
  def th : MVE_VQDMULL<"vqdmullt", suffix, size, 0b1, cstr>;
}

// Only the s32 form passes an "@earlyclobber $Qd" constraint.
defm MVE_VQDMULLs16 : MVE_VQDMULL_halves<"s16", 0b0>;
defm MVE_VQDMULLs32 : MVE_VQDMULL_halves<"s32", 0b1, "@earlyclobber $Qd">;
|
2019-06-21 20:13:59 +08:00
|
|
|
|
|
|
|
// end of mve_qDest_qSrc
|
|
|
|
|
2019-06-21 21:17:08 +08:00
|
|
|
// start of mve_qDest_rSrc
|
|
|
|
|
|
|
|
// Common encoding base for instructions that combine a vector register Qn
// with a general-purpose register Rm and write a vector register Qd.
//
//   oops/iops - output and input operand dags.
//   itin      - instruction itinerary class, forwarded to MVE_p. (It was
//               previously accepted but ignored in favour of a hard-coded
//               NoItinerary; the callers visible in this file all pass
//               NoItinerary, so forwarding it does not change them.)
//   iname, suffix, ops, vpred, cstr, pattern - forwarded unchanged to MVE_p.
//
// Bits not assigned here (e.g. 28, 21-20, 16, 12, 8, 5) are left for
// subclasses to fill in.
class MVE_qr_base<dag oops, dag iops, InstrItinClass itin, string iname,
                  string suffix, string ops, vpred_ops vpred, string cstr,
                  list<dag> pattern=[]>
   : MVE_p<oops, iops, itin, iname, suffix, ops, vpred, cstr, pattern> {
  bits<4> Qd;
  bits<4> Qn;
  bits<4> Rm;

  let Inst{25-23} = 0b100;
  // Qd: high bit in bit 22, low three bits in 15-13.
  let Inst{22} = Qd{3};
  let Inst{19-17} = Qn{2-0};
  let Inst{15-13} = Qd{2-0};
  let Inst{11-9} = 0b111;
  // Qn: high bit in bit 7, low three bits in 19-17 (above).
  let Inst{7} = Qn{3};
  let Inst{6} = 0b1;
  let Inst{4} = 0b0;
  let Inst{3-0} = Rm{3-0};
}
|
|
|
|
|
2019-09-13 19:20:17 +08:00
|
|
|
// Qd = op(Qn, Rm): plain destination, predicated with vpred_r. The caller
// supplies the constraint string (empty by default).
class MVE_qDest_rSrc<string iname, string suffix, string cstr="",
                     list<dag> pattern=[]>
  : MVE_qr_base<(outs MQPR:$Qd),
                (ins MQPR:$Qn, rGPR:$Rm),
                NoItinerary, iname, suffix, "$Qd, $Qn, $Rm",
                vpred_r, cstr, pattern>;
|
|
|
|
|
|
|
|
// Like MVE_qDest_rSrc, but the destination is also an input: $Qd_src is an
// extra input operand tied to $Qd, and the predication operand set is vpred_n.
class MVE_qDestSrc_rSrc<string iname, string suffix, list<dag> pattern=[]>
  : MVE_qr_base<(outs MQPR:$Qd),
                (ins MQPR:$Qd_src, MQPR:$Qn, rGPR:$Rm),
                NoItinerary, iname, suffix, "$Qd, $Qn, $Rm",
                vpred_n, "$Qd = $Qd_src", pattern>;
|
|
|
|
|
|
|
|
// Two-operand form "$Qd, $Rm": the vector register is both source and
// destination ($Qd tied to $Qd_src). Derives directly from MVE_p, so only
// the Qd and Rm fields are encoded here.
class MVE_qDest_single_rSrc<string iname, string suffix, list<dag> pattern=[]>
  : MVE_p<(outs MQPR:$Qd), (ins MQPR:$Qd_src, rGPR:$Rm), NoItinerary, iname,
          suffix, "$Qd, $Rm", vpred_n, "$Qd = $Qd_src", pattern> {
  bits<4> Qd;
  bits<4> Rm;

  // Qd: high bit in bit 22, low three bits in 15-13.
  let Inst{22} = Qd{3};
  let Inst{15-13} = Qd{2-0};

  let Inst{3-0} = Rm{3-0};
}
|
|
|
|
|
|
|
|
// Vector-by-scalar add/subtract family. Each bit_N parameter is copied
// verbatim into Inst{N}; `size` goes to Inst{21-20}.
class MVE_VADDSUB_qr<string iname, string suffix, bits<2> size,
                     bit bit_5, bit bit_12, bit bit_16,
                     bit bit_28, list<dag> pattern=[]>
  : MVE_qDest_rSrc<iname, suffix, "", pattern> {

  let validForTailPredication = 1;

  // Parameterised bits, highest first.
  let Inst{28} = bit_28;
  let Inst{21-20} = size;
  let Inst{16} = bit_16;
  let Inst{12} = bit_12;
  let Inst{5} = bit_5;

  // Fixed bit.
  let Inst{8} = 0b1;
}
|
|
|
|
|
|
|
|
// Emits the 8-, 16- and 32-bit records of one MVE_VADDSUB_qr instruction;
// the lane width is appended both to the record name and to `suffix`.
// NOTE(review): `pattern` is accepted but not forwarded to the defs below.
multiclass MVE_VADDSUB_qr_sizes<string iname, string suffix,
                                bit bit_5, bit bit_12, bit bit_16,
                                bit bit_28, list<dag> pattern=[]> {
  def "8"  : MVE_VADDSUB_qr<iname, suffix#"8",  0b00,
                            bit_5, bit_12, bit_16, bit_28>;
  def "16" : MVE_VADDSUB_qr<iname, suffix#"16", 0b01,
                            bit_5, bit_12, bit_16, bit_28>;
  def "32" : MVE_VADDSUB_qr<iname, suffix#"32", 0b10,
                            bit_5, bit_12, bit_16, bit_28>;
}
|
|
|
|
|
|
|
|
// Argument order after the suffix: bit_5, bit_12, bit_16, bit_28.
defm MVE_VADD_qr_i  : MVE_VADDSUB_qr_sizes<"vadd",  "i", 0b0, 0b0, 0b1, 0b0>;
defm MVE_VQADD_qr_s : MVE_VADDSUB_qr_sizes<"vqadd", "s", 0b1, 0b0, 0b0, 0b0>;
defm MVE_VQADD_qr_u : MVE_VADDSUB_qr_sizes<"vqadd", "u", 0b1, 0b0, 0b0, 0b1>;

defm MVE_VSUB_qr_i  : MVE_VADDSUB_qr_sizes<"vsub",  "i", 0b0, 0b1, 0b1, 0b0>;
defm MVE_VQSUB_qr_s : MVE_VADDSUB_qr_sizes<"vqsub", "s", 0b1, 0b1, 0b0, 0b0>;
defm MVE_VQSUB_qr_u : MVE_VADDSUB_qr_sizes<"vqsub", "u", 0b1, 0b1, 0b0, 0b1>;
|
|
|
|
|
2019-09-07 01:02:35 +08:00
|
|
|
// Select the vector-by-scalar VADD when the second addend of a vector `add`
// is an ARMvdup of a GPR.
let Predicates = [HasMVEInt] in {
  def : Pat<(v16i8 (add (v16i8 MQPR:$val1),
                        (v16i8 (ARMvdup GPR:$val2)))),
            (v16i8 (MVE_VADD_qr_i8 (v16i8 MQPR:$val1), (i32 GPR:$val2)))>;
  def : Pat<(v8i16 (add (v8i16 MQPR:$val1),
                        (v8i16 (ARMvdup GPR:$val2)))),
            (v8i16 (MVE_VADD_qr_i16 (v8i16 MQPR:$val1), (i32 GPR:$val2)))>;
  def : Pat<(v4i32 (add (v4i32 MQPR:$val1),
                        (v4i32 (ARMvdup GPR:$val2)))),
            (v4i32 (MVE_VADD_qr_i32 (v4i32 MQPR:$val1), (i32 GPR:$val2)))>;
}
|
|
|
|
|
2019-09-07 01:02:42 +08:00
|
|
|
// Select the vector-by-scalar VSUB when the subtrahend of a vector `sub`
// is an ARMvdup of a GPR.
let Predicates = [HasMVEInt] in {
  def : Pat<(v16i8 (sub (v16i8 MQPR:$val1),
                        (v16i8 (ARMvdup GPR:$val2)))),
            (v16i8 (MVE_VSUB_qr_i8 (v16i8 MQPR:$val1), (i32 GPR:$val2)))>;
  def : Pat<(v8i16 (sub (v8i16 MQPR:$val1),
                        (v8i16 (ARMvdup GPR:$val2)))),
            (v8i16 (MVE_VSUB_qr_i16 (v8i16 MQPR:$val1), (i32 GPR:$val2)))>;
  def : Pat<(v4i32 (sub (v4i32 MQPR:$val1),
                        (v4i32 (ARMvdup GPR:$val2)))),
            (v4i32 (MVE_VSUB_qr_i32 (v4i32 MQPR:$val1), (i32 GPR:$val2)))>;
}
|
|
|
|
|
2019-06-21 21:17:08 +08:00
|
|
|
// Vector-by-scalar VQDMULL encoding.
//   size - placed in Inst{28}.
//   T    - placed in Inst{12}.
//   cstr - constraint string forwarded to MVE_qDest_rSrc.
class MVE_VQDMULL_qr<string iname, string suffix, bit size,
                     bit T, string cstr="", list<dag> pattern=[]>
  : MVE_qDest_rSrc<iname, suffix, cstr, pattern> {

  // Parameterised bits.
  let Inst{28} = size;
  let Inst{12} = T;

  // Fixed bits.
  let Inst{21-20} = 0b11;
  let Inst{16} = 0b0;
  let Inst{8} = 0b1;
  let Inst{5} = 0b1;
}
|
|
|
|
|
2019-09-13 19:20:17 +08:00
|
|
|
// Emits the "vqdmullb" (T = 0, suffix bh) and "vqdmullt" (T = 1, suffix th)
// vector-by-scalar records for one element type.
multiclass MVE_VQDMULL_qr_halves<string suffix, bit size, string cstr=""> {
  def bh : MVE_VQDMULL_qr<"vqdmullb", suffix, size, 0b0, cstr>;
  def th : MVE_VQDMULL_qr<"vqdmullt", suffix, size, 0b1, cstr>;
}

// Only the s32 form passes an "@earlyclobber $Qd" constraint.
defm MVE_VQDMULL_qr_s16 : MVE_VQDMULL_qr_halves<"s16", 0b0>;
defm MVE_VQDMULL_qr_s32 : MVE_VQDMULL_qr_halves<"s32", 0b1,
                                                "@earlyclobber $Qd">;
|
2019-06-21 21:17:08 +08:00
|
|
|
|
|
|
|
// Shared vector-by-scalar encoding used by the VHADD/VHSUB records and by
// the floating-point VADD/VSUB records.
//   bit_28     - placed in Inst{28}.
//   bits_21_20 - placed in Inst{21-20}.
//   subtract   - placed in Inst{12}.
class MVE_VxADDSUB_qr<string iname, string suffix,
                      bit bit_28, bits<2> bits_21_20, bit subtract,
                      list<dag> pattern=[]>
  : MVE_qDest_rSrc<iname, suffix, "", pattern> {

  let validForTailPredication = 1;

  // Parameterised bits.
  let Inst{28} = bit_28;
  let Inst{21-20} = bits_21_20;
  let Inst{12} = subtract;

  // Fixed bits.
  let Inst{16} = 0b0;
  let Inst{8} = 0b1;
  let Inst{5} = 0b0;
}
|
|
|
|
|
|
|
|
// Argument order after the suffix: bit_28, bits_21_20, subtract.
// The signed records pass bit_28 = 0 and the unsigned records pass 1.
def MVE_VHADD_qr_s8  : MVE_VxADDSUB_qr<"vhadd", "s8",  0b0, 0b00, 0b0>;
def MVE_VHADD_qr_s16 : MVE_VxADDSUB_qr<"vhadd", "s16", 0b0, 0b01, 0b0>;
def MVE_VHADD_qr_s32 : MVE_VxADDSUB_qr<"vhadd", "s32", 0b0, 0b10, 0b0>;
def MVE_VHADD_qr_u8  : MVE_VxADDSUB_qr<"vhadd", "u8",  0b1, 0b00, 0b0>;
def MVE_VHADD_qr_u16 : MVE_VxADDSUB_qr<"vhadd", "u16", 0b1, 0b01, 0b0>;
def MVE_VHADD_qr_u32 : MVE_VxADDSUB_qr<"vhadd", "u32", 0b1, 0b10, 0b0>;

def MVE_VHSUB_qr_s8  : MVE_VxADDSUB_qr<"vhsub", "s8",  0b0, 0b00, 0b1>;
def MVE_VHSUB_qr_s16 : MVE_VxADDSUB_qr<"vhsub", "s16", 0b0, 0b01, 0b1>;
def MVE_VHSUB_qr_s32 : MVE_VxADDSUB_qr<"vhsub", "s32", 0b0, 0b10, 0b1>;
def MVE_VHSUB_qr_u8  : MVE_VxADDSUB_qr<"vhsub", "u8",  0b1, 0b00, 0b1>;
def MVE_VHSUB_qr_u16 : MVE_VxADDSUB_qr<"vhsub", "u16", 0b1, 0b01, 0b1>;
def MVE_VHSUB_qr_u32 : MVE_VxADDSUB_qr<"vhsub", "u32", 0b1, 0b10, 0b1>;
|
|
|
|
|
|
|
|
// Floating-point vector-by-scalar add/subtract: bits 21-20 are 0b11 and
// bit 28 is 0 for the f32 records, 1 for the f16 records.
let Predicates = [HasMVEFloat] in {
  def MVE_VADD_qr_f32 : MVE_VxADDSUB_qr<"vadd", "f32", 0b0, 0b11, 0b0>;
  def MVE_VADD_qr_f16 : MVE_VxADDSUB_qr<"vadd", "f16", 0b1, 0b11, 0b0>;

  def MVE_VSUB_qr_f32 : MVE_VxADDSUB_qr<"vsub", "f32", 0b0, 0b11, 0b1>;
  def MVE_VSUB_qr_f16 : MVE_VxADDSUB_qr<"vsub", "f16", 0b1, 0b11, 0b1>;
}
|
|
|
|
|
|
|
|
// Encoding shared by the "$Qd, $Rm" shift records (VSHL, VRSHL, VQSHL,
// VQRSHL).
//   U      - placed in Inst{28}.
//   size   - placed in Inst{19-18}.
//   bit_7  - placed in Inst{7}.
//   bit_17 - placed in Inst{17}.
class MVE_VxSHL_qr<string iname, string suffix, bit U, bits<2> size,
                   bit bit_7, bit bit_17, list<dag> pattern=[]>
  : MVE_qDest_single_rSrc<iname, suffix, pattern> {

  let validForTailPredication = 1;

  // Parameterised bits.
  let Inst{28} = U;
  let Inst{19-18} = size;
  let Inst{17} = bit_17;
  let Inst{7} = bit_7;

  // Fixed bits.
  let Inst{25-23} = 0b100;
  let Inst{21-20} = 0b11;
  let Inst{16} = 0b1;
  let Inst{12-8} = 0b11110;
  let Inst{6-4} = 0b110;
}
|
|
|
|
|
|
|
|
// Emits the six element-type records (s8/s16/s32 with U = 0, u8/u16/u32
// with U = 1) of one shift-by-scalar instruction.
multiclass MVE_VxSHL_qr_types<string iname, bit bit_7, bit bit_17> {
  def s8  : MVE_VxSHL_qr<iname, "s8",  0b0, 0b00, bit_7, bit_17>;
  def s16 : MVE_VxSHL_qr<iname, "s16", 0b0, 0b01, bit_7, bit_17>;
  def s32 : MVE_VxSHL_qr<iname, "s32", 0b0, 0b10, bit_7, bit_17>;
  def u8  : MVE_VxSHL_qr<iname, "u8",  0b1, 0b00, bit_7, bit_17>;
  def u16 : MVE_VxSHL_qr<iname, "u16", 0b1, 0b01, bit_7, bit_17>;
  def u32 : MVE_VxSHL_qr<iname, "u32", 0b1, 0b10, bit_7, bit_17>;
}

// Argument order after the mnemonic: bit_7, bit_17.
defm MVE_VSHL_qr   : MVE_VxSHL_qr_types<"vshl",   0b0, 0b0>;
defm MVE_VRSHL_qr  : MVE_VxSHL_qr_types<"vrshl",  0b0, 0b1>;
defm MVE_VQSHL_qr  : MVE_VxSHL_qr_types<"vqshl",  0b1, 0b0>;
defm MVE_VQRSHL_qr : MVE_VxSHL_qr_types<"vqrshl", 0b1, 0b1>;
|
|
|
|
|
2019-07-15 19:35:39 +08:00
|
|
|
// Select VSHL-by-scalar when the shift-amount vector of ARMvshlu/ARMvshls
// is an ARMvdup of a GPR.
let Predicates = [HasMVEInt] in {
  // ARMvshlu -> the u8/u16/u32 records.
  def : Pat<(v4i32 (ARMvshlu (v4i32 MQPR:$Qm),
                             (v4i32 (ARMvdup GPR:$Rm)))),
            (v4i32 (MVE_VSHL_qru32 (v4i32 MQPR:$Qm), GPR:$Rm))>;
  def : Pat<(v8i16 (ARMvshlu (v8i16 MQPR:$Qm),
                             (v8i16 (ARMvdup GPR:$Rm)))),
            (v8i16 (MVE_VSHL_qru16 (v8i16 MQPR:$Qm), GPR:$Rm))>;
  def : Pat<(v16i8 (ARMvshlu (v16i8 MQPR:$Qm),
                             (v16i8 (ARMvdup GPR:$Rm)))),
            (v16i8 (MVE_VSHL_qru8 (v16i8 MQPR:$Qm), GPR:$Rm))>;

  // ARMvshls -> the s8/s16/s32 records.
  def : Pat<(v4i32 (ARMvshls (v4i32 MQPR:$Qm),
                             (v4i32 (ARMvdup GPR:$Rm)))),
            (v4i32 (MVE_VSHL_qrs32 (v4i32 MQPR:$Qm), GPR:$Rm))>;
  def : Pat<(v8i16 (ARMvshls (v8i16 MQPR:$Qm),
                             (v8i16 (ARMvdup GPR:$Rm)))),
            (v8i16 (MVE_VSHL_qrs16 (v8i16 MQPR:$Qm), GPR:$Rm))>;
  def : Pat<(v16i8 (ARMvshls (v16i8 MQPR:$Qm),
                             (v16i8 (ARMvdup GPR:$Rm)))),
            (v16i8 (MVE_VSHL_qrs8 (v16i8 MQPR:$Qm), GPR:$Rm))>;
}
|
|
|
|
|
2019-06-21 21:17:08 +08:00
|
|
|
// VBRSR encoding; `size` is placed in Inst{21-20}, every other bit set here
// is fixed.
class MVE_VBRSR<string iname, string suffix, bits<2> size,
                list<dag> pattern=[]>
  : MVE_qDest_rSrc<iname, suffix, "", pattern> {

  let validForTailPredication = 1;

  let Inst{21-20} = size;

  // Fixed bits.
  let Inst{28} = 0b1;
  let Inst{16} = 0b1;
  let Inst{12} = 0b1;
  let Inst{8} = 0b0;
  let Inst{5} = 0b1;
}

def MVE_VBRSR8  : MVE_VBRSR<"vbrsr", "8",  0b00>;
def MVE_VBRSR16 : MVE_VBRSR<"vbrsr", "16", 0b01>;
def MVE_VBRSR32 : MVE_VBRSR<"vbrsr", "32", 0b10>;
|
|
|
|
|
2019-09-16 23:20:03 +08:00
|
|
|
// Lower a vector `bitreverse` to VBRSR, with the scalar operand materialised
// by t2MOVi as the lane width in bits (8, 32 or 16).
let Predicates = [HasMVEInt] in {
  def : Pat<(v16i8 (bitreverse (v16i8 MQPR:$val1))),
            (v16i8 (MVE_VBRSR8 (v16i8 MQPR:$val1), (t2MOVi (i32 8))))>;

  def : Pat<(v4i32 (bitreverse (v4i32 MQPR:$val1))),
            (v4i32 (MVE_VBRSR32 (v4i32 MQPR:$val1), (t2MOVi (i32 32))))>;

  def : Pat<(v8i16 (bitreverse (v8i16 MQPR:$val1))),
            (v8i16 (MVE_VBRSR16 (v8i16 MQPR:$val1), (t2MOVi (i32 16))))>;
}
|
|
|
|
|
2019-06-21 21:17:08 +08:00
|
|
|
// Integer vector-by-scalar VMUL encoding; `size` is placed in Inst{21-20},
// every other bit set here is fixed.
class MVE_VMUL_qr_int<string iname, string suffix,
                      bits<2> size, list<dag> pattern=[]>
  : MVE_qDest_rSrc<iname, suffix, "", pattern> {

  let validForTailPredication = 1;

  let Inst{21-20} = size;

  // Fixed bits.
  let Inst{28} = 0b0;
  let Inst{16} = 0b1;
  let Inst{12} = 0b1;
  let Inst{8} = 0b0;
  let Inst{5} = 0b1;
}

def MVE_VMUL_qr_i8  : MVE_VMUL_qr_int<"vmul", "i8",  0b00>;
def MVE_VMUL_qr_i16 : MVE_VMUL_qr_int<"vmul", "i16", 0b01>;
def MVE_VMUL_qr_i32 : MVE_VMUL_qr_int<"vmul", "i32", 0b10>;
|
|
|
|
|
2019-09-07 01:02:21 +08:00
|
|
|
// Select the vector-by-scalar VMUL when the second factor of a vector `mul`
// is an ARMvdup of a GPR.
let Predicates = [HasMVEInt] in {
  def : Pat<(v16i8 (mul (v16i8 MQPR:$val1),
                        (v16i8 (ARMvdup GPR:$val2)))),
            (v16i8 (MVE_VMUL_qr_i8 (v16i8 MQPR:$val1), (i32 GPR:$val2)))>;
  def : Pat<(v8i16 (mul (v8i16 MQPR:$val1),
                        (v8i16 (ARMvdup GPR:$val2)))),
            (v8i16 (MVE_VMUL_qr_i16 (v8i16 MQPR:$val1), (i32 GPR:$val2)))>;
  def : Pat<(v4i32 (mul (v4i32 MQPR:$val1),
                        (v4i32 (ARMvdup GPR:$val2)))),
            (v4i32 (MVE_VMUL_qr_i32 (v4i32 MQPR:$val1), (i32 GPR:$val2)))>;
}
|
|
|
|
|
2019-06-21 21:17:08 +08:00
|
|
|
// Encoding shared by the VQDMULH/VQRDMULH vector-by-scalar records and the
// floating-point VMUL vector-by-scalar records.
//   bit_28     - placed in Inst{28}.
//   bits_21_20 - placed in Inst{21-20}.
// This class does not itself set validForTailPredication.
class MVE_VxxMUL_qr<string iname, string suffix,
                    bit bit_28, bits<2> bits_21_20, list<dag> pattern=[]>
  : MVE_qDest_rSrc<iname, suffix, "", pattern> {

  // Parameterised bits.
  let Inst{28} = bit_28;
  let Inst{21-20} = bits_21_20;

  // Fixed bits.
  let Inst{16} = 0b1;
  let Inst{12} = 0b0;
  let Inst{8} = 0b0;
  let Inst{5} = 0b1;
}
|
|
|
|
|
|
|
|
// Argument order after the suffix: bit_28, bits_21_20.
def MVE_VQDMULH_qr_s8   : MVE_VxxMUL_qr<"vqdmulh", "s8",  0b0, 0b00>;
def MVE_VQDMULH_qr_s16  : MVE_VxxMUL_qr<"vqdmulh", "s16", 0b0, 0b01>;
def MVE_VQDMULH_qr_s32  : MVE_VxxMUL_qr<"vqdmulh", "s32", 0b0, 0b10>;

def MVE_VQRDMULH_qr_s8  : MVE_VxxMUL_qr<"vqrdmulh", "s8",  0b1, 0b00>;
def MVE_VQRDMULH_qr_s16 : MVE_VxxMUL_qr<"vqrdmulh", "s16", 0b1, 0b01>;
def MVE_VQRDMULH_qr_s32 : MVE_VxxMUL_qr<"vqrdmulh", "s32", 0b1, 0b10>;

// Of the MVE_VxxMUL_qr records, only the floating-point VMUL ones are
// marked valid for tail predication.
let Predicates = [HasMVEFloat], validForTailPredication = 1 in {
  def MVE_VMUL_qr_f16 : MVE_VxxMUL_qr<"vmul", "f16", 0b1, 0b11>;
  def MVE_VMUL_qr_f32 : MVE_VxxMUL_qr<"vmul", "f32", 0b0, 0b11>;
}
|
|
|
|
|
|
|
|
// Encoding shared by the VMLA/VMLAS and VFMA/VFMAS vector-by-scalar records,
// all of which read the destination (via MVE_qDestSrc_rSrc).
//   bit_28     - placed in Inst{28}.
//   bits_21_20 - placed in Inst{21-20}.
//   S          - placed in Inst{12}.
class MVE_VFMAMLA_qr<string iname, string suffix,
                     bit bit_28, bits<2> bits_21_20, bit S,
                     list<dag> pattern=[]>
  : MVE_qDestSrc_rSrc<iname, suffix, pattern> {

  let validForTailPredication = 1;

  // Parameterised bits.
  let Inst{28} = bit_28;
  let Inst{21-20} = bits_21_20;
  let Inst{12} = S;

  // Fixed bits.
  let Inst{16} = 0b1;
  let Inst{8} = 0b0;
  let Inst{5} = 0b0;
}
|
|
|
|
|
|
|
|
// Argument order after the suffix: bit_28, bits_21_20, S.
// The VMLA records pass S = 0 and the VMLAS records pass S = 1.
def MVE_VMLA_qr_s8   : MVE_VFMAMLA_qr<"vmla", "s8",  0b0, 0b00, 0b0>;
def MVE_VMLA_qr_s16  : MVE_VFMAMLA_qr<"vmla", "s16", 0b0, 0b01, 0b0>;
def MVE_VMLA_qr_s32  : MVE_VFMAMLA_qr<"vmla", "s32", 0b0, 0b10, 0b0>;
def MVE_VMLA_qr_u8   : MVE_VFMAMLA_qr<"vmla", "u8",  0b1, 0b00, 0b0>;
def MVE_VMLA_qr_u16  : MVE_VFMAMLA_qr<"vmla", "u16", 0b1, 0b01, 0b0>;
def MVE_VMLA_qr_u32  : MVE_VFMAMLA_qr<"vmla", "u32", 0b1, 0b10, 0b0>;

def MVE_VMLAS_qr_s8  : MVE_VFMAMLA_qr<"vmlas", "s8",  0b0, 0b00, 0b1>;
def MVE_VMLAS_qr_s16 : MVE_VFMAMLA_qr<"vmlas", "s16", 0b0, 0b01, 0b1>;
def MVE_VMLAS_qr_s32 : MVE_VFMAMLA_qr<"vmlas", "s32", 0b0, 0b10, 0b1>;
def MVE_VMLAS_qr_u8  : MVE_VFMAMLA_qr<"vmlas", "u8",  0b1, 0b00, 0b1>;
def MVE_VMLAS_qr_u16 : MVE_VFMAMLA_qr<"vmlas", "u16", 0b1, 0b01, 0b1>;
def MVE_VMLAS_qr_u32 : MVE_VFMAMLA_qr<"vmlas", "u32", 0b1, 0b10, 0b1>;
|
|
|
|
|
2019-09-03 16:17:46 +08:00
|
|
|
// Fold src1 + (src2 * vdup(x)) into a single VMLA; the unsigned-suffixed
// records are the ones selected for every lane width.
let Predicates = [HasMVEInt] in {
  def : Pat<(v4i32 (add (v4i32 MQPR:$src1),
                        (v4i32 (mul (v4i32 MQPR:$src2),
                                    (v4i32 (ARMvdup (i32 rGPR:$x))))))),
            (v4i32 (MVE_VMLA_qr_u32 $src1, $src2, $x))>;
  def : Pat<(v8i16 (add (v8i16 MQPR:$src1),
                        (v8i16 (mul (v8i16 MQPR:$src2),
                                    (v8i16 (ARMvdup (i32 rGPR:$x))))))),
            (v8i16 (MVE_VMLA_qr_u16 $src1, $src2, $x))>;
  def : Pat<(v16i8 (add (v16i8 MQPR:$src1),
                        (v16i8 (mul (v16i8 MQPR:$src2),
                                    (v16i8 (ARMvdup (i32 rGPR:$x))))))),
            (v16i8 (MVE_VMLA_qr_u8 $src1, $src2, $x))>;
}
|
|
|
|
|
2019-06-21 21:17:08 +08:00
|
|
|
// Floating-point fused multiply-add by scalar: "vfma" (S = 0) and "vfmas"
// (S = 1); bit 28 is 1 for the f16 records and 0 for the f32 records.
let Predicates = [HasMVEFloat] in {
  def MVE_VFMA_qr_f16  : MVE_VFMAMLA_qr<"vfma",  "f16", 0b1, 0b11, 0b0>;
  def MVE_VFMA_qr_f32  : MVE_VFMAMLA_qr<"vfma",  "f32", 0b0, 0b11, 0b0>;
  def MVE_VFMA_qr_Sf16 : MVE_VFMAMLA_qr<"vfmas", "f16", 0b1, 0b11, 0b1>;
  def MVE_VFMA_qr_Sf32 : MVE_VFMAMLA_qr<"vfmas", "f32", 0b0, 0b11, 0b1>;
}
|
|
|
|
|
|
|
|
// Encoding shared by VQDMLAH, VQRDMLAH, VQDMLASH and VQRDMLASH.
//   U      - placed in Inst{28}.
//   size   - placed in Inst{21-20}.
//   bit_5  - placed in Inst{5}.
//   bit_12 - placed in Inst{12}.
class MVE_VQDMLAH_qr<string iname, string suffix, bit U, bits<2> size,
                     bit bit_5, bit bit_12, list<dag> pattern=[]>
  : MVE_qDestSrc_rSrc<iname, suffix, pattern> {

  // Parameterised bits.
  let Inst{28} = U;
  let Inst{21-20} = size;
  let Inst{12} = bit_12;
  let Inst{5} = bit_5;

  // Fixed bits.
  let Inst{16} = 0b0;
  let Inst{8} = 0b0;
}
|
|
|
|
|
|
|
|
// Emits the s8/s16/s32 records (U = 0 for all three) of one instruction.
multiclass MVE_VQDMLAH_qr_types<string iname, bit bit_5, bit bit_12> {
  def s8  : MVE_VQDMLAH_qr<iname, "s8",  0b0, 0b00, bit_5, bit_12>;
  def s16 : MVE_VQDMLAH_qr<iname, "s16", 0b0, 0b01, bit_5, bit_12>;
  def s32 : MVE_VQDMLAH_qr<iname, "s32", 0b0, 0b10, bit_5, bit_12>;
}

// Argument order after the mnemonic: bit_5, bit_12.
defm MVE_VQDMLAH_qr   : MVE_VQDMLAH_qr_types<"vqdmlah",   0b1, 0b0>;
defm MVE_VQRDMLAH_qr  : MVE_VQDMLAH_qr_types<"vqrdmlah",  0b0, 0b0>;
defm MVE_VQDMLASH_qr  : MVE_VQDMLAH_qr_types<"vqdmlash",  0b1, 0b1>;
defm MVE_VQRDMLASH_qr : MVE_VQDMLAH_qr_types<"vqrdmlash", 0b0, 0b1>;
|
|
|
|
|
|
|
|
// Encoding shared by VIDUP (bit_12 = 0) and VDDUP (bit_12 = 1). The records
// produce both a vector $Qd and a GPR $Rn, where $Rn is tied to the input
// $Rn_src. $Rn is a tGPREven operand and only Rn{3-1} is encoded.
class MVE_VxDUP<string iname, string suffix, bits<2> size, bit bit_12,
                list<dag> pattern=[]>
  : MVE_p<(outs MQPR:$Qd, tGPREven:$Rn),
          (ins tGPREven:$Rn_src, MVE_VIDUP_imm:$imm), NoItinerary,
          iname, suffix, "$Qd, $Rn, $imm", vpred_r, "$Rn = $Rn_src",
          pattern> {
  bits<4> Qd;
  bits<4> Rn;
  bits<2> imm;

  let validForTailPredication = 1;

  // Operand fields.
  let Inst{22} = Qd{3};
  let Inst{15-13} = Qd{2-0};
  let Inst{19-17} = Rn{3-1};
  // The two-bit immediate is split across bits 7 and 0.
  let Inst{7} = imm{1};
  let Inst{0} = imm{0};

  // Parameterised bits.
  let Inst{21-20} = size;
  let Inst{12} = bit_12;

  // Fixed bits.
  let Inst{28} = 0b0;
  let Inst{25-23} = 0b100;
  let Inst{16} = 0b1;
  let Inst{11-8} = 0b1111;
  let Inst{6-1} = 0b110111;
}
|
|
|
|
|
|
|
|
// Argument order after the suffix: size, bit_12.
def MVE_VIDUPu8  : MVE_VxDUP<"vidup", "u8",  0b00, 0b0>;
def MVE_VIDUPu16 : MVE_VxDUP<"vidup", "u16", 0b01, 0b0>;
def MVE_VIDUPu32 : MVE_VxDUP<"vidup", "u32", 0b10, 0b0>;

def MVE_VDDUPu8  : MVE_VxDUP<"vddup", "u8",  0b00, 0b1>;
def MVE_VDDUPu16 : MVE_VxDUP<"vddup", "u16", 0b01, 0b1>;
def MVE_VDDUPu32 : MVE_VxDUP<"vddup", "u32", 0b10, 0b1>;
|
|
|
|
|
|
|
|
// Encoding shared by VIWDUP (bit_12 = 0) and VDWDUP (bit_12 = 1). Same shape
// as MVE_VxDUP plus an extra tGPROdd:$Rm input, of which only Rm{3-1} is
// encoded (in Inst{3-1}).
class MVE_VxWDUP<string iname, string suffix, bits<2> size, bit bit_12,
                 list<dag> pattern=[]>
  : MVE_p<(outs MQPR:$Qd, tGPREven:$Rn),
          (ins tGPREven:$Rn_src, tGPROdd:$Rm, MVE_VIDUP_imm:$imm), NoItinerary,
          iname, suffix, "$Qd, $Rn, $Rm, $imm", vpred_r, "$Rn = $Rn_src",
          pattern> {
  bits<4> Qd;
  bits<4> Rm;
  bits<4> Rn;
  bits<2> imm;

  let validForTailPredication = 1;

  // Operand fields.
  let Inst{22} = Qd{3};
  let Inst{15-13} = Qd{2-0};
  let Inst{19-17} = Rn{3-1};
  let Inst{3-1} = Rm{3-1};
  // The two-bit immediate is split across bits 7 and 0.
  let Inst{7} = imm{1};
  let Inst{0} = imm{0};

  // Parameterised bits.
  let Inst{21-20} = size;
  let Inst{12} = bit_12;

  // Fixed bits.
  let Inst{28} = 0b0;
  let Inst{25-23} = 0b100;
  let Inst{16} = 0b1;
  let Inst{11-8} = 0b1111;
  let Inst{6-4} = 0b110;
}
|
|
|
|
|
|
|
|
// Argument order after the suffix: size, bit_12.
def MVE_VIWDUPu8  : MVE_VxWDUP<"viwdup", "u8",  0b00, 0b0>;
def MVE_VIWDUPu16 : MVE_VxWDUP<"viwdup", "u16", 0b01, 0b0>;
def MVE_VIWDUPu32 : MVE_VxWDUP<"viwdup", "u32", 0b10, 0b0>;

def MVE_VDWDUPu8  : MVE_VxWDUP<"vdwdup", "u8",  0b00, 0b1>;
def MVE_VDWDUPu16 : MVE_VxWDUP<"vdwdup", "u16", 0b01, 0b1>;
def MVE_VDWDUPu32 : MVE_VxWDUP<"vdwdup", "u32", 0b10, 0b1>;
|
|
|
|
|
2019-09-23 17:48:25 +08:00
|
|
|
// VCTP: writes the predicate register VCCR:$P0 from a GPR $Rn. `size` is
// placed in Inst{21-20}. The class is marked as having side effects and uses
// a custom decoder method.
let hasSideEffects = 1 in
class MVE_VCTP<string suffix, bits<2> size, list<dag> pattern=[]>
  : MVE_p<(outs VCCR:$P0), (ins rGPR:$Rn), NoItinerary, "vctp", suffix,
          "$Rn", vpred_n, "", pattern> {
  bits<4> Rn;

  // Operand and parameter fields.
  let Inst{21-20} = size;
  let Inst{19-16} = Rn{3-0};

  // Fixed bits.
  let Inst{28-27} = 0b10;
  let Inst{26-22} = 0b00000;
  let Inst{15-11} = 0b11101;
  let Inst{10-0} = 0b00000000001;
  // All of bits 10-0 are flagged in the Unpredictable mask.
  let Unpredictable{10-0} = 0b11111111111;

  let Constraints = "";
  let DecoderMethod = "DecodeMveVCTP";
  let validForTailPredication = 1;
}
|
|
|
|
|
|
|
|
// One VCTP record per element width; the second argument is the size field.
def MVE_VCTP8  : MVE_VCTP<"8",  0b00>;
def MVE_VCTP16 : MVE_VCTP<"16", 0b01>;
def MVE_VCTP32 : MVE_VCTP<"32", 0b10>;
def MVE_VCTP64 : MVE_VCTP<"64", 0b11>;
|
|
|
|
|
2019-09-09 20:54:47 +08:00
|
|
|
// Map the int_arm_vctp8/16/32 intrinsics onto the matching VCTP record; the
// result type is v16i1, v8i1 or v4i1 respectively. No pattern is given here
// for MVE_VCTP64.
let Predicates = [HasMVEInt] in {
  def : Pat<(int_arm_vctp8 rGPR:$Rn),
            (v16i1 (MVE_VCTP8 rGPR:$Rn))>;
  def : Pat<(int_arm_vctp16 rGPR:$Rn),
            (v8i1 (MVE_VCTP16 rGPR:$Rn))>;
  def : Pat<(int_arm_vctp32 rGPR:$Rn),
            (v4i1 (MVE_VCTP32 rGPR:$Rn))>;
}
|
|
|
|
|
2019-06-21 21:17:08 +08:00
|
|
|
// end of mve_qDest_rSrc
|
|
|
|
|
[ARM] Add MVE 64-bit GPR <-> vector move instructions.
These instructions let you load half a vector register at once from
two general-purpose registers, or vice versa.
The assembly syntax for these instructions mentions the vector
register name twice. For the move _into_ a vector register, the MC
operand list also has to mention the register name twice (once as the
output, and once as an input to represent where the unchanged half of
the output register comes from). So we can conveniently assign one of
the two asm operands to be the output $Qd, and the other $QdSrc, which
avoids confusing the auto-generated AsmMatcher too much. For the move
_from_ a vector register, there's no way to get round the fact that
both instances of that register name have to be inputs, so we need a
custom AsmMatchConverter to avoid generating two separate output MC
operands. (And even that wouldn't have worked if it hadn't been for
D60695.)
Reviewers: dmgreen, samparker, SjoerdMeijer, t.p.northover
Subscribers: javed.absar, kristof.beyls, hiraditya, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D62679
llvm-svn: 364041
2019-06-21 21:17:23 +08:00
|
|
|
// start of coproc mov
|
|
|
|
|
|
|
|
// Encoding for the VMOV forms that move two GPRs to or from a vector
// register. `to_qreg` is placed in Inst{20}. Two lane-index operands ($idx
// and $idx2) are appended to the caller's input dag; of these, only idx2 is
// written into the encoding (Inst{4}).
class MVE_VMOV_64bit<dag oops, dag iops, bit to_qreg, string ops, string cstr>
  : MVE_VMOV_lane_base<oops,
                       !con(iops, (ins MVEPairVectorIndex2:$idx,
                                       MVEPairVectorIndex0:$idx2)),
                       NoItinerary, "vmov", "", ops, cstr, []> {
  bits<5> Rt;
  bits<5> Rt2;
  bits<4> Qd;
  bit idx;
  bit idx2;

  // Operand fields.
  let Inst{22} = Qd{3};
  let Inst{15-13} = Qd{2-0};
  let Inst{19-16} = Rt2{3-0};
  let Inst{3-0} = Rt{3-0};
  let Inst{4} = idx2;

  // Direction bit.
  let Inst{20} = to_qreg;

  // Fixed bits.
  let Inst{31-23} = 0b111011000;
  let Inst{21} = 0b0;
  let Inst{12-5} = 0b01111000;
}
|
|
|
|
|
|
|
|
// The assembly syntax for these instructions mentions the vector
|
|
|
|
// register name twice, e.g.
|
|
|
|
//
|
|
|
|
// vmov q2[2], q2[0], r0, r1
|
|
|
|
// vmov r0, r1, q2[2], q2[0]
|
|
|
|
//
|
|
|
|
// which needs a bit of juggling with MC operand handling.
|
|
|
|
//
|
|
|
|
// For the move _into_ a vector register, the MC operand list also has
|
|
|
|
// to mention the register name twice: once as the output, and once as
|
|
|
|
// an extra input to represent where the unchanged half of the output
|
|
|
|
// register comes from (when this instruction is used in code
|
|
|
|
// generation). So we arrange that the first mention of the vector reg
|
|
|
|
// in the instruction is considered by the AsmMatcher to be the output
|
|
|
|
// ($Qd), and the second one is the input ($QdSrc). Binding them
|
|
|
|
// together with the existing 'tie' constraint is enough to enforce at
|
|
|
|
// register allocation time that they have to be the same register.
|
|
|
|
//
|
|
|
|
// For the move _from_ a vector register, there's no way to get round
|
|
|
|
// the fact that both instances of that register name have to be
|
|
|
|
// inputs. They have to be the same register again, but this time, we
|
|
|
|
// can't use a tie constraint, because that has to be between an
|
|
|
|
// output and an input operand. So this time, we have to arrange that
|
|
|
|
// the q-reg appears just once in the MC operand list, in spite of
|
|
|
|
// being mentioned twice in the asm syntax - which needs a custom
|
|
|
|
// AsmMatchConverter.
|
|
|
|
|
|
|
|
// Two GPRs -> vector register. $QdSrc is an extra input tied to the output
// $Qd through the "$Qd = $QdSrc" constraint.
def MVE_VMOV_q_rr
  : MVE_VMOV_64bit<(outs MQPR:$Qd),
                   (ins MQPR:$QdSrc, rGPR:$Rt, rGPR:$Rt2),
                   0b1, "$Qd$idx, $QdSrc$idx2, $Rt, $Rt2",
                   "$Qd = $QdSrc"> {
  let DecoderMethod = "DecodeMVEVMOVDRegtoQ";
}
|
|
|
|
|
|
|
|
// Vector register -> two GPRs. $Qd appears twice in the asm string but only
// once in the operand list, hence the custom AsmMatchConverter.
def MVE_VMOV_rr_q
  : MVE_VMOV_64bit<(outs rGPR:$Rt, rGPR:$Rt2), (ins MQPR:$Qd),
                   0b0, "$Rt, $Rt2, $Qd$idx, $Qd$idx2", ""> {
  let AsmMatchConverter = "cvtMVEVMOVQtoDReg";
  let DecoderMethod = "DecodeMVEVMOVQtoDReg";
}
|
|
|
|
|
|
|
|
// end of coproc mov
|
|
|
|
|
2019-06-24 18:00:39 +08:00
|
|
|
// start of MVE interleaving load/store
|
|
|
|
|
|
|
|
// Base class for the family of interleaving/deinterleaving
// load/stores with names like VLD20.8 and VST43.32.
//   writeback - Inst{21}       fourregs - Inst{0}
//   stage     - Inst{6-5}      size     - Inst{8-7}
//   load      - Inst{20}; also drives mayLoad, and mayStore is its inverse.
// The input operand list is the concatenation of loadIops and wbIops.
class MVE_vldst24_base<bit writeback, bit fourregs, bits<2> stage, bits<2> size,
                       bit load, dag Oops, dag loadIops, dag wbIops,
                       string iname, string ops,
                       string cstr, list<dag> pattern=[]>
  : MVE_MI<Oops, !con(loadIops, wbIops), NoItinerary, iname, ops, cstr,
           pattern> {
  bits<4> VQd;
  bits<4> Rn;

  // Fixed bits.
  let Inst{31-22} = 0b1111110010;
  let Inst{12-9} = 0b1111;
  let Inst{4-1} = 0b0000;

  // Parameterised bits.
  let Inst{21} = writeback;
  let Inst{20} = load;
  let Inst{8-7} = size;
  let Inst{6-5} = stage;
  let Inst{0} = fourregs;

  // Operand fields; only the low three bits of VQd are encoded.
  let Inst{19-16} = Rn;
  let Inst{15-13} = VQd{2-0};

  let mayLoad = load;
  let mayStore = !eq(load,0);
}
|
|
|
|
|
|
|
|
// A parameter class used to encapsulate all the ways the writeback
// variants of VLD20 and friends differ from the non-writeback ones:
// the writeback bit, extra output/input operands, the asm syntax suffix,
// the constraint string and the suffix appended to the record name.
class MVE_vldst24_writeback<bit b, dag Oo, dag Io,
                            string sy="", string c="", string n=""> {
  bit    writeback = b;
  dag    Oops      = Oo;
  dag    Iops      = Io;
  string syntax    = sy;
  string cstr      = c;
  string id_suffix = n;
}
|
|
|
|
|
|
|
|
// Another parameter class that encapsulates the differences between VLD2x
// and VLD4x: the vector count, the list of stage numbers, the value of
// encoding bit 0 and the register-list operand type.
class MVE_vldst24_nvecs<int n, list<int> s, bit b, RegisterOperand vl> {
  int             nvecs   = n;
  list<int>       stages  = s;
  bit             bit0    = b;
  RegisterOperand VecList = vl;
}
|
|
|
|
|
|
|
|
// A third parameter class that distinguishes VLDnn.8 from .16 from .32:
// the lane size as an integer plus its two-bit encoding.
class MVE_vldst24_lanesize<int i, bits<2> b> {
  int     lanesize = i;
  bits<2> sizebits = b;
}
|
|
|
|
|
|
|
|
// A base class for each direction of transfer: one for load, one for
// store. I can't make these a fourth independent parametric tuple
// class, because they have to take the nvecs tuple class as a
// parameter, in order to find the right VecList operand type.

// Load direction (load bit = 1). The register list is both an output ($VQd)
// and an input ($VQdSrc), with a constraint tying the two together appended
// to the writeback constraint string.
class MVE_vld24_base<MVE_vldst24_nvecs n, bits<2> pat, bits<2> size,
                     MVE_vldst24_writeback wb, string iname,
                     list<dag> pattern=[]>
  : MVE_vldst24_base<wb.writeback, n.bit0, pat, size, 1,
                     !con((outs n.VecList:$VQd), wb.Oops),
                     (ins n.VecList:$VQdSrc),
                     wb.Iops,
                     iname,
                     "$VQd, $Rn" # wb.syntax,
                     wb.cstr # ",$VQdSrc = $VQd",
                     pattern>;
|
|
|
|
|
|
|
|
// Store direction (load bit = 0). The register list is an input only; the
// outputs and the constraint string come solely from the writeback tuple.
class MVE_vst24_base<MVE_vldst24_nvecs n, bits<2> pat, bits<2> size,
                     MVE_vldst24_writeback wb, string iname,
                     list<dag> pattern=[]>
  : MVE_vldst24_base<wb.writeback, n.bit0, pat, size, 0,
                     wb.Oops,
                     (ins n.VecList:$VQd),
                     wb.Iops,
                     iname,
                     "$VQd, $Rn" # wb.syntax,
                     wb.cstr,
                     pattern>;
|
|
|
|
|
|
|
|
// Actually define all the interleaving loads and stores, by a series
// of nested foreaches over number of vectors (VLD2/VLD4); stage
// within one of those series (VLDx0/VLDx1/VLDx2/VLDx3); size of
// vector lane; writeback or no writeback.
foreach n = [MVE_vldst24_nvecs<2, [0,1], 0, VecList2Q>,
             MVE_vldst24_nvecs<4, [0,1,2,3], 1, VecList4Q>] in
foreach stage = n.stages in
foreach s = [MVE_vldst24_lanesize< 8, 0b00>,
             MVE_vldst24_lanesize<16, 0b01>,
             MVE_vldst24_lanesize<32, 0b10>] in
foreach wb = [MVE_vldst24_writeback<
                1, (outs rGPR:$wb), (ins t2_nosp_addr_offset_none:$Rn),
                "!", "$Rn.base = $wb", "_wb">,
              MVE_vldst24_writeback<0, (outs), (ins t2_addr_offset_none:$Rn)>] in {

  // For each case within all of those foreaches, define the actual
  // instructions. The def names are made by gluing together pieces
  // from all the parameter classes, and will end up being things like
  // MVE_VLD20_8 and MVE_VST43_16_wb.

  // Load record for this (nvecs, stage, lane size, writeback) combination.
  def "MVE_VLD" # n.nvecs # stage # "_" # s.lanesize # wb.id_suffix
    : MVE_vld24_base<n, stage, s.sizebits, wb,
                     "vld" # n.nvecs # stage # "." # s.lanesize>;

  // Matching store record.
  def "MVE_VST" # n.nvecs # stage # "_" # s.lanesize # wb.id_suffix
    : MVE_vst24_base<n, stage, s.sizebits, wb,
                     "vst" # n.nvecs # stage # "." # s.lanesize>;
}
|
|
|
|
|
[ARM] Add IR intrinsics for MVE VLD[24] and VST[24].
The VST2 and VST4 instructions take two or four vector registers as
input, and store part of each register to memory in an interleaved
pattern. They come in variants indicating which part of each register
they store (VST20 and VST21; VST40 to VST43 inclusive); the intention
is that issuing each of those variants in turn has the combined effect
of loading or storing the whole set of registers to a memory block of
equal size. The corresponding VLD2 and VLD4 instructions load from
memory in the same interleaved format: each one overwrites only part
of its output register set, and again, the idea is that if you use
VLD4{0,1,2,3} or VLD2{0,1} together, you end up having written to the
whole of each register.
I've implemented the stores and loads quite differently. The loads
were easiest to implement as a single intrinsic that expands to all
four VLD4x instructions or both VLD2x, delivering four complete output
registers. (Implementing each individual load as a separate
instruction taking four input registers to partially overwrite is
possible in theory, but pointless, and when I tried it, I found it
would need extra work to get the register allocation not to be
horrible.) Since that intrinsic delivers multiple outputs, it has to
be instruction-selected in custom C++.
But the store instructions are easier to model individually, because
they don't overwrite any register at all and you can write a DAG Isel
pattern in Tablegen for each one.
Hence, my new intrinsic `int_arm_mve_vld4q` expands to four load
instructions, delivers four full output vectors, and is handled by C++
code, whereas `int_arm_mve_vst4q` expands to just one store
instruction, takes four input vectors and a constant indicating which
lanes to store, and is handled entirely in Tablegen. (And similarly
for vld2q/vst2q.) This is asymmetric, but it was the easiest way to do
each one.
Reviewers: dmgreen, miyuki, ostannard
Subscribers: kristof.beyls, hiraditya, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D68700
2019-10-08 00:03:46 +08:00
|
|
|
// Selection patterns for the int_arm_mve_vst2q / int_arm_mve_vst4q
// intrinsics for one (lane size, vector type) pair. The constant stage
// operand of the intrinsic picks the non-writeback MVE_VST2<stage>_<lanesize>
// or MVE_VST4<stage>_<lanesize> record, and the separate vector operands are
// packed into a QQPR / QQQQPR tuple with REG_SEQUENCE.
multiclass MVE_vst24_patterns<int lanesize, ValueType VT> {
  // Two-vector stores: stages 0 and 1.
  foreach stage = [0,1] in
    def : Pat<(int_arm_mve_vst2q i32:$addr,
                (VT MQPR:$v0), (VT MQPR:$v1), (i32 stage)),
              (!cast<Instruction>("MVE_VST2"#stage#"_"#lanesize)
                (REG_SEQUENCE QQPR, VT:$v0, qsub_0, VT:$v1, qsub_1),
                t2_addr_offset_none:$addr)>;

  // Four-vector stores: stages 0 to 3.
  foreach stage = [0,1,2,3] in
    def : Pat<(int_arm_mve_vst4q i32:$addr,
                (VT MQPR:$v0), (VT MQPR:$v1),
                (VT MQPR:$v2), (VT MQPR:$v3), (i32 stage)),
              (!cast<Instruction>("MVE_VST4"#stage#"_"#lanesize)
                (REG_SEQUENCE QQQQPR, VT:$v0, qsub_0, VT:$v1, qsub_1,
                                      VT:$v2, qsub_2, VT:$v3, qsub_3),
                t2_addr_offset_none:$addr)>;
}
|
|
|
|
// Instantiate the interleaving-store patterns for every supported vector
// type. The floating-point types are given the same lane size as the integer
// type of equal width (16 for v8f16, 32 for v4f32), so they select the same
// instructions.
defm : MVE_vst24_patterns<8, v16i8>;
|
|
|
|
defm : MVE_vst24_patterns<16, v8i16>;
|
|
|
|
defm : MVE_vst24_patterns<32, v4i32>;
|
|
|
|
defm : MVE_vst24_patterns<16, v8f16>;
|
|
|
|
defm : MVE_vst24_patterns<32, v4f32>;
|
|
|
|
|
2019-06-24 18:00:39 +08:00
|
|
|
// end of MVE interleaving load/store
|
|
|
|
|
[ARM] Add MVE vector load/store instructions.
This adds the rest of the vector memory access instructions. It
includes contiguous loads/stores, with an ordinary addressing mode
such as [r0,#offset] (plus writeback variants); gather loads and
scatter stores with a scalar base address register and a vector of
offsets from it (written [r0,q1] or similar); and gather/scatters with
a vector of base addresses (written [q0,#offset], again with
writeback). Additionally, some of the loads can widen each loaded
value into a larger vector lane, and the corresponding stores narrow
them again.
To implement these, we also have to add the addressing modes they
need. Also, in AsmParser, the `isMem` query function now has
subqueries `isGPRMem` and `isMVEMem`, according to which kind of base
register is used by a given memory access operand.
I've also had to add an extra check in `checkTargetMatchPredicate` in
the AsmParser, without which our last-minute check of `rGPR` register
operands against SP and PC was failing an assertion because Tablegen
had inserted an immediate 0 in place of one of a pair of tied register
operands. (This matches the way the corresponding check for `MCK_rGPR`
in `validateTargetOperandClass` is guarded.) Apparently the MVE load
instructions were the first to have ever triggered this assertion, but
I think only because they were the first to have a combination of the
usual Arm pre/post writeback system and the `rGPR` class in particular.
Reviewers: dmgreen, samparker, SjoerdMeijer, t.p.northover
Subscribers: javed.absar, kristof.beyls, hiraditya, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D62680
llvm-svn: 364291
2019-06-25 19:24:18 +08:00
|
|
|
// start of MVE predicable load/store
|
|
|
|
|
|
|
|
// A parameter class for the direction of transfer.
|
|
|
|
// Bundles everything that differs between the load and the store form of an
// instruction, so that one class or multiclass can define both.
//   b  - 1 for a load, 0 for a store.
//   Oo - output operand dag contributed by this direction.
//   Io - input operand dag contributed by this direction.
//   c  - constraint string contributed by this direction (default empty).
class MVE_ldst_direction<bit b, dag Oo, dag Io, string c=""> {
|
|
|
|
// 1 = load, 0 = store.
bit load = b;
|
|
|
|
// Operands this direction adds to the instruction's outputs.
dag Oops = Oo;
|
|
|
|
// Operands this direction adds to the instruction's inputs.
dag Iops = Io;
|
|
|
|
// Operand constraint string for this direction.
string cstr = c;
|
|
|
|
}
|
|
|
|
// Load direction: $Qd is an output and is marked @earlyclobber.
// NOTE(review): the leading comma in the constraint string looks intended for
// appending to another constraint list -- confirm it is harmless when the
// string is used on its own.
def MVE_ld: MVE_ldst_direction<1, (outs MQPR:$Qd), (ins), ",@earlyclobber $Qd">;
|
|
|
|
// Store direction: $Qd is an input, and no constraint is needed.
def MVE_st: MVE_ldst_direction<0, (outs), (ins MQPR:$Qd)>;
|
|
|
|
|
|
|
|
// A parameter class for the size of memory access in a load or store.
|
|
|
|
// Template parameters:
//   e     - two-bit encoding of the memory access size.
//   s     - log2 of the access size in bytes; also the immediate-offset shift.
//   m     - addressing mode used by the contiguous load/store multiclasses.
//   mn    - size letter appended to the mnemonic.
//   types - type-letter prefixes accepted at this size ("" means none).
class MVE_memsz<bits<2> e, int s, AddrMode m, string mn, list<string> types> {
|
|
|
|
bits<2> encoding = e; // opcode bit(s) for encoding
|
|
|
|
int shift = s; // shift applied to immediate load offset
|
|
|
|
// Applied to instructions via `let AM = memsz.AM` in the contiguous
// load/store multiclasses.
AddrMode AM = m;
|
|
|
|
|
|
|
|
// For instruction aliases: define the complete list of type
|
|
|
|
// suffixes at this size, and the canonical ones for loads and
|
|
|
|
// stores.
|
|
|
|
string MnemonicLetter = mn;
|
|
|
|
// Access size in bits: 8 << s.
int TypeBits = !shl(8, s);
|
|
|
|
// Canonical load suffix is the unsigned form, e.g. ".u16".
string CanonLoadSuffix = ".u" # TypeBits;
|
|
|
|
// Canonical store suffix is the bare size, e.g. ".16".
string CanonStoreSuffix = "." # TypeBits;
|
|
|
|
// One suffix per entry of `types`: "." followed by the letter and the size.
list<string> suffixes = !foreach(letter, types, "." # letter # TypeBits);
|
|
|
|
}
|
|
|
|
|
|
|
|
// Instances of MVE_memsz.
|
|
|
|
//
|
|
|
|
// (memD doesn't need an AddrMode, because those are only for
|
|
|
|
// contiguous loads, and memD is only used by gather/scatters.)
|
|
|
|
// Byte accesses (TypeBits = 8); no "f" type letter at this size.
def MVE_memB: MVE_memsz<0b00, 0, AddrModeT2_i7, "b", ["", "u", "s"]>;
|
|
|
|
// Halfword accesses (TypeBits = 16).
def MVE_memH: MVE_memsz<0b01, 1, AddrModeT2_i7s2, "h", ["", "u", "s", "f"]>;
|
|
|
|
// Word accesses (TypeBits = 32).
def MVE_memW: MVE_memsz<0b10, 2, AddrModeT2_i7s4, "w", ["", "u", "s", "f"]>;
|
|
|
|
// Doubleword accesses (TypeBits = 64); the AddrMode is left unset (`?`).
def MVE_memD: MVE_memsz<0b11, 3, ?, "d", ["", "u", "s", "f"]>;
|
|
|
|
|
|
|
|
// This is the base class for all the MVE loads and stores other than
|
|
|
|
// the interleaving ones. All the non-interleaving loads/stores share
|
|
|
|
// the characteristic that they operate on just one vector register,
|
|
|
|
// so they are VPT-predicable.
|
|
|
|
//
|
|
|
|
// The predication operand is vpred_n, for both loads and stores. For
|
|
|
|
// store instructions, the reason is obvious: if there is no output
|
|
|
|
// register, there can't be a need for an input parameter giving the
|
|
|
|
// output register's previous value. Load instructions also don't need
|
|
|
|
// that input parameter, because unlike MVE data processing
|
|
|
|
// instructions, predicated loads are defined to set the inactive
|
|
|
|
// lanes of the output register to zero, instead of preserving their
|
|
|
|
// input values.
|
|
|
|
// Template parameters:
//   dir     - transfer direction; dir.load drives Inst{20}, mayLoad and
//             mayStore.
//   U, P, W - written to Inst{28}, Inst{24} and Inst{21} respectively.
//   opc     - written to Inst{12}.
//   oops, iops, asm, suffix, ops, cstr, pattern - forwarded unchanged to
//             MVE_p, with vpred_n as the predication operand.
class MVE_VLDRSTR_base<MVE_ldst_direction dir, bit U, bit P, bit W, bit opc,
|
|
|
|
dag oops, dag iops, string asm, string suffix,
|
|
|
|
string ops, string cstr, list<dag> pattern=[]>
|
|
|
|
: MVE_p<oops, iops, NoItinerary, asm, suffix, ops, vpred_n, cstr, pattern> {
|
|
|
|
// The vector register being loaded or stored; three bits of encoding.
bits<3> Qd;
|
|
|
|
|
|
|
|
|
|
|
|
let Inst{28} = U;
|
|
|
|
let Inst{25} = 0b0;
|
|
|
|
let Inst{24} = P;
|
|
|
|
let Inst{22} = 0b0;
|
|
|
|
let Inst{21} = W;
|
|
|
|
let Inst{20} = dir.load;
|
|
|
|
let Inst{15-13} = Qd{2-0};
|
|
|
|
let Inst{12} = opc;
|
|
|
|
let Inst{11-9} = 0b111;
|
|
|
|
|
|
|
|
|
|
|
|
// Exactly one of mayLoad / mayStore is set, chosen by the direction.
let mayLoad = dir.load;
|
|
|
|
let mayStore = !eq(dir.load,0);
|
2019-10-15 21:12:51 +08:00
|
|
|
// Every instruction derived from this class is flagged as valid for tail
// predication.
let validForTailPredication = 1;
|
[ARM] Add MVE vector load/store instructions.
This adds the rest of the vector memory access instructions. It
includes contiguous loads/stores, with an ordinary addressing mode
such as [r0,#offset] (plus writeback variants); gather loads and
scatter stores with a scalar base address register and a vector of
offsets from it (written [r0,q1] or similar); and gather/scatters with
a vector of base addresses (written [q0,#offset], again with
writeback). Additionally, some of the loads can widen each loaded
value into a larger vector lane, and the corresponding stores narrow
them again.
To implement these, we also have to add the addressing modes they
need. Also, in AsmParser, the `isMem` query function now has
subqueries `isGPRMem` and `isMVEMem`, according to which kind of base
register is used by a given memory access operand.
I've also had to add an extra check in `checkTargetMatchPredicate` in
the AsmParser, without which our last-minute check of `rGPR` register
operands against SP and PC was failing an assertion because Tablegen
had inserted an immediate 0 in place of one of a pair of tied register
operands. (This matches the way the corresponding check for `MCK_rGPR`
in `validateTargetOperandClass` is guarded.) Apparently the MVE load
instructions were the first to have ever triggered this assertion, but
I think only because they were the first to have a combination of the
usual Arm pre/post writeback system and the `rGPR` class in particular.
Reviewers: dmgreen, samparker, SjoerdMeijer, t.p.northover
Subscribers: javed.absar, kristof.beyls, hiraditya, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D62680
llvm-svn: 364291
2019-06-25 19:24:18 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
// Contiguous load and store instructions. These come in two main
|
|
|
|
// categories: same-size loads/stores in which 128 bits of vector
|
|
|
|
// register is transferred to or from 128 bits of memory in the most
|
|
|
|
// obvious way, and widening loads / narrowing stores, in which the
|
|
|
|
// size of memory accessed is less than the size of a vector register,
|
|
|
|
// so the load instructions sign- or zero-extend each memory value
|
|
|
|
// into a wider vector lane, and the store instructions truncate
|
|
|
|
// correspondingly.
|
|
|
|
//
|
|
|
|
// The instruction mnemonics for these two classes look reasonably
|
|
|
|
// similar, but the actual encodings are different enough to need two
|
|
|
|
// separate base classes.
|
|
|
|
|
|
|
|
// Contiguous, same size
|
|
|
|
// Encoding class for same-size contiguous accesses. It passes U=0 and opc=1
// to MVE_VLDRSTR_base and places memsz.encoding in Inst{8-7}.
// NOTE(review): `im` is accepted but never assigned to IM in this class,
// whereas MVE_VLDRSTR_cw does `let IM = im;` -- confirm this is intentional.
class MVE_VLDRSTR_cs<MVE_ldst_direction dir, MVE_memsz memsz, bit P, bit W,
|
|
|
|
dag oops, dag iops, string asm, string suffix,
|
|
|
|
IndexMode im, string ops, string cstr>
|
|
|
|
: MVE_VLDRSTR_base<dir, 0, P, W, 1, oops, iops, asm, suffix, ops, cstr> {
|
|
|
|
// Encoded address operand: bits 11-8 go to Inst{19-16}, bit 7 to Inst{23}
// and bits 6-0 to Inst{6-0}. Presumably these are the base register, the
// add/subtract flag and the 7-bit immediate -- TODO confirm against the
// operand encoder.
bits<12> addr;
|
|
|
|
let Inst{23} = addr{7};
|
|
|
|
let Inst{19-16} = addr{11-8};
|
|
|
|
let Inst{8-7} = memsz.encoding;
|
|
|
|
let Inst{6-0} = addr{6-0};
|
|
|
|
}
|
|
|
|
|
|
|
|
// Contiguous, widening/narrowing
|
|
|
|
// Encoding class for widening loads / narrowing stores. It passes opc=0 to
// MVE_VLDRSTR_base, puts `size` in Inst{8-7}, and encodes the memory access
// size with the single bit Inst{19}.
class MVE_VLDRSTR_cw<MVE_ldst_direction dir, MVE_memsz memsz, bit U,
|
|
|
|
bit P, bit W, bits<2> size, dag oops, dag iops,
|
|
|
|
string asm, string suffix, IndexMode im,
|
|
|
|
string ops, string cstr>
|
|
|
|
: MVE_VLDRSTR_base<dir, U, P, W, 0, oops, iops, asm, suffix, ops, cstr> {
|
|
|
|
// Encoded address operand: bits 10-8 go to Inst{18-16} (one bit narrower
// than in MVE_VLDRSTR_cs, since Inst{19} is taken by the size bit), bit 7
// to Inst{23} and bits 6-0 to Inst{6-0}.
bits<11> addr;
|
|
|
|
let Inst{23} = addr{7};
|
|
|
|
let Inst{19} = memsz.encoding{0}; // enough to tell 16- from 32-bit
|
|
|
|
let Inst{18-16} = addr{10-8};
|
|
|
|
let Inst{8-7} = size;
|
|
|
|
let Inst{6-0} = addr{6-0};
|
|
|
|
|
|
|
|
|
|
|
|
// Record the index mode (none / pre / post) on the instruction.
let IM = im;
|
|
|
|
}
|
|
|
|
|
|
|
|
// Multiclass wrapper on each of the _cw and _cs base classes, to
|
|
|
|
// generate three writeback modes (none, preindex, postindex).
|
|
|
|
|
|
|
|
// Defines the three writeback variants of one widening/narrowing contiguous
// access: NAME (no writeback), NAME_pre and NAME_post. U and size are passed
// straight through to MVE_VLDRSTR_cw.
multiclass MVE_VLDRSTR_cw_m<MVE_ldst_direction dir, MVE_memsz memsz,
|
|
|
|
string asm, string suffix, bit U, bits<2> size> {
|
|
|
|
// All three variants take their addressing mode from the memory size.
let AM = memsz.AM in {
|
|
|
|
// No writeback: P=1, W=0, IndexModeNone, no constraint string.
def "" : MVE_VLDRSTR_cw<
|
|
|
|
dir, memsz, U, 1, 0, size,
|
|
|
|
dir.Oops, !con(dir.Iops, (ins taddrmode_imm7<memsz.shift>:$addr)),
|
|
|
|
asm, suffix, IndexModeNone, "$Qd, $addr", "">;
|
|
|
|
|
|
|
|
|
|
|
|
// Pre-indexed: P=1, W=1. Adds a $wb output tied to the base register of
// $addr, and uses a dedicated decoder method parameterised by the shift.
def _pre : MVE_VLDRSTR_cw<
|
|
|
|
dir, memsz, U, 1, 1, size,
|
|
|
|
!con((outs tGPR:$wb), dir.Oops),
|
|
|
|
!con(dir.Iops, (ins taddrmode_imm7<memsz.shift>:$addr)),
|
|
|
|
asm, suffix, IndexModePre, "$Qd, $addr!", "$addr.base = $wb"> {
|
|
|
|
let DecoderMethod = "DecodeMVE_MEM_1_pre<"#memsz.shift#">";
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Post-indexed: P=0, W=1. The base register is a separate operand $Rn
// (tied to $wb) and $addr carries only the offset, so Inst{18-16} is
// overridden to come from Rn instead of addr{10-8}.
def _post : MVE_VLDRSTR_cw<
|
|
|
|
dir, memsz, U, 0, 1, size,
|
|
|
|
!con((outs tGPR:$wb), dir.Oops),
|
|
|
|
!con(dir.Iops, (ins t_addr_offset_none:$Rn,
|
|
|
|
t2am_imm7_offset<memsz.shift>:$addr)),
|
|
|
|
asm, suffix, IndexModePost, "$Qd, $Rn$addr", "$Rn.base = $wb"> {
|
|
|
|
bits<4> Rn;
|
|
|
|
// Only the low three bits of Rn are encoded.
let Inst{18-16} = Rn{2-0};
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// Defines the three writeback variants of one same-size contiguous access:
// NAME (no writeback), NAME_pre and NAME_post.
multiclass MVE_VLDRSTR_cs_m<MVE_ldst_direction dir, MVE_memsz memsz,
|
|
|
|
string asm, string suffix> {
|
|
|
|
// All three variants take their addressing mode from the memory size.
let AM = memsz.AM in {
|
|
|
|
// No writeback: P=1, W=0, IndexModeNone, no constraint string.
def "" : MVE_VLDRSTR_cs<
|
|
|
|
dir, memsz, 1, 0,
|
|
|
|
dir.Oops, !con(dir.Iops, (ins t2addrmode_imm7<memsz.shift>:$addr)),
|
|
|
|
asm, suffix, IndexModeNone, "$Qd, $addr", "">;
|
|
|
|
|
|
|
|
|
|
// Pre-indexed: P=1, W=1. Adds an rGPR $wb output tied to the base
// register of $addr, and uses a dedicated decoder method.
def _pre : MVE_VLDRSTR_cs<
|
|
|
|
dir, memsz, 1, 1,
|
|
|
|
!con((outs rGPR:$wb), dir.Oops),
|
|
|
|
!con(dir.Iops, (ins t2addrmode_imm7_pre<memsz.shift>:$addr)),
|
|
|
|
asm, suffix, IndexModePre, "$Qd, $addr!", "$addr.base = $wb"> {
|
|
|
|
let DecoderMethod = "DecodeMVE_MEM_2_pre<"#memsz.shift#">";
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Post-indexed: P=0, W=1. The base register is a separate operand $Rn
// (tied to $wb) and $addr carries only the offset.
def _post : MVE_VLDRSTR_cs<
|
|
|
|
dir, memsz, 0, 1,
|
|
|
|
!con((outs rGPR:$wb), dir.Oops),
|
|
|
|
// We need an !if here to select the base register class,
|
|
|
|
// because it's legal to write back to SP in a load of this
|
|
|
|
// type, but not in a store.
|
|
|
|
!con(dir.Iops, (ins !if(dir.load, t2_addr_offset_none,
|
|
|
|
t2_nosp_addr_offset_none):$Rn,
|
|
|
|
t2am_imm7_offset<memsz.shift>:$addr)),
|
|
|
|
asm, suffix, IndexModePost, "$Qd, $Rn$addr", "$Rn.base = $wb"> {
|
|
|
|
bits<4> Rn;
|
|
|
|
// Inst{19-16} is overridden to come from the separate Rn operand rather
// than from addr{11-8}.
let Inst{19-16} = Rn{3-0};
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// Now actually declare all the contiguous load/stores, via those
|
|
|
|
// multiclasses. The instruction ids coming out of this are the bare
|
|
|
|
// names shown in the defm, with _pre or _post appended for writeback,
|
|
|
|
// e.g. MVE_VLDRBS16, MVE_VSTRB16_pre, MVE_VSTRHU16_post.
|
|
|
|
|
|
|
|
// Widening loads. The last two arguments are the U bit (0 for the "s"
// suffixes, 1 for the "u" suffixes) and the size field (0b01 for 16-bit
// lanes, 0b10 for 32-bit lanes).
defm MVE_VLDRBS16: MVE_VLDRSTR_cw_m<MVE_ld, MVE_memB, "vldrb", "s16", 0, 0b01>;
|
|
|
|
defm MVE_VLDRBS32: MVE_VLDRSTR_cw_m<MVE_ld, MVE_memB, "vldrb", "s32", 0, 0b10>;
|
|
|
|
defm MVE_VLDRBU16: MVE_VLDRSTR_cw_m<MVE_ld, MVE_memB, "vldrb", "u16", 1, 0b01>;
|
|
|
|
defm MVE_VLDRBU32: MVE_VLDRSTR_cw_m<MVE_ld, MVE_memB, "vldrb", "u32", 1, 0b10>;
|
|
|
|
defm MVE_VLDRHS32: MVE_VLDRSTR_cw_m<MVE_ld, MVE_memH, "vldrh", "s32", 0, 0b10>;
|
|
|
|
defm MVE_VLDRHU32: MVE_VLDRSTR_cw_m<MVE_ld, MVE_memH, "vldrh", "u32", 1, 0b10>;
|
|
|
|
|
|
|
|
|
|
// Same-size loads, one per memory size, using the "u" suffix.
defm MVE_VLDRBU8: MVE_VLDRSTR_cs_m<MVE_ld, MVE_memB, "vldrb", "u8">;
|
|
|
|
defm MVE_VLDRHU16: MVE_VLDRSTR_cs_m<MVE_ld, MVE_memH, "vldrh", "u16">;
|
|
|
|
defm MVE_VLDRWU32: MVE_VLDRSTR_cs_m<MVE_ld, MVE_memW, "vldrw", "u32">;
|
|
|
|
|
|
|
|
|
|
// Narrowing stores. The suffix is the bare lane size and U is always 0.
defm MVE_VSTRB16: MVE_VLDRSTR_cw_m<MVE_st, MVE_memB, "vstrb", "16", 0, 0b01>;
|
|
|
|
defm MVE_VSTRB32: MVE_VLDRSTR_cw_m<MVE_st, MVE_memB, "vstrb", "32", 0, 0b10>;
|
|
|
|
defm MVE_VSTRH32: MVE_VLDRSTR_cw_m<MVE_st, MVE_memH, "vstrh", "32", 0, 0b10>;
|
|
|
|
|
|
|
|
|
|
// Same-size stores. The record names keep the "U" of the matching loads
// even though the assembly suffix is the bare size.
defm MVE_VSTRBU8 : MVE_VLDRSTR_cs_m<MVE_st, MVE_memB, "vstrb", "8">;
|
|
|
|
defm MVE_VSTRHU16: MVE_VLDRSTR_cs_m<MVE_st, MVE_memH, "vstrh", "16">;
|
|
|
|
defm MVE_VSTRWU32: MVE_VLDRSTR_cs_m<MVE_st, MVE_memW, "vstrw", "32">;
|
|
|
|
|
|
|
|
// Gather loads / scatter stores whose address operand is of the form
|
|
|
|
// [Rn,Qm], i.e. a single GPR as the common base address, plus a
|
|
|
|
// vector of offsets from it. ('Load/store this sequence of elements of
|
|
|
|
// the same array.')
|
|
|
|
//
|
|
|
|
// Like the contiguous family, these loads and stores can widen the
|
|
|
|
// loaded values / truncate the stored ones, or they can just
|
|
|
|
// load/store the same size of memory and vector lane. But unlike the
|
|
|
|
// contiguous family, there's no particular difference in encoding
|
|
|
|
// between those two cases.
|
|
|
|
//
|
|
|
|
// This family also comes with the option to scale the offset values
|
|
|
|
// in Qm by the size of the loaded memory (i.e. to treat them as array
|
|
|
|
// indices), or not to scale them (to treat them as plain byte offsets
|
|
|
|
// in memory, so that perhaps the loaded values are unaligned). The
|
|
|
|
// scaled instructions' address operand in assembly looks like
|
|
|
|
// [Rn,Qm,UXTW #2] or similar.
|
|
|
|
|
|
|
|
// Base class.
|
|
|
|
// Encoding for the [Rn,Qm] gather/scatter form. It passes P=0, W=0 and opc=0
// to MVE_VLDRSTR_base, and takes the operand lists and the constraint string
// directly from `dir`.
//   size  - written to Inst{8-7}.
//   os    - written to Inst{0}; the multiclass below passes 1 for the scaled
//           form and 0 for the unscaled form.
//   shift - shift amount given to the mve_addr_rq_shift address operand.
class MVE_VLDRSTR_rq<MVE_ldst_direction dir, MVE_memsz memsz, bit U,
|
|
|
|
bits<2> size, bit os, string asm, string suffix, int shift>
|
|
|
|
: MVE_VLDRSTR_base<dir, U, 0b0, 0b0, 0, dir.Oops,
|
|
|
|
!con(dir.Iops, (ins mve_addr_rq_shift<shift>:$addr)),
|
|
|
|
asm, suffix, "$Qd, $addr", dir.cstr> {
|
|
|
|
// Encoded address operand: bits 6-3 go to Inst{19-16} and bits 2-0 to
// Inst{3-1}. Presumably these are the base GPR and the offset vector
// register -- TODO confirm against the operand encoder.
bits<7> addr;
|
|
|
|
let Inst{23} = 0b1;
|
|
|
|
let Inst{19-16} = addr{6-3};
|
|
|
|
let Inst{8-7} = size;
|
|
|
|
// The two memory-size encoding bits are split across Inst{6} and Inst{4}.
let Inst{6} = memsz.encoding{1};
|
|
|
|
let Inst{5} = 0;
|
|
|
|
let Inst{4} = memsz.encoding{0};
|
|
|
|
let Inst{3-1} = addr{2-0};
|
|
|
|
let Inst{0} = os;
|
|
|
|
}
|
|
|
|
|
|
|
|
// Multiclass that defines the scaled and unscaled versions of an
|
|
|
|
// instruction, when the memory size is wider than a byte. The scaled
|
|
|
|
// version gets the default name like MVE_VLDRHU16_rq; the unscaled /
|
|
|
|
// potentially unaligned version gets a "_u" suffix, e.g.
|
|
|
|
// MVE_VLDRHU16_rq_u.
|
|
|
|
multiclass MVE_VLDRSTR_rq_w<MVE_ldst_direction dir, MVE_memsz memsz,
|
|
|
|
string asm, string suffix, bit U, bits<2> size> {
|
|
|
|
// Unscaled form: os = 0 and an address shift of 0.
def _u : MVE_VLDRSTR_rq<dir, memsz, U, size, 0, asm, suffix, 0>;
|
|
|
|
// Scaled form: os = 1 and an address shift equal to memsz.shift.
def "" : MVE_VLDRSTR_rq<dir, memsz, U, size, 1, asm, suffix, memsz.shift>;
|
|
|
|
}
|
|
|
|
|
|
|
|
// Subclass of MVE_VLDRSTR_rq with the same API as that multiclass,
|
|
|
|
// for use when the memory size is one byte, so there's no 'scaled'
|
|
|
|
// version of the instruction at all. (This is encoded as if it were
|
|
|
|
// unscaled, but named in the default way with no _u suffix.)
|
|
|
|
// Takes the same template parameters as the MVE_VLDRSTR_rq_w multiclass, but
// produces a single record with os = 0 and an address shift of 0.
class MVE_VLDRSTR_rq_b<MVE_ldst_direction dir, MVE_memsz memsz,
|
|
|
|
string asm, string suffix, bit U, bits<2> size>
|
|
|
|
: MVE_VLDRSTR_rq<dir, memsz, U, size, 0, asm, suffix, 0>;
|
|
|
|
|
[ARM,MVE] Add intrinsics for gather/scatter load/stores.
This patch adds two new families of intrinsics, both of which are
memory accesses taking a vector of locations to load from / store to.
The vldrq_gather_base / vstrq_scatter_base intrinsics take a vector of
base addresses, and an immediate offset to be added consistently to
each one. vldrq_gather_offset / vstrq_scatter_offset take a scalar
base address, and a vector of offsets to add to it. The
'shifted_offset' variants also multiply each offset by the element
size type, so that the vector is effectively of array indices.
At the IR level, these operations are represented by a single set of
four IR intrinsics: {gather,scatter} × {base,offset}. The other
details (signed/unsigned, shift, and memory element size as opposed to
vector element size) are all specified by IR intrinsic polymorphism
and immediate operands, because that made the selection job easier
than making a huge family of similarly named intrinsics.
I considered using the standard IR representations such as
llvm.masked.gather, but they're not a good fit. In order to use
llvm.masked.gather to represent a gather_offset load with element size
smaller than a pointer, you'd have to expand the <8 x i16> vector of
offsets into an <8 x i16*> vector of pointers, which would be split up
during legalization, so you'd spend most of your time undoing the mess
it had made. Also, ISel support for llvm.masked.gather would be easy
enough in a trivial way (you can expand it into a gather-base load
with a zero immediate offset), but instruction-selecting lots of
fiddly idioms back into all the _other_ MVE load instructions would be
much more work. So I think dedicated IR intrinsics are the more
sensible approach, at least for the moment.
On the clang tablegen side, I've added two new features to the
Tablegen source accepted by MveEmitter: a 'CopyKind' type node for
defining a type that varies with the parameter type (it lets you ask
for an unsigned integer type of the same width as the parameter), and
an 'unsignedflag' value node for passing an immediate IR operand which
is 0 for a signed integer type or 1 for an unsigned one. That lets me
write each kind of intrinsic just once and get all its subtypes and
immediate arguments generated automatically.
Also I've tweaked the handling of pointer-typed values in the code
generation part of MveEmitter: they're generated as Address rather
than Value (i.e. including an alignment) so that they can be given to
the ordinary IR load and store operations, but I'd omitted the code to
convert them back to Value when they're going to be used as an
argument to an IR intrinsic.
On the MC side, I've enhanced MVEVectorVTInfo so that it can tell you
not only the full assembly-language suffix for a given vector type
(like 's32' or 'u16') but also the numeric-only one used by store
instructions (just '32' or '16').
Reviewers: dmgreen
Subscribers: kristof.beyls, hiraditya, cfe-commits, llvm-commits
Tags: #clang, #llvm
Differential Revision: https://reviews.llvm.org/D69791
2019-11-01 01:02:07 +08:00
|
|
|
// Multiclasses wrapping that to add ISel patterns for intrinsics.
|
|
|
|
// Gather loads with a memory size wider than a byte: defines the scaled
// (NAME) and unscaled (NAME_u) instructions plus the ISel patterns that map
// int_arm_mve_vldr_gather_offset[_predicated] onto them.
//   memsz - memory access size.
//   VTIs  - vector types that select these instructions. VTIs[0] supplies the
//           assembly suffix, the U bit, the size field and the type of the
//           offset vector; the remaining entries only add patterns.
multiclass MVE_VLDR_rq_w<MVE_memsz memsz, list<MVEVectorVTInfo> VTIs> {
|
|
|
|
defm "": MVE_VLDRSTR_rq_w<MVE_ld, memsz, "vldr" # memsz.MnemonicLetter,
|
|
|
|
VTIs[0].Suffix, VTIs[0].Unsigned, VTIs[0].Size>;
|
|
|
|
foreach VTI = VTIs in
|
|
|
|
// When the lane size equals the memory size, patterns are generated for
// both values of the intrinsic's unsigned flag; otherwise only for the
// flag value that matches the type's signedness.
foreach UnsignedFlag = !if(!eq(VTI.Size, memsz.encoding),
|
|
|
|
[0,1], [VTI.Unsigned]) in {
|
|
|
|
// Unpredicated, shift operand 0: select the unscaled (_u) instruction.
def : Pat<(VTI.Vec (int_arm_mve_vldr_gather_offset GPR:$base, (VTIs[0].Vec MQPR:$offsets), memsz.TypeBits, 0, UnsignedFlag)),
|
|
|
|
(VTI.Vec (!cast<Instruction>(NAME#"_u") GPR:$base, MQPR:$offsets))>;
|
|
|
|
// Unpredicated, shift operand memsz.shift: select the scaled instruction.
def : Pat<(VTI.Vec (int_arm_mve_vldr_gather_offset GPR:$base, (VTIs[0].Vec MQPR:$offsets), memsz.TypeBits, memsz.shift, UnsignedFlag)),
|
|
|
|
(VTI.Vec (!cast<Instruction>(NAME) GPR:$base, MQPR:$offsets))>;
|
|
|
|
// Predicated forms of the two patterns above. The output passes a
// constant 1 and the predicate register as extra operands; the 1 is
// presumably the "then" VPT predication code -- TODO confirm.
def : Pat<(VTI.Vec (int_arm_mve_vldr_gather_offset_predicated GPR:$base, (VTIs[0].Vec MQPR:$offsets), memsz.TypeBits, 0, UnsignedFlag, (VTI.Pred VCCR:$pred))),
|
|
|
|
(VTI.Vec (!cast<Instruction>(NAME#"_u") GPR:$base, MQPR:$offsets, 1, VCCR:$pred))>;
|
|
|
|
def : Pat<(VTI.Vec (int_arm_mve_vldr_gather_offset_predicated GPR:$base, (VTIs[0].Vec MQPR:$offsets), memsz.TypeBits, memsz.shift, UnsignedFlag, (VTI.Pred VCCR:$pred))),
|
|
|
|
(VTI.Vec (!cast<Instruction>(NAME) GPR:$base, MQPR:$offsets, 1, VCCR:$pred))>;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
// Byte-sized gather loads: defines the single instruction NAME and the ISel
// patterns for int_arm_mve_vldr_gather_offset[_predicated] with a memory
// size of 8 bits and a shift of 0. VTIs[0] supplies the suffix, the U bit,
// the size field and the offset vector type.
multiclass MVE_VLDR_rq_b<list<MVEVectorVTInfo> VTIs> {
|
|
|
|
def "": MVE_VLDRSTR_rq_b<MVE_ld, MVE_memB, "vldrb",
|
|
|
|
VTIs[0].Suffix, VTIs[0].Unsigned, VTIs[0].Size>;
|
|
|
|
foreach VTI = VTIs in {
|
|
|
|
// Unpredicated: the unsigned flag must equal the type's own signedness.
def : Pat<(VTI.Vec (int_arm_mve_vldr_gather_offset GPR:$base, (VTIs[0].Vec MQPR:$offsets), 8, 0, VTI.Unsigned)),
|
|
|
|
(VTI.Vec (!cast<Instruction>(NAME) GPR:$base, MQPR:$offsets))>;
|
|
|
|
// Predicated: same match plus a predicate operand, forwarded to the
// instruction after a constant 1.
def : Pat<(VTI.Vec (int_arm_mve_vldr_gather_offset_predicated GPR:$base, (VTIs[0].Vec MQPR:$offsets), 8, 0, VTI.Unsigned, (VTI.Pred VCCR:$pred))),
|
|
|
|
(VTI.Vec (!cast<Instruction>(NAME) GPR:$base, MQPR:$offsets, 1, VCCR:$pred))>;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
// Scatter stores with a memory size wider than a byte: defines the scaled
// (NAME) and unscaled (NAME_u) instructions plus the ISel patterns for
// int_arm_mve_vstr_scatter_offset[_predicated]. VTIs[0] supplies the
// numeric-only suffix, the size field and the offset vector type; U is
// always 0. The intrinsic's operand order is (base, offsets, data, ...),
// while the instruction takes (data, base, offsets).
multiclass MVE_VSTR_rq_w<MVE_memsz memsz, list<MVEVectorVTInfo> VTIs> {
|
|
|
|
defm "": MVE_VLDRSTR_rq_w<MVE_st, memsz, "vstr" # memsz.MnemonicLetter,
|
|
|
|
VTIs[0].BitsSuffix, 0, VTIs[0].Size>;
|
|
|
|
foreach VTI = VTIs in {
|
|
|
|
// Unpredicated, shift operand 0: select the unscaled (_u) instruction.
def : Pat<(int_arm_mve_vstr_scatter_offset GPR:$base, (VTIs[0].Vec MQPR:$offsets), (VTI.Vec MQPR:$data), memsz.TypeBits, 0),
|
|
|
|
(!cast<Instruction>(NAME#"_u") MQPR:$data, GPR:$base, MQPR:$offsets)>;
|
|
|
|
// Unpredicated, shift operand memsz.shift: select the scaled instruction.
def : Pat<(int_arm_mve_vstr_scatter_offset GPR:$base, (VTIs[0].Vec MQPR:$offsets), (VTI.Vec MQPR:$data), memsz.TypeBits, memsz.shift),
|
|
|
|
(!cast<Instruction>(NAME) MQPR:$data, GPR:$base, MQPR:$offsets)>;
|
|
|
|
// Predicated forms of the two patterns above.
def : Pat<(int_arm_mve_vstr_scatter_offset_predicated GPR:$base, (VTIs[0].Vec MQPR:$offsets), (VTI.Vec MQPR:$data), memsz.TypeBits, 0, (VTI.Pred VCCR:$pred)),
|
|
|
|
(!cast<Instruction>(NAME#"_u") MQPR:$data, GPR:$base, MQPR:$offsets, 1, VCCR:$pred)>;
|
|
|
|
def : Pat<(int_arm_mve_vstr_scatter_offset_predicated GPR:$base, (VTIs[0].Vec MQPR:$offsets), (VTI.Vec MQPR:$data), memsz.TypeBits, memsz.shift, (VTI.Pred VCCR:$pred)),
|
|
|
|
(!cast<Instruction>(NAME) MQPR:$data, GPR:$base, MQPR:$offsets, 1, VCCR:$pred)>;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
// Byte-sized scatter stores: defines the single instruction NAME and the
// ISel patterns for int_arm_mve_vstr_scatter_offset[_predicated] with a
// memory size of 8 bits and a shift of 0. VTIs[0] supplies the numeric-only
// suffix, the size field and the offset vector type; U is always 0.
multiclass MVE_VSTR_rq_b<list<MVEVectorVTInfo> VTIs> {
|
|
|
|
def "": MVE_VLDRSTR_rq_b<MVE_st, MVE_memB, "vstrb",
|
|
|
|
VTIs[0].BitsSuffix, 0, VTIs[0].Size>;
|
|
|
|
foreach VTI = VTIs in {
|
|
|
|
// Unpredicated store; instruction operands are (data, base, offsets).
def : Pat<(int_arm_mve_vstr_scatter_offset GPR:$base, (VTIs[0].Vec MQPR:$offsets), (VTI.Vec MQPR:$data), 8, 0),
|
|
|
|
(!cast<Instruction>(NAME) MQPR:$data, GPR:$base, MQPR:$offsets)>;
|
|
|
|
// Predicated store: same match plus a predicate operand, forwarded to the
// instruction after a constant 1.
def : Pat<(int_arm_mve_vstr_scatter_offset_predicated GPR:$base, (VTIs[0].Vec MQPR:$offsets), (VTI.Vec MQPR:$data), 8, 0, (VTI.Pred VCCR:$pred)),
|
|
|
|
(!cast<Instruction>(NAME) MQPR:$data, GPR:$base, MQPR:$offsets, 1, VCCR:$pred)>;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
[ARM] Add MVE vector load/store instructions.
This adds the rest of the vector memory access instructions. It
includes contiguous loads/stores, with an ordinary addressing mode
such as [r0,#offset] (plus writeback variants); gather loads and
scatter stores with a scalar base address register and a vector of
offsets from it (written [r0,q1] or similar); and gather/scatters with
a vector of base addresses (written [q0,#offset], again with
writeback). Additionally, some of the loads can widen each loaded
value into a larger vector lane, and the corresponding stores narrow
them again.
To implement these, we also have to add the addressing modes they
need. Also, in AsmParser, the `isMem` query function now has
subqueries `isGPRMem` and `isMVEMem`, according to which kind of base
register is used by a given memory access operand.
I've also had to add an extra check in `checkTargetMatchPredicate` in
the AsmParser, without which our last-minute check of `rGPR` register
operands against SP and PC was failing an assertion because Tablegen
had inserted an immediate 0 in place of one of a pair of tied register
operands. (This matches the way the corresponding check for `MCK_rGPR`
in `validateTargetOperandClass` is guarded.) Apparently the MVE load
instructions were the first to have ever triggered this assertion, but
I think only because they were the first to have a combination of the
usual Arm pre/post writeback system and the `rGPR` class in particular.
Reviewers: dmgreen, samparker, SjoerdMeijer, t.p.northover
Subscribers: javed.absar, kristof.beyls, hiraditya, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D62680
llvm-svn: 364291
2019-06-25 19:24:18 +08:00
|
|
|
// Actually define all the loads and stores in this family.
|
|
|
|
|
[ARM,MVE] Add intrinsics for gather/scatter load/stores.
This patch adds two new families of intrinsics, both of which are
memory accesses taking a vector of locations to load from / store to.
The vldrq_gather_base / vstrq_scatter_base intrinsics take a vector of
base addresses, and an immediate offset to be added consistently to
each one. vldrq_gather_offset / vstrq_scatter_offset take a scalar
base address, and a vector of offsets to add to it. The
'shifted_offset' variants also multiply each offset by the element
size type, so that the vector is effectively of array indices.
At the IR level, these operations are represented by a single set of
four IR intrinsics: {gather,scatter} × {base,offset}. The other
details (signed/unsigned, shift, and memory element size as opposed to
vector element size) are all specified by IR intrinsic polymorphism
and immediate operands, because that made the selection job easier
than making a huge family of similarly named intrinsics.
I considered using the standard IR representations such as
llvm.masked.gather, but they're not a good fit. In order to use
llvm.masked.gather to represent a gather_offset load with element size
smaller than a pointer, you'd have to expand the <8 x i16> vector of
offsets into an <8 x i16*> vector of pointers, which would be split up
during legalization, so you'd spend most of your time undoing the mess
it had made. Also, ISel support for llvm.masked.gather would be easy
enough in a trivial way (you can expand it into a gather-base load
with a zero immediate offset), but instruction-selecting lots of
fiddly idioms back into all the _other_ MVE load instructions would be
much more work. So I think dedicated IR intrinsics are the more
sensible approach, at least for the moment.
On the clang tablegen side, I've added two new features to the
Tablegen source accepted by MveEmitter: a 'CopyKind' type node for
defining a type that varies with the parameter type (it lets you ask
for an unsigned integer type of the same width as the parameter), and
an 'unsignedflag' value node for passing an immediate IR operand which
is 0 for a signed integer type or 1 for an unsigned one. That lets me
write each kind of intrinsic just once and get all its subtypes and
immediate arguments generated automatically.
Also I've tweaked the handling of pointer-typed values in the code
generation part of MveEmitter: they're generated as Address rather
than Value (i.e. including an alignment) so that they can be given to
the ordinary IR load and store operations, but I'd omitted the code to
convert them back to Value when they're going to be used as an
argument to an IR intrinsic.
On the MC side, I've enhanced MVEVectorVTInfo so that it can tell you
not only the full assembly-language suffix for a given vector type
(like 's32' or 'u16') but also the numeric-only one used by store
instructions (just '32' or '16').
Reviewers: dmgreen
Subscribers: kristof.beyls, hiraditya, cfe-commits, llvm-commits
Tags: #clang, #llvm
Differential Revision: https://reviews.llvm.org/D69791
2019-11-01 01:02:07 +08:00
|
|
|
// Byte-memory (VLDRB) gather loads of the scalar-base + vector-offset family,
// one defm per list of result vector types passed to MVE_VLDR_rq_b.
// NOTE(review): MVE_VLDR_rq_b is defined outside this view; presumably the
// first list element fixes the instruction's suffix/signedness and every
// element gets its own selection patterns -- confirm against the multiclass.
defm MVE_VLDRBU8_rq : MVE_VLDR_rq_b<[MVE_v16u8,MVE_v16s8]>;
|
|
|
|
defm MVE_VLDRBU16_rq: MVE_VLDR_rq_b<[MVE_v8u16]>;
|
|
|
|
defm MVE_VLDRBS16_rq: MVE_VLDR_rq_b<[MVE_v8s16]>;
|
|
|
|
defm MVE_VLDRBU32_rq: MVE_VLDR_rq_b<[MVE_v4u32]>;
|
|
|
|
defm MVE_VLDRBS32_rq: MVE_VLDR_rq_b<[MVE_v4s32]>;
|
[ARM] Add MVE vector load/store instructions.
This adds the rest of the vector memory access instructions. It
includes contiguous loads/stores, with an ordinary addressing mode
such as [r0,#offset] (plus writeback variants); gather loads and
scatter stores with a scalar base address register and a vector of
offsets from it (written [r0,q1] or similar); and gather/scatters with
a vector of base addresses (written [q0,#offset], again with
writeback). Additionally, some of the loads can widen each loaded
value into a larger vector lane, and the corresponding stores narrow
them again.
To implement these, we also have to add the addressing modes they
need. Also, in AsmParser, the `isMem` query function now has
subqueries `isGPRMem` and `isMVEMem`, according to which kind of base
register is used by a given memory access operand.
I've also had to add an extra check in `checkTargetMatchPredicate` in
the AsmParser, without which our last-minute check of `rGPR` register
operands against SP and PC was failing an assertion because Tablegen
had inserted an immediate 0 in place of one of a pair of tied register
operands. (This matches the way the corresponding check for `MCK_rGPR`
in `validateTargetOperandClass` is guarded.) Apparently the MVE load
instructions were the first to have ever triggered this assertion, but
I think only because they were the first to have a combination of the
usual Arm pre/post writeback system and the `rGPR` class in particular.
Reviewers: dmgreen, samparker, SjoerdMeijer, t.p.northover
Subscribers: javed.absar, kristof.beyls, hiraditya, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D62680
llvm-svn: 364291
2019-06-25 19:24:18 +08:00
|
|
|
|
[ARM,MVE] Add intrinsics for gather/scatter load/stores.
This patch adds two new families of intrinsics, both of which are
memory accesses taking a vector of locations to load from / store to.
The vldrq_gather_base / vstrq_scatter_base intrinsics take a vector of
base addresses, and an immediate offset to be added consistently to
each one. vldrq_gather_offset / vstrq_scatter_offset take a scalar
base address, and a vector of offsets to add to it. The
'shifted_offset' variants also multiply each offset by the element
size type, so that the vector is effectively of array indices.
At the IR level, these operations are represented by a single set of
four IR intrinsics: {gather,scatter} × {base,offset}. The other
details (signed/unsigned, shift, and memory element size as opposed to
vector element size) are all specified by IR intrinsic polymorphism
and immediate operands, because that made the selection job easier
than making a huge family of similarly named intrinsics.
I considered using the standard IR representations such as
llvm.masked.gather, but they're not a good fit. In order to use
llvm.masked.gather to represent a gather_offset load with element size
smaller than a pointer, you'd have to expand the <8 x i16> vector of
offsets into an <8 x i16*> vector of pointers, which would be split up
during legalization, so you'd spend most of your time undoing the mess
it had made. Also, ISel support for llvm.masked.gather would be easy
enough in a trivial way (you can expand it into a gather-base load
with a zero immediate offset), but instruction-selecting lots of
fiddly idioms back into all the _other_ MVE load instructions would be
much more work. So I think dedicated IR intrinsics are the more
sensible approach, at least for the moment.
On the clang tablegen side, I've added two new features to the
Tablegen source accepted by MveEmitter: a 'CopyKind' type node for
defining a type that varies with the parameter type (it lets you ask
for an unsigned integer type of the same width as the parameter), and
an 'unsignedflag' value node for passing an immediate IR operand which
is 0 for a signed integer type or 1 for an unsigned one. That lets me
write each kind of intrinsic just once and get all its subtypes and
immediate arguments generated automatically.
Also I've tweaked the handling of pointer-typed values in the code
generation part of MveEmitter: they're generated as Address rather
than Value (i.e. including an alignment) so that they can be given to
the ordinary IR load and store operations, but I'd omitted the code to
convert them back to Value when they're going to be used as an
argument to an IR intrinsic.
On the MC side, I've enhanced MVEVectorVTInfo so that it can tell you
not only the full assembly-language suffix for a given vector type
(like 's32' or 'u16') but also the numeric-only one used by store
instructions (just '32' or '16').
Reviewers: dmgreen
Subscribers: kristof.beyls, hiraditya, cfe-commits, llvm-commits
Tags: #clang, #llvm
Differential Revision: https://reviews.llvm.org/D69791
2019-11-01 01:02:07 +08:00
|
|
|
// Halfword (MVE_memH), word (MVE_memW) and doubleword (MVE_memD) gather loads
// of the scalar-base + vector-offset family. Each defm passes a memory size
// and a list of result vector types to MVE_VLDR_rq_w.
// NOTE(review): MVE_VLDR_rq_w is defined outside this view; presumably the
// first list element names the instruction and the remaining ones (e.g. the
// signed and floating-point types) only add selection patterns -- confirm.
defm MVE_VLDRHU16_rq: MVE_VLDR_rq_w<MVE_memH, [MVE_v8u16,MVE_v8s16,MVE_v8f16]>;
|
|
|
|
defm MVE_VLDRHU32_rq: MVE_VLDR_rq_w<MVE_memH, [MVE_v4u32]>;
|
|
|
|
defm MVE_VLDRHS32_rq: MVE_VLDR_rq_w<MVE_memH, [MVE_v4s32]>;
|
|
|
|
defm MVE_VLDRWU32_rq: MVE_VLDR_rq_w<MVE_memW, [MVE_v4u32,MVE_v4s32,MVE_v4f32]>;
|
|
|
|
defm MVE_VLDRDU64_rq: MVE_VLDR_rq_w<MVE_memD, [MVE_v2u64,MVE_v2s64]>;
|
[ARM] Add MVE vector load/store instructions.
This adds the rest of the vector memory access instructions. It
includes contiguous loads/stores, with an ordinary addressing mode
such as [r0,#offset] (plus writeback variants); gather loads and
scatter stores with a scalar base address register and a vector of
offsets from it (written [r0,q1] or similar); and gather/scatters with
a vector of base addresses (written [q0,#offset], again with
writeback). Additionally, some of the loads can widen each loaded
value into a larger vector lane, and the corresponding stores narrow
them again.
To implement these, we also have to add the addressing modes they
need. Also, in AsmParser, the `isMem` query function now has
subqueries `isGPRMem` and `isMVEMem`, according to which kind of base
register is used by a given memory access operand.
I've also had to add an extra check in `checkTargetMatchPredicate` in
the AsmParser, without which our last-minute check of `rGPR` register
operands against SP and PC was failing an assertion because Tablegen
had inserted an immediate 0 in place of one of a pair of tied register
operands. (This matches the way the corresponding check for `MCK_rGPR`
in `validateTargetOperandClass` is guarded.) Apparently the MVE load
instructions were the first to have ever triggered this assertion, but
I think only because they were the first to have a combination of the
usual Arm pre/post writeback system and the `rGPR` class in particular.
Reviewers: dmgreen, samparker, SjoerdMeijer, t.p.northover
Subscribers: javed.absar, kristof.beyls, hiraditya, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D62680
llvm-svn: 364291
2019-06-25 19:24:18 +08:00
|
|
|
|
[ARM,MVE] Add intrinsics for gather/scatter load/stores.
This patch adds two new families of intrinsics, both of which are
memory accesses taking a vector of locations to load from / store to.
The vldrq_gather_base / vstrq_scatter_base intrinsics take a vector of
base addresses, and an immediate offset to be added consistently to
each one. vldrq_gather_offset / vstrq_scatter_offset take a scalar
base address, and a vector of offsets to add to it. The
'shifted_offset' variants also multiply each offset by the element
size type, so that the vector is effectively of array indices.
At the IR level, these operations are represented by a single set of
four IR intrinsics: {gather,scatter} × {base,offset}. The other
details (signed/unsigned, shift, and memory element size as opposed to
vector element size) are all specified by IR intrinsic polymorphism
and immediate operands, because that made the selection job easier
than making a huge family of similarly named intrinsics.
I considered using the standard IR representations such as
llvm.masked.gather, but they're not a good fit. In order to use
llvm.masked.gather to represent a gather_offset load with element size
smaller than a pointer, you'd have to expand the <8 x i16> vector of
offsets into an <8 x i16*> vector of pointers, which would be split up
during legalization, so you'd spend most of your time undoing the mess
it had made. Also, ISel support for llvm.masked.gather would be easy
enough in a trivial way (you can expand it into a gather-base load
with a zero immediate offset), but instruction-selecting lots of
fiddly idioms back into all the _other_ MVE load instructions would be
much more work. So I think dedicated IR intrinsics are the more
sensible approach, at least for the moment.
On the clang tablegen side, I've added two new features to the
Tablegen source accepted by MveEmitter: a 'CopyKind' type node for
defining a type that varies with the parameter type (it lets you ask
for an unsigned integer type of the same width as the parameter), and
an 'unsignedflag' value node for passing an immediate IR operand which
is 0 for a signed integer type or 1 for an unsigned one. That lets me
write each kind of intrinsic just once and get all its subtypes and
immediate arguments generated automatically.
Also I've tweaked the handling of pointer-typed values in the code
generation part of MveEmitter: they're generated as Address rather
than Value (i.e. including an alignment) so that they can be given to
the ordinary IR load and store operations, but I'd omitted the code to
convert them back to Value when they're going to be used as an
argument to an IR intrinsic.
On the MC side, I've enhanced MVEVectorVTInfo so that it can tell you
not only the full assembly-language suffix for a given vector type
(like 's32' or 'u16') but also the numeric-only one used by store
instructions (just '32' or '16').
Reviewers: dmgreen
Subscribers: kristof.beyls, hiraditya, cfe-commits, llvm-commits
Tags: #clang, #llvm
Differential Revision: https://reviews.llvm.org/D69791
2019-11-01 01:02:07 +08:00
|
|
|
// Byte-memory (VSTRB) scatter stores of the scalar-base + vector-offset
// family, one defm per stored vector type (8-, 16- and 32-bit lanes).
// NOTE(review): MVE_VSTR_rq_b's header is outside this view; the i-suffixed
// types suggest stores do not distinguish signedness -- confirm.
defm MVE_VSTRB8_rq : MVE_VSTR_rq_b<[MVE_v16i8]>;
|
|
|
|
defm MVE_VSTRB16_rq : MVE_VSTR_rq_b<[MVE_v8i16]>;
|
|
|
|
defm MVE_VSTRB32_rq : MVE_VSTR_rq_b<[MVE_v4i32]>;
|
[ARM] Add MVE vector load/store instructions.
This adds the rest of the vector memory access instructions. It
includes contiguous loads/stores, with an ordinary addressing mode
such as [r0,#offset] (plus writeback variants); gather loads and
scatter stores with a scalar base address register and a vector of
offsets from it (written [r0,q1] or similar); and gather/scatters with
a vector of base addresses (written [q0,#offset], again with
writeback). Additionally, some of the loads can widen each loaded
value into a larger vector lane, and the corresponding stores narrow
them again.
To implement these, we also have to add the addressing modes they
need. Also, in AsmParser, the `isMem` query function now has
subqueries `isGPRMem` and `isMVEMem`, according to which kind of base
register is used by a given memory access operand.
I've also had to add an extra check in `checkTargetMatchPredicate` in
the AsmParser, without which our last-minute check of `rGPR` register
operands against SP and PC was failing an assertion because Tablegen
had inserted an immediate 0 in place of one of a pair of tied register
operands. (This matches the way the corresponding check for `MCK_rGPR`
in `validateTargetOperandClass` is guarded.) Apparently the MVE load
instructions were the first to have ever triggered this assertion, but
I think only because they were the first to have a combination of the
usual Arm pre/post writeback system and the `rGPR` class in particular.
Reviewers: dmgreen, samparker, SjoerdMeijer, t.p.northover
Subscribers: javed.absar, kristof.beyls, hiraditya, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D62680
llvm-svn: 364291
2019-06-25 19:24:18 +08:00
|
|
|
|
[ARM,MVE] Add intrinsics for gather/scatter load/stores.
This patch adds two new families of intrinsics, both of which are
memory accesses taking a vector of locations to load from / store to.
The vldrq_gather_base / vstrq_scatter_base intrinsics take a vector of
base addresses, and an immediate offset to be added consistently to
each one. vldrq_gather_offset / vstrq_scatter_offset take a scalar
base address, and a vector of offsets to add to it. The
'shifted_offset' variants also multiply each offset by the element
size type, so that the vector is effectively of array indices.
At the IR level, these operations are represented by a single set of
four IR intrinsics: {gather,scatter} × {base,offset}. The other
details (signed/unsigned, shift, and memory element size as opposed to
vector element size) are all specified by IR intrinsic polymorphism
and immediate operands, because that made the selection job easier
than making a huge family of similarly named intrinsics.
I considered using the standard IR representations such as
llvm.masked.gather, but they're not a good fit. In order to use
llvm.masked.gather to represent a gather_offset load with element size
smaller than a pointer, you'd have to expand the <8 x i16> vector of
offsets into an <8 x i16*> vector of pointers, which would be split up
during legalization, so you'd spend most of your time undoing the mess
it had made. Also, ISel support for llvm.masked.gather would be easy
enough in a trivial way (you can expand it into a gather-base load
with a zero immediate offset), but instruction-selecting lots of
fiddly idioms back into all the _other_ MVE load instructions would be
much more work. So I think dedicated IR intrinsics are the more
sensible approach, at least for the moment.
On the clang tablegen side, I've added two new features to the
Tablegen source accepted by MveEmitter: a 'CopyKind' type node for
defining a type that varies with the parameter type (it lets you ask
for an unsigned integer type of the same width as the parameter), and
an 'unsignedflag' value node for passing an immediate IR operand which
is 0 for a signed integer type or 1 for an unsigned one. That lets me
write each kind of intrinsic just once and get all its subtypes and
immediate arguments generated automatically.
Also I've tweaked the handling of pointer-typed values in the code
generation part of MveEmitter: they're generated as Address rather
than Value (i.e. including an alignment) so that they can be given to
the ordinary IR load and store operations, but I'd omitted the code to
convert them back to Value when they're going to be used as an
argument to an IR intrinsic.
On the MC side, I've enhanced MVEVectorVTInfo so that it can tell you
not only the full assembly-language suffix for a given vector type
(like 's32' or 'u16') but also the numeric-only one used by store
instructions (just '32' or '16').
Reviewers: dmgreen
Subscribers: kristof.beyls, hiraditya, cfe-commits, llvm-commits
Tags: #clang, #llvm
Differential Revision: https://reviews.llvm.org/D69791
2019-11-01 01:02:07 +08:00
|
|
|
// Halfword (MVE_memH), word (MVE_memW) and doubleword (MVE_memD) scatter
// stores of the scalar-base + vector-offset family. Each defm passes a memory
// size and a list of stored vector types to MVE_VSTR_rq_w.
// NOTE(review): MVE_VSTR_rq_w is defined outside this view; presumably every
// listed type (integer and floating-point) gets selection patterns for the
// same instruction -- confirm against the multiclass.
defm MVE_VSTRH16_rq : MVE_VSTR_rq_w<MVE_memH, [MVE_v8i16,MVE_v8f16]>;
|
|
|
|
defm MVE_VSTRH32_rq : MVE_VSTR_rq_w<MVE_memH, [MVE_v4i32]>;
|
|
|
|
defm MVE_VSTRW32_rq : MVE_VSTR_rq_w<MVE_memW, [MVE_v4i32,MVE_v4f32]>;
|
|
|
|
defm MVE_VSTRD64_rq : MVE_VSTR_rq_w<MVE_memD, [MVE_v2i64]>;
|
[ARM] Add MVE vector load/store instructions.
This adds the rest of the vector memory access instructions. It
includes contiguous loads/stores, with an ordinary addressing mode
such as [r0,#offset] (plus writeback variants); gather loads and
scatter stores with a scalar base address register and a vector of
offsets from it (written [r0,q1] or similar); and gather/scatters with
a vector of base addresses (written [q0,#offset], again with
writeback). Additionally, some of the loads can widen each loaded
value into a larger vector lane, and the corresponding stores narrow
them again.
To implement these, we also have to add the addressing modes they
need. Also, in AsmParser, the `isMem` query function now has
subqueries `isGPRMem` and `isMVEMem`, according to which kind of base
register is used by a given memory access operand.
I've also had to add an extra check in `checkTargetMatchPredicate` in
the AsmParser, without which our last-minute check of `rGPR` register
operands against SP and PC was failing an assertion because Tablegen
had inserted an immediate 0 in place of one of a pair of tied register
operands. (This matches the way the corresponding check for `MCK_rGPR`
in `validateTargetOperandClass` is guarded.) Apparently the MVE load
instructions were the first to have ever triggered this assertion, but
I think only because they were the first to have a combination of the
usual Arm pre/post writeback system and the `rGPR` class in particular.
Reviewers: dmgreen, samparker, SjoerdMeijer, t.p.northover
Subscribers: javed.absar, kristof.beyls, hiraditya, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D62680
llvm-svn: 364291
2019-06-25 19:24:18 +08:00
|
|
|
|
|
|
|
// Gather loads / scatter stores whose address operand is of the form
|
|
|
|
// [Qm,#imm], i.e. a vector containing a full base address for each
|
|
|
|
// loaded item, plus an immediate offset applied consistently to all
|
|
|
|
// of them. ('Load/store the same field from this vector of pointers
|
|
|
|
// to a structure type.')
|
|
|
|
//
|
|
|
|
// This family requires the vector lane size to be at least 32 bits
|
|
|
|
// (so there's room for an address in each lane at all). It has no
|
|
|
|
// widening/narrowing variants. But it does support preindex
|
|
|
|
// writeback, in which the address vector is updated to hold the
|
|
|
|
// addresses actually loaded from.
|
|
|
|
|
|
|
|
// Base class.
|
|
|
|
// Instruction class for gather loads / scatter stores addressed as [Qm,#imm].
//
// Parameters:
//   dir    - load/store direction record; supplies Oops, Iops and cstr.
//   memsz  - memory element size; supplies the operand shift and encoding.
//   W      - forwarded to MVE_VLDRSTR_base (NOTE(review): presumably the
//            writeback bit -- confirm in the base class).
//   wbops  - extra output dag prepended to dir.Oops (used for the writeback
//            result register).
//   asm    - mnemonic.
//   wbAsm  - text appended to the "$Qd, $addr" operand string.
//   suffix - type suffix of the mnemonic.
//   cstr   - operand constraints, concatenated in front of dir.cstr.
//
// The single memory operand $addr is an mve_addr_q_shift<memsz.shift>.
class MVE_VLDRSTR_qi<MVE_ldst_direction dir, MVE_memsz memsz, bit W, dag wbops,
|
|
|
|
string asm, string wbAsm, string suffix, string cstr = "">
|
|
|
|
: MVE_VLDRSTR_base<dir, 1, 1, W, 1, !con(wbops, dir.Oops),
|
|
|
|
!con(dir.Iops, (ins mve_addr_q_shift<memsz.shift>:$addr)),
|
|
|
|
asm, suffix, "$Qd, $addr" # wbAsm, cstr # dir.cstr> {
|
|
|
|
// Encoded form of $addr: 11 bits, scattered over the instruction word as
// addr{10-8} -> Inst{19-17}, addr{7} -> Inst{23}, addr{6-0} -> Inst{6-0}.
// NOTE(review): presumably these are the base Q register, the add/subtract
// flag and the scaled immediate respectively -- confirm against the encoder
// for mve_addr_q_shift.
bits<11> addr;
|
|
|
|
let Inst{23} = addr{7};
|
|
|
|
let Inst{19-17} = addr{10-8};
|
|
|
|
let Inst{16} = 0;
|
|
|
|
let Inst{8} = memsz.encoding{0}; // enough to distinguish 32- from 64-bit
|
|
|
|
let Inst{7} = 0;
|
|
|
|
let Inst{6-0} = addr{6-0};
|
|
|
|
}
|
|
|
|
|
|
|
|
// Multiclass that generates the non-writeback and writeback variants.
|
|
|
|
// Emits two MVE_VLDRSTR_qi records per instantiation:
//   NAME      - no writeback: W = 0, no extra outputs, empty wbAsm.
//   NAME_pre  - writeback: W = 1, an extra MQPR:$wb output constrained by
//               "$addr.base = $wb", and "!" appended to the operand string.
// The _pre record also overrides DecoderMethod with
// DecodeMVE_MEM_3_pre<memsz.shift>.
multiclass MVE_VLDRSTR_qi_m<MVE_ldst_direction dir, MVE_memsz memsz,
|
|
|
|
string asm, string suffix> {
|
|
|
|
def "" : MVE_VLDRSTR_qi<dir, memsz, 0, (outs), asm, "", suffix>;
|
|
|
|
def _pre : MVE_VLDRSTR_qi<dir, memsz, 1, (outs MQPR:$wb), asm, "!", suffix,
|
|
|
|
"$addr.base = $wb"> {
|
|
|
|
let DecoderMethod="DecodeMVE_MEM_3_pre<"#memsz.shift#">";
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
[ARM,MVE] Add intrinsics for gather/scatter load/stores.
This patch adds two new families of intrinsics, both of which are
memory accesses taking a vector of locations to load from / store to.
The vldrq_gather_base / vstrq_scatter_base intrinsics take a vector of
base addresses, and an immediate offset to be added consistently to
each one. vldrq_gather_offset / vstrq_scatter_offset take a scalar
base address, and a vector of offsets to add to it. The
'shifted_offset' variants also multiply each offset by the element
size type, so that the vector is effectively of array indices.
At the IR level, these operations are represented by a single set of
four IR intrinsics: {gather,scatter} × {base,offset}. The other
details (signed/unsigned, shift, and memory element size as opposed to
vector element size) are all specified by IR intrinsic polymorphism
and immediate operands, because that made the selection job easier
than making a huge family of similarly named intrinsics.
I considered using the standard IR representations such as
llvm.masked.gather, but they're not a good fit. In order to use
llvm.masked.gather to represent a gather_offset load with element size
smaller than a pointer, you'd have to expand the <8 x i16> vector of
offsets into an <8 x i16*> vector of pointers, which would be split up
during legalization, so you'd spend most of your time undoing the mess
it had made. Also, ISel support for llvm.masked.gather would be easy
enough in a trivial way (you can expand it into a gather-base load
with a zero immediate offset), but instruction-selecting lots of
fiddly idioms back into all the _other_ MVE load instructions would be
much more work. So I think dedicated IR intrinsics are the more
sensible approach, at least for the moment.
On the clang tablegen side, I've added two new features to the
Tablegen source accepted by MveEmitter: a 'CopyKind' type node for
defining a type that varies with the parameter type (it lets you ask
for an unsigned integer type of the same width as the parameter), and
an 'unsignedflag' value node for passing an immediate IR operand which
is 0 for a signed integer type or 1 for an unsigned one. That lets me
write each kind of intrinsic just once and get all its subtypes and
immediate arguments generated automatically.
Also I've tweaked the handling of pointer-typed values in the code
generation part of MveEmitter: they're generated as Address rather
than Value (i.e. including an alignment) so that they can be given to
the ordinary IR load and store operations, but I'd omitted the code to
convert them back to Value when they're going to be used as an
argument to an IR intrinsic.
On the MC side, I've enhanced MVEVectorVTInfo so that it can tell you
not only the full assembly-language suffix for a given vector type
(like 's32' or 'u16') but also the numeric-only one used by store
instructions (just '32' or '16').
Reviewers: dmgreen
Subscribers: kristof.beyls, hiraditya, cfe-commits, llvm-commits
Tags: #clang, #llvm
Differential Revision: https://reviews.llvm.org/D69791
2019-11-01 01:02:07 +08:00
|
|
|
// Multiclasses wrapping that one, adding selection patterns for the
|
|
|
|
// non-writeback loads and all the stores. (The writeback loads must
|
|
|
|
// deliver multiple output values, so they have to be selected by C++
|
|
|
|
// code.)
|
|
|
|
// Load-side wrapper for the [Qm,#imm] family.
//
// Parameters:
//   memsz - memory element size; its MnemonicLetter and TypeBits build the
//           mnemonic ("vldr" # letter) and suffix ("u" # bits).
//   AVTI  - vector type info for the address vector operand.
//   DVTIs - result vector types; each one gets its own pair of patterns.
//
// Instantiates MVE_VLDRSTR_qi_m with MVE_ld, then maps
// int_arm_mve_vldr_gather_base and int_arm_mve_vldr_gather_base_predicated
// onto the non-writeback instruction NAME. No pattern here targets NAME_pre.
multiclass MVE_VLDR_qi<MVE_memsz memsz, MVEVectorVTInfo AVTI,
|
|
|
|
list<MVEVectorVTInfo> DVTIs> {
|
|
|
|
defm "" : MVE_VLDRSTR_qi_m<MVE_ld, memsz, "vldr" # memsz.MnemonicLetter,
|
|
|
|
"u" # memsz.TypeBits>;
|
|
|
|
|
|
|
|
foreach DVTI = DVTIs in {
|
|
|
|
// Unpredicated gather: (address vector, immediate offset) -> data vector.
def : Pat<(DVTI.Vec (int_arm_mve_vldr_gather_base
|
|
|
|
(AVTI.Vec MQPR:$addr), (i32 imm:$offset))),
|
|
|
|
(DVTI.Vec (!cast<Instruction>(NAME)
|
|
|
|
(AVTI.Vec MQPR:$addr), (i32 imm:$offset)))>;
|
|
|
|
// Predicated gather: the instruction takes two extra trailing operands, the
// literal 1 and the predicate register.
// NOTE(review): 1 is presumably the "then" VPT predication code -- confirm.
def : Pat<(DVTI.Vec (int_arm_mve_vldr_gather_base_predicated
|
|
|
|
(AVTI.Vec MQPR:$addr), (i32 imm:$offset), (AVTI.Pred VCCR:$pred))),
|
|
|
|
(DVTI.Vec (!cast<Instruction>(NAME)
|
|
|
|
(AVTI.Vec MQPR:$addr), (i32 imm:$offset), 1, VCCR:$pred))>;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
// Store-side wrapper for the [Qm,#imm] family.
//
// Parameters:
//   memsz - memory element size; its MnemonicLetter and TypeBits build the
//           mnemonic ("vstr" # letter) and the numeric-only suffix.
//   AVTI  - vector type info for the address vector operand (and for the
//           updated address vector returned by the writeback forms).
//   DVTIs - stored data vector types; each one gets four patterns.
//
// Instantiates MVE_VLDRSTR_qi_m with MVE_st, then maps the scatter_base and
// scatter_base_predicated intrinsics onto NAME, and the scatter_base_wb and
// scatter_base_wb_predicated intrinsics onto NAME_pre. The intrinsics take
// (addr, offset, data); the instructions take (data, addr, offset).
multiclass MVE_VSTR_qi<MVE_memsz memsz, MVEVectorVTInfo AVTI,
|
|
|
|
list<MVEVectorVTInfo> DVTIs> {
|
|
|
|
defm "" : MVE_VLDRSTR_qi_m<MVE_st, memsz, "vstr" # memsz.MnemonicLetter,
|
|
|
|
!cast<string>(memsz.TypeBits)>;
|
|
|
|
|
|
|
|
foreach DVTI = DVTIs in {
|
|
|
|
// Plain scatter store, no writeback.
def : Pat<(int_arm_mve_vstr_scatter_base
|
|
|
|
(AVTI.Vec MQPR:$addr), (i32 imm:$offset), (DVTI.Vec MQPR:$data)),
|
|
|
|
(!cast<Instruction>(NAME)
|
|
|
|
(DVTI.Vec MQPR:$data), (AVTI.Vec MQPR:$addr), (i32 imm:$offset))>;
|
|
|
|
// Predicated scatter store, no writeback; trailing operands are the literal
// 1 and the predicate register.
// NOTE(review): 1 is presumably the "then" VPT predication code -- confirm.
def : Pat<(int_arm_mve_vstr_scatter_base_predicated
|
|
|
|
(AVTI.Vec MQPR:$addr), (i32 imm:$offset), (DVTI.Vec MQPR:$data), (AVTI.Pred VCCR:$pred)),
|
|
|
|
(!cast<Instruction>(NAME)
|
|
|
|
(DVTI.Vec MQPR:$data), (AVTI.Vec MQPR:$addr), (i32 imm:$offset), 1, VCCR:$pred)>;
|
|
|
|
// Writeback scatter store: selects NAME_pre and yields an AVTI.Vec result.
def : Pat<(AVTI.Vec (int_arm_mve_vstr_scatter_base_wb
|
|
|
|
(AVTI.Vec MQPR:$addr), (i32 imm:$offset), (DVTI.Vec MQPR:$data))),
|
|
|
|
(AVTI.Vec (!cast<Instruction>(NAME # "_pre")
|
|
|
|
(DVTI.Vec MQPR:$data), (AVTI.Vec MQPR:$addr), (i32 imm:$offset)))>;
|
|
|
|
// Predicated writeback scatter store.
def : Pat<(AVTI.Vec (int_arm_mve_vstr_scatter_base_wb_predicated
|
|
|
|
(AVTI.Vec MQPR:$addr), (i32 imm:$offset), (DVTI.Vec MQPR:$data), (AVTI.Pred VCCR:$pred))),
|
|
|
|
(AVTI.Vec (!cast<Instruction>(NAME # "_pre")
|
|
|
|
(DVTI.Vec MQPR:$data), (AVTI.Vec MQPR:$addr), (i32 imm:$offset), 1, VCCR:$pred))>;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
[ARM] Add MVE vector load/store instructions.
This adds the rest of the vector memory access instructions. It
includes contiguous loads/stores, with an ordinary addressing mode
such as [r0,#offset] (plus writeback variants); gather loads and
scatter stores with a scalar base address register and a vector of
offsets from it (written [r0,q1] or similar); and gather/scatters with
a vector of base addresses (written [q0,#offset], again with
writeback). Additionally, some of the loads can widen each loaded
value into a larger vector lane, and the corresponding stores narrow
them again.
To implement these, we also have to add the addressing modes they
need. Also, in AsmParser, the `isMem` query function now has
subqueries `isGPRMem` and `isMVEMem`, according to which kind of base
register is used by a given memory access operand.
I've also had to add an extra check in `checkTargetMatchPredicate` in
the AsmParser, without which our last-minute check of `rGPR` register
operands against SP and PC was failing an assertion because Tablegen
had inserted an immediate 0 in place of one of a pair of tied register
operands. (This matches the way the corresponding check for `MCK_rGPR`
in `validateTargetOperandClass` is guarded.) Apparently the MVE load
instructions were the first to have ever triggered this assertion, but
I think only because they were the first to have a combination of the
usual Arm pre/post writeback system and the `rGPR` class in particular.
Reviewers: dmgreen, samparker, SjoerdMeijer, t.p.northover
Subscribers: javed.absar, kristof.beyls, hiraditya, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D62680
llvm-svn: 364291
2019-06-25 19:24:18 +08:00
|
|
|
// Actual instruction definitions.
|
[ARM,MVE] Add intrinsics for gather/scatter load/stores.
This patch adds two new families of intrinsics, both of which are
memory accesses taking a vector of locations to load from / store to.
The vldrq_gather_base / vstrq_scatter_base intrinsics take a vector of
base addresses, and an immediate offset to be added consistently to
each one. vldrq_gather_offset / vstrq_scatter_offset take a scalar
base address, and a vector of offsets to add to it. The
'shifted_offset' variants also multiply each offset by the element
size type, so that the vector is effectively of array indices.
At the IR level, these operations are represented by a single set of
four IR intrinsics: {gather,scatter} × {base,offset}. The other
details (signed/unsigned, shift, and memory element size as opposed to
vector element size) are all specified by IR intrinsic polymorphism
and immediate operands, because that made the selection job easier
than making a huge family of similarly named intrinsics.
I considered using the standard IR representations such as
llvm.masked.gather, but they're not a good fit. In order to use
llvm.masked.gather to represent a gather_offset load with element size
smaller than a pointer, you'd have to expand the <8 x i16> vector of
offsets into an <8 x i16*> vector of pointers, which would be split up
during legalization, so you'd spend most of your time undoing the mess
it had made. Also, ISel support for llvm.masked.gather would be easy
enough in a trivial way (you can expand it into a gather-base load
with a zero immediate offset), but instruction-selecting lots of
fiddly idioms back into all the _other_ MVE load instructions would be
much more work. So I think dedicated IR intrinsics are the more
sensible approach, at least for the moment.
On the clang tablegen side, I've added two new features to the
Tablegen source accepted by MveEmitter: a 'CopyKind' type node for
defining a type that varies with the parameter type (it lets you ask
for an unsigned integer type of the same width as the parameter), and
an 'unsignedflag' value node for passing an immediate IR operand which
is 0 for a signed integer type or 1 for an unsigned one. That lets me
write each kind of intrinsic just once and get all its subtypes and
immediate arguments generated automatically.
Also I've tweaked the handling of pointer-typed values in the code
generation part of MveEmitter: they're generated as Address rather
than Value (i.e. including an alignment) so that they can be given to
the ordinary IR load and store operations, but I'd omitted the code to
convert them back to Value when they're going to be used as an
argument to an IR intrinsic.
On the MC side, I've enhanced MVEVectorVTInfo so that it can tell you
not only the full assembly-language suffix for a given vector type
(like 's32' or 'u16') but also the numeric-only one used by store
instructions (just '32' or '16').
Reviewers: dmgreen
Subscribers: kristof.beyls, hiraditya, cfe-commits, llvm-commits
Tags: #clang, #llvm
Differential Revision: https://reviews.llvm.org/D69791
2019-11-01 01:02:07 +08:00
|
|
|
// Instantiate the MVE_VLDR_qi / MVE_VSTR_qi multiclasses once per memory
// access size: MVE_memW paired with the MVE_v4i32 record, and MVE_memD paired
// with the MVE_v2i64 record. The trailing list names the integer and
// floating-point type records of the same lane width.
// NOTE(review): that list is presumably the set of data vector types for
// which selection patterns get generated -- confirm in the multiclass
// definitions, which are outside this excerpt.
defm MVE_VLDRWU32_qi: MVE_VLDR_qi<MVE_memW, MVE_v4i32, [MVE_v4i32,MVE_v4f32]>;
|
|
|
|
defm MVE_VLDRDU64_qi: MVE_VLDR_qi<MVE_memD, MVE_v2i64, [MVE_v2i64,MVE_v2f64]>;
|
|
|
|
defm MVE_VSTRW32_qi: MVE_VSTR_qi<MVE_memW, MVE_v4i32, [MVE_v4i32,MVE_v4f32]>;
|
|
|
|
defm MVE_VSTRD64_qi: MVE_VSTR_qi<MVE_memD, MVE_v2i64, [MVE_v2i64,MVE_v2f64]>;
|
[ARM] Add MVE vector load/store instructions.
This adds the rest of the vector memory access instructions. It
includes contiguous loads/stores, with an ordinary addressing mode
such as [r0,#offset] (plus writeback variants); gather loads and
scatter stores with a scalar base address register and a vector of
offsets from it (written [r0,q1] or similar); and gather/scatters with
a vector of base addresses (written [q0,#offset], again with
writeback). Additionally, some of the loads can widen each loaded
value into a larger vector lane, and the corresponding stores narrow
them again.
To implement these, we also have to add the addressing modes they
need. Also, in AsmParser, the `isMem` query function now has
subqueries `isGPRMem` and `isMVEMem`, according to which kind of base
register is used by a given memory access operand.
I've also had to add an extra check in `checkTargetMatchPredicate` in
the AsmParser, without which our last-minute check of `rGPR` register
operands against SP and PC was failing an assertion because Tablegen
had inserted an immediate 0 in place of one of a pair of tied register
operands. (This matches the way the corresponding check for `MCK_rGPR`
in `validateTargetOperandClass` is guarded.) Apparently the MVE load
instructions were the first to have ever triggered this assertion, but
I think only because they were the first to have a combination of the
usual Arm pre/post writeback system and the `rGPR` class in particular.
Reviewers: dmgreen, samparker, SjoerdMeijer, t.p.northover
Subscribers: javed.absar, kristof.beyls, hiraditya, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D62680
llvm-svn: 364291
2019-06-25 19:24:18 +08:00
|
|
|
|
|
|
|
// Define aliases for all the instructions where memory size and
|
|
|
|
// vector lane size are the same. These are mnemonic aliases, so they
|
|
|
|
// apply consistently across all of the above families - contiguous
|
|
|
|
// loads, and both the rq and qi types of gather/scatter.
|
|
|
|
//
|
|
|
|
// Rationale: As long as you're loading (for example) 16-bit memory
|
|
|
|
// values into 16-bit vector lanes, you can think of them as signed or
|
|
|
|
// unsigned integers, fp16 or just raw 16-bit blobs and it makes no
|
|
|
|
// difference. So we permit all of vldrh.16, vldrh.u16, vldrh.s16,
|
|
|
|
// vldrh.f16 and treat them all as equivalent to the canonical
|
|
|
|
// spelling (which happens to be .u16 for loads, and just .16 for
|
|
|
|
// stores).
|
|
|
|
|
|
|
|
// Generate the mnemonic aliases. For each VPT condition suffix ("", "t" or
// "e"), each memory-size record, and each suffix in that record's `suffixes`
// list, map "vldr"/"vstr" + size letter + condition + suffix onto the same
// mnemonic spelled with the record's canonical load or store suffix.
foreach vpt_cond = ["", "t", "e"] in
|
|
|
|
foreach memsz = [MVE_memB, MVE_memH, MVE_memW, MVE_memD] in
|
|
|
|
foreach suffix = memsz.suffixes in {
|
|
|
|
|
|
|
|
// These foreaches are conceptually ifs, implemented by iterating a
|
|
|
|
// dummy variable over a list with 0 or 1 elements depending on the
|
|
|
|
// condition. The idea is to iterate over _nearly_ all the suffixes
|
|
|
|
// in memsz.suffixes, but omit the one we want all the others to alias.
|
|
|
|
|
|
|
|
// Load alias: emitted only when `suffix` is not already the canonical
// load suffix, so the canonical spelling is never aliased to itself.
foreach _ = !if(!ne(suffix, memsz.CanonLoadSuffix), [1], []<int>) in
|
|
|
|
def : MnemonicAlias<
|
|
|
|
"vldr" # memsz.MnemonicLetter # vpt_cond # suffix,
|
|
|
|
"vldr" # memsz.MnemonicLetter # vpt_cond # memsz.CanonLoadSuffix>;
|
|
|
|
|
|
|
|
// Store alias: same scheme, compared against the canonical store suffix.
foreach _ = !if(!ne(suffix, memsz.CanonStoreSuffix), [1], []<int>) in
|
|
|
|
def : MnemonicAlias<
|
|
|
|
"vstr" # memsz.MnemonicLetter # vpt_cond # suffix,
|
|
|
|
"vstr" # memsz.MnemonicLetter # vpt_cond # memsz.CanonStoreSuffix>;
|
|
|
|
}
|
|
|
|
|
|
|
|
// end of MVE predicable load/store
|
|
|
|
|
2019-06-18 23:05:42 +08:00
|
|
|
// Base class for the VPT instruction encodings defined below. The mnemonic
// is "vpt" followed by the text of the ${Mk} mask operand, a ".", and the
// type suffix. The instruction has no explicit outputs and lists VPR in Defs.
//   suffix  - type suffix appended to the mnemonic (e.g. "i32").
//   size    - 2-bit value placed in Inst{21-20}.
//   iops    - input operand dag supplied by the subclass.
//   asm     - assembly operand string.
//   pattern - optional selection patterns (empty by default).
class MVE_VPT<string suffix, bits<2> size, dag iops, string asm, list<dag> pattern=[]>
|
[ARM] Set up infrastructure for MVE vector instructions.
This commit prepares the way to start adding the main collection of
MVE instructions, which operate on the 128-bit vector registers.
The most obvious thing that's needed, and the simplest, is to add the
MQPR register class, which is like the existing QPR except that it has
fewer registers in it.
The more complicated part: MVE defines a system of vector predication,
in which instructions operating on 128-bit vector registers can be
constrained to operate on only a subset of the lanes, using a system
of prefix instructions similar to the existing Thumb IT, in that you
have one prefix instruction which designates up to 4 following
instructions as subject to predication, and within that sequence, the
predicate can be inverted by means of T/E suffixes ('Then' / 'Else').
To support instructions of this type, we've added two new Tablegen
classes `vpred_n` and `vpred_r` for standard clusters of MC operands
to add to a predicated instruction. Both include a flag indicating how
the instruction is predicated at all (options are T, E and 'not
predicated'), and an input register field for the register controlling
the set of active lanes. They differ from each other in that `vpred_r`
also includes an input operand for the previous value of the output
register, for instructions that leave inactive lanes unchanged.
`vpred_n` lacks that extra operand; it will be used for instructions
that don't preserve inactive lanes in their output register (either
because inactive lanes are zeroed, as the MVE load instructions do, or
because the output register isn't a vector at all).
This commit also adds the family of prefix instructions themselves
(VPT / VPST), and all the machinery needed to work with them in
assembly and disassembly (e.g. generating the 't' and 'e' mnemonic
suffixes on disassembled instructions within a predicated block)
I've added a couple of demo instructions that derive from the new
Tablegen base classes and use those two operand clusters. The bulk of
the vector instructions will come in followup commits small enough to
be manageable. (One exception is that I've added the full version of
`isMnemonicVPTPredicable` in the AsmParser, because it seemed
pointless to carefully split it up.)
Reviewers: dmgreen, samparker, SjoerdMeijer, t.p.northover
Subscribers: javed.absar, kristof.beyls, hiraditya, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D62669
llvm-svn: 363258
2019-06-13 21:11:13 +08:00
|
|
|
: MVE_MI<(outs ), iops, NoItinerary, !strconcat("vpt", "${Mk}", ".", suffix), asm, "", pattern> {
|
|
|
|
// Encoded operand fields: 3-bit condition, 4-bit mask, 3-bit Qn register.
bits<3> fc;
|
|
|
|
bits<4> Mk;
|
|
|
|
bits<3> Qn;
|
|
|
|
|
|
|
|
let Inst{31-23} = 0b111111100;
|
|
|
|
// The 4-bit mask is split: its top bit goes here, the low three bits go
// into Inst{15-13} below.
let Inst{22} = Mk{3};
|
|
|
|
let Inst{21-20} = size;
|
|
|
|
let Inst{19-17} = Qn{2-0};
|
|
|
|
let Inst{16} = 0b1;
|
|
|
|
let Inst{15-13} = Mk{2-0};
|
|
|
|
// Only fc{2} and fc{0} are placed by this class (Inst{12} and Inst{7});
// fc{1} is encoded by the MVE_VPTt1 subclass at Inst{0}.
let Inst{12} = fc{2};
|
|
|
|
let Inst{11-8} = 0b1111;
|
|
|
|
let Inst{7} = fc{0};
|
|
|
|
let Inst{4} = 0b0;
|
|
|
|
|
2019-09-16 21:02:41 +08:00
|
|
|
// VPR is recorded as a register defined by this instruction.
let Defs = [VPR];
|
2019-10-15 21:12:51 +08:00
|
|
|
// NOTE(review): this flag is consumed elsewhere in the backend; its exact
// semantics are not visible in this excerpt.
let validForTailPredication = 1;
|
[ARM] Set up infrastructure for MVE vector instructions.
This commit prepares the way to start adding the main collection of
MVE instructions, which operate on the 128-bit vector registers.
The most obvious thing that's needed, and the simplest, is to add the
MQPR register class, which is like the existing QPR except that it has
fewer registers in it.
The more complicated part: MVE defines a system of vector predication,
in which instructions operating on 128-bit vector registers can be
constrained to operate on only a subset of the lanes, using a system
of prefix instructions similar to the existing Thumb IT, in that you
have one prefix instruction which designates up to 4 following
instructions as subject to predication, and within that sequence, the
predicate can be inverted by means of T/E suffixes ('Then' / 'Else').
To support instructions of this type, we've added two new Tablegen
classes `vpred_n` and `vpred_r` for standard clusters of MC operands
to add to a predicated instruction. Both include a flag indicating how
the instruction is predicated at all (options are T, E and 'not
predicated'), and an input register field for the register controlling
the set of active lanes. They differ from each other in that `vpred_r`
also includes an input operand for the previous value of the output
register, for instructions that leave inactive lanes unchanged.
`vpred_n` lacks that extra operand; it will be used for instructions
that don't preserve inactive lanes in their output register (either
because inactive lanes are zeroed, as the MVE load instructions do, or
because the output register isn't a vector at all).
This commit also adds the family of prefix instructions themselves
(VPT / VPST), and all the machinery needed to work with them in
assembly and disassembly (e.g. generating the 't' and 'e' mnemonic
suffixes on disassembled instructions within a predicated block)
I've added a couple of demo instructions that derive from the new
Tablegen base classes and use those two operand clusters. The bulk of
the vector instructions will come in followup commits small enough to
be manageable. (One exception is that I've added the full version of
`isMnemonicVPTPredicable` in the AsmParser, because it seemed
pointless to carefully split it up.)
Reviewers: dmgreen, samparker, SjoerdMeijer, t.p.northover
Subscribers: javed.absar, kristof.beyls, hiraditya, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D62669
llvm-svn: 363258
2019-06-13 21:11:13 +08:00
|
|
|
}
|
|
|
|
|
2019-06-18 23:05:42 +08:00
|
|
|
// VPT form whose assembly operands are "$fc, $Qn, $Qm". Extends MVE_VPT with
// a 4-bit Qm field and with the encoding of fc{1}; the input operand dag is
// still supplied by the subclass through `iops`.
class MVE_VPTt1<string suffix, bits<2> size, dag iops>
|
|
|
|
: MVE_VPT<suffix, size, iops, "$fc, $Qn, $Qm"> {
|
[ARM] Set up infrastructure for MVE vector instructions.
This commit prepares the way to start adding the main collection of
MVE instructions, which operate on the 128-bit vector registers.
The most obvious thing that's needed, and the simplest, is to add the
MQPR register class, which is like the existing QPR except that it has
fewer registers in it.
The more complicated part: MVE defines a system of vector predication,
in which instructions operating on 128-bit vector registers can be
constrained to operate on only a subset of the lanes, using a system
of prefix instructions similar to the existing Thumb IT, in that you
have one prefix instruction which designates up to 4 following
instructions as subject to predication, and within that sequence, the
predicate can be inverted by means of T/E suffixes ('Then' / 'Else').
To support instructions of this type, we've added two new Tablegen
classes `vpred_n` and `vpred_r` for standard clusters of MC operands
to add to a predicated instruction. Both include a flag indicating how
the instruction is predicated at all (options are T, E and 'not
predicated'), and an input register field for the register controlling
the set of active lanes. They differ from each other in that `vpred_r`
also includes an input operand for the previous value of the output
register, for instructions that leave inactive lanes unchanged.
`vpred_n` lacks that extra operand; it will be used for instructions
that don't preserve inactive lanes in their output register (either
because inactive lanes are zeroed, as the MVE load instructions do, or
because the output register isn't a vector at all).
This commit also adds the family of prefix instructions themselves
(VPT / VPST), and all the machinery needed to work with them in
assembly and disassembly (e.g. generating the 't' and 'e' mnemonic
suffixes on disassembled instructions within a predicated block)
I've added a couple of demo instructions that derive from the new
Tablegen base classes and use those two operand clusters. The bulk of
the vector instructions will come in followup commits small enough to
be manageable. (One exception is that I've added the full version of
`isMnemonicVPTPredicable` in the AsmParser, because it seemed
pointless to carefully split it up.)
Reviewers: dmgreen, samparker, SjoerdMeijer, t.p.northover
Subscribers: javed.absar, kristof.beyls, hiraditya, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D62669
llvm-svn: 363258
2019-06-13 21:11:13 +08:00
|
|
|
bits<4> Qm;
|
|
|
|
// Mk is already declared with the same width in MVE_VPT; repeated here.
bits<4> Mk;
|
|
|
|
|
|
|
|
let Inst{6} = 0b0;
|
|
|
|
// Qm is split: its top bit goes to Inst{5}, the low three bits to Inst{3-1}.
let Inst{5} = Qm{3};
|
|
|
|
let Inst{3-1} = Qm{2-0};
|
|
|
|
// Middle bit of the condition; MVE_VPT places fc{2} and fc{0} at Inst{12}
// and Inst{7}.
let Inst{0} = fc{1};
|
2019-10-15 21:12:51 +08:00
|
|
|
// Same value as already set in the MVE_VPT base class.
let validForTailPredication = 1;
|
[ARM] Set up infrastructure for MVE vector instructions.
This commit prepares the way to start adding the main collection of
MVE instructions, which operate on the 128-bit vector registers.
The most obvious thing that's needed, and the simplest, is to add the
MQPR register class, which is like the existing QPR except that it has
fewer registers in it.
The more complicated part: MVE defines a system of vector predication,
in which instructions operating on 128-bit vector registers can be
constrained to operate on only a subset of the lanes, using a system
of prefix instructions similar to the existing Thumb IT, in that you
have one prefix instruction which designates up to 4 following
instructions as subject to predication, and within that sequence, the
predicate can be inverted by means of T/E suffixes ('Then' / 'Else').
To support instructions of this type, we've added two new Tablegen
classes `vpred_n` and `vpred_r` for standard clusters of MC operands
to add to a predicated instruction. Both include a flag indicating how
the instruction is predicated at all (options are T, E and 'not
predicated'), and an input register field for the register controlling
the set of active lanes. They differ from each other in that `vpred_r`
also includes an input operand for the previous value of the output
register, for instructions that leave inactive lanes unchanged.
`vpred_n` lacks that extra operand; it will be used for instructions
that don't preserve inactive lanes in their output register (either
because inactive lanes are zeroed, as the MVE load instructions do, or
because the output register isn't a vector at all).
This commit also adds the family of prefix instructions themselves
(VPT / VPST), and all the machinery needed to work with them in
assembly and disassembly (e.g. generating the 't' and 'e' mnemonic
suffixes on disassembled instructions within a predicated block)
I've added a couple of demo instructions that derive from the new
Tablegen base classes and use those two operand clusters. The bulk of
the vector instructions will come in followup commits small enough to
be manageable. (One exception is that I've added the full version of
`isMnemonicVPTPredicable` in the AsmParser, because it seemed
pointless to carefully split it up.)
Reviewers: dmgreen, samparker, SjoerdMeijer, t.p.northover
Subscribers: javed.absar, kristof.beyls, hiraditya, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D62669
llvm-svn: 363258
2019-06-13 21:11:13 +08:00
|
|
|
}
|
|
|
|
|
2019-06-18 23:05:42 +08:00
|
|
|
// MVE_VPTt1 specialised to take its condition as a pred_basic_i operand.
// Inst{12} and Inst{0}, which the parent classes fill from fc{2} and fc{1},
// are overridden with constant 0, so Inst{7} (fc{0}) is the only condition
// bit that still varies.
// NOTE(review): pred_basic_i presumably restricts $fc to the conditions
// meaningful for sign-agnostic integer compares -- confirm at its definition.
class MVE_VPTt1i<string suffix, bits<2> size>
|
|
|
|
: MVE_VPTt1<suffix, size,
|
2019-09-16 21:02:41 +08:00
|
|
|
(ins vpt_mask:$Mk, MQPR:$Qn, MQPR:$Qm, pred_basic_i:$fc)> {
|
[ARM] Set up infrastructure for MVE vector instructions.
This commit prepares the way to start adding the main collection of
MVE instructions, which operate on the 128-bit vector registers.
The most obvious thing that's needed, and the simplest, is to add the
MQPR register class, which is like the existing QPR except that it has
fewer registers in it.
The more complicated part: MVE defines a system of vector predication,
in which instructions operating on 128-bit vector registers can be
constrained to operate on only a subset of the lanes, using a system
of prefix instructions similar to the existing Thumb IT, in that you
have one prefix instruction which designates up to 4 following
instructions as subject to predication, and within that sequence, the
predicate can be inverted by means of T/E suffixes ('Then' / 'Else').
To support instructions of this type, we've added two new Tablegen
classes `vpred_n` and `vpred_r` for standard clusters of MC operands
to add to a predicated instruction. Both include a flag indicating how
the instruction is predicated at all (options are T, E and 'not
predicated'), and an input register field for the register controlling
the set of active lanes. They differ from each other in that `vpred_r`
also includes an input operand for the previous value of the output
register, for instructions that leave inactive lanes unchanged.
`vpred_n` lacks that extra operand; it will be used for instructions
that don't preserve inactive lanes in their output register (either
because inactive lanes are zeroed, as the MVE load instructions do, or
because the output register isn't a vector at all).
This commit also adds the family of prefix instructions themselves
(VPT / VPST), and all the machinery needed to work with them in
assembly and disassembly (e.g. generating the 't' and 'e' mnemonic
suffixes on disassembled instructions within a predicated block)
I've added a couple of demo instructions that derive from the new
Tablegen base classes and use those two operand clusters. The bulk of
the vector instructions will come in followup commits small enough to
be manageable. (One exception is that I've added the full version of
`isMnemonicVPTPredicable` in the AsmParser, because it seemed
pointless to carefully split it up.)
Reviewers: dmgreen, samparker, SjoerdMeijer, t.p.northover
Subscribers: javed.absar, kristof.beyls, hiraditya, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D62669
llvm-svn: 363258
2019-06-13 21:11:13 +08:00
|
|
|
// Replaces the inherited `Inst{12} = fc{2}` with a fixed 0.
let Inst{12} = 0b0;
|
|
|
|
// Replaces the inherited `Inst{0} = fc{1}` with a fixed 0.
let Inst{0} = 0b0;
|
|
|
|
}
|
|
|
|
|
2019-06-18 23:05:42 +08:00
|
|
|
// Concrete VPT instructions for the pred_basic_i form, one per lane width.
// The second argument is the `size` value that MVE_VPT places in
// Inst{21-20}: 0b10 with the "i32" suffix, 0b01 with "i16", 0b00 with "i8".
def MVE_VPTv4i32 : MVE_VPTt1i<"i32", 0b10>;
|
|
|
|
def MVE_VPTv8i16 : MVE_VPTt1i<"i16", 0b01>;
|
|
|
|
def MVE_VPTv16i8 : MVE_VPTt1i<"i8", 0b00>;
|
[ARM] Set up infrastructure for MVE vector instructions.
This commit prepares the way to start adding the main collection of
MVE instructions, which operate on the 128-bit vector registers.
The most obvious thing that's needed, and the simplest, is to add the
MQPR register class, which is like the existing QPR except that it has
fewer registers in it.
The more complicated part: MVE defines a system of vector predication,
in which instructions operating on 128-bit vector registers can be
constrained to operate on only a subset of the lanes, using a system
of prefix instructions similar to the existing Thumb IT, in that you
have one prefix instruction which designates up to 4 following
instructions as subject to predication, and within that sequence, the
predicate can be inverted by means of T/E suffixes ('Then' / 'Else').
To support instructions of this type, we've added two new Tablegen
classes `vpred_n` and `vpred_r` for standard clusters of MC operands
to add to a predicated instruction. Both include a flag indicating how
the instruction is predicated at all (options are T, E and 'not
predicated'), and an input register field for the register controlling
the set of active lanes. They differ from each other in that `vpred_r`
also includes an input operand for the previous value of the output
register, for instructions that leave inactive lanes unchanged.
`vpred_n` lacks that extra operand; it will be used for instructions
that don't preserve inactive lanes in their output register (either
because inactive lanes are zeroed, as the MVE load instructions do, or
because the output register isn't a vector at all).
This commit also adds the family of prefix instructions themselves
(VPT / VPST), and all the machinery needed to work with them in
assembly and disassembly (e.g. generating the 't' and 'e' mnemonic
suffixes on disassembled instructions within a predicated block)
I've added a couple of demo instructions that derive from the new
Tablegen base classes and use those two operand clusters. The bulk of
the vector instructions will come in followup commits small enough to
be manageable. (One exception is that I've added the full version of
`isMnemonicVPTPredicable` in the AsmParser, because it seemed
pointless to carefully split it up.)
Reviewers: dmgreen, samparker, SjoerdMeijer, t.p.northover
Subscribers: javed.absar, kristof.beyls, hiraditya, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D62669
llvm-svn: 363258
2019-06-13 21:11:13 +08:00
|
|
|
|
2019-06-18 23:05:42 +08:00
|
|
|
// MVE_VPTt1 specialised to take its condition as a pred_basic_u operand.
// Inst{12} is forced to 0 and Inst{0} to 1, overriding the fc{2} and fc{1}
// assignments made by the parent classes; Inst{7} (fc{0}) is the only
// condition bit that still varies.
// NOTE(review): pred_basic_u presumably restricts $fc to the unsigned
// comparison conditions -- confirm at its definition.
class MVE_VPTt1u<string suffix, bits<2> size>
|
|
|
|
: MVE_VPTt1<suffix, size,
|
2019-09-16 21:02:41 +08:00
|
|
|
(ins vpt_mask:$Mk, MQPR:$Qn, MQPR:$Qm, pred_basic_u:$fc)> {
|
[ARM] Set up infrastructure for MVE vector instructions.
This commit prepares the way to start adding the main collection of
MVE instructions, which operate on the 128-bit vector registers.
The most obvious thing that's needed, and the simplest, is to add the
MQPR register class, which is like the existing QPR except that it has
fewer registers in it.
The more complicated part: MVE defines a system of vector predication,
in which instructions operating on 128-bit vector registers can be
constrained to operate on only a subset of the lanes, using a system
of prefix instructions similar to the existing Thumb IT, in that you
have one prefix instruction which designates up to 4 following
instructions as subject to predication, and within that sequence, the
predicate can be inverted by means of T/E suffixes ('Then' / 'Else').
To support instructions of this type, we've added two new Tablegen
classes `vpred_n` and `vpred_r` for standard clusters of MC operands
to add to a predicated instruction. Both include a flag indicating how
the instruction is predicated at all (options are T, E and 'not
predicated'), and an input register field for the register controlling
the set of active lanes. They differ from each other in that `vpred_r`
also includes an input operand for the previous value of the output
register, for instructions that leave inactive lanes unchanged.
`vpred_n` lacks that extra operand; it will be used for instructions
that don't preserve inactive lanes in their output register (either
because inactive lanes are zeroed, as the MVE load instructions do, or
because the output register isn't a vector at all).
This commit also adds the family of prefix instructions themselves
(VPT / VPST), and all the machinery needed to work with them in
assembly and disassembly (e.g. generating the 't' and 'e' mnemonic
suffixes on disassembled instructions within a predicated block)
I've added a couple of demo instructions that derive from the new
Tablegen base classes and use those two operand clusters. The bulk of
the vector instructions will come in followup commits small enough to
be manageable. (One exception is that I've added the full version of
`isMnemonicVPTPredicable` in the AsmParser, because it seemed
pointless to carefully split it up.)
Reviewers: dmgreen, samparker, SjoerdMeijer, t.p.northover
Subscribers: javed.absar, kristof.beyls, hiraditya, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D62669
llvm-svn: 363258
2019-06-13 21:11:13 +08:00
|
|
|
// Replaces the inherited `Inst{12} = fc{2}` with a fixed 0.
let Inst{12} = 0b0;
|
|
|
|
// Replaces the inherited `Inst{0} = fc{1}` with a fixed 1.
let Inst{0} = 0b1;
|
|
|
|
}
|
|
|
|
|
2019-06-18 23:05:42 +08:00
|
|
|
// Concrete VPT instructions for the pred_basic_u form, one per lane width.
// The second argument is the `size` value that MVE_VPT places in
// Inst{21-20}: 0b10 with the "u32" suffix, 0b01 with "u16", 0b00 with "u8".
def MVE_VPTv4u32 : MVE_VPTt1u<"u32", 0b10>;
|
|
|
|
def MVE_VPTv8u16 : MVE_VPTt1u<"u16", 0b01>;
|
|
|
|
def MVE_VPTv16u8 : MVE_VPTt1u<"u8", 0b00>;
|
[ARM] Set up infrastructure for MVE vector instructions.
This commit prepares the way to start adding the main collection of
MVE instructions, which operate on the 128-bit vector registers.
The most obvious thing that's needed, and the simplest, is to add the
MQPR register class, which is like the existing QPR except that it has
fewer registers in it.
The more complicated part: MVE defines a system of vector predication,
in which instructions operating on 128-bit vector registers can be
constrained to operate on only a subset of the lanes, using a system
of prefix instructions similar to the existing Thumb IT, in that you
have one prefix instruction which designates up to 4 following
instructions as subject to predication, and within that sequence, the
predicate can be inverted by means of T/E suffixes ('Then' / 'Else').
To support instructions of this type, we've added two new Tablegen
classes `vpred_n` and `vpred_r` for standard clusters of MC operands
to add to a predicated instruction. Both include a flag indicating how
the instruction is predicated at all (options are T, E and 'not
predicated'), and an input register field for the register controlling
the set of active lanes. They differ from each other in that `vpred_r`
also includes an input operand for the previous value of the output
register, for instructions that leave inactive lanes unchanged.
`vpred_n` lacks that extra operand; it will be used for instructions
that don't preserve inactive lanes in their output register (either
because inactive lanes are zeroed, as the MVE load instructions do, or
because the output register isn't a vector at all).
This commit also adds the family of prefix instructions themselves
(VPT / VPST), and all the machinery needed to work with them in
assembly and disassembly (e.g. generating the 't' and 'e' mnemonic
suffixes on disassembled instructions within a predicated block)
I've added a couple of demo instructions that derive from the new
Tablegen base classes and use those two operand clusters. The bulk of
the vector instructions will come in followup commits small enough to
be manageable. (One exception is that I've added the full version of
`isMnemonicVPTPredicable` in the AsmParser, because it seemed
pointless to carefully split it up.)
Reviewers: dmgreen, samparker, SjoerdMeijer, t.p.northover
Subscribers: javed.absar, kristof.beyls, hiraditya, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D62669
llvm-svn: 363258
2019-06-13 21:11:13 +08:00
|
|
|
|
2019-06-18 23:05:42 +08:00
|
|
|
// MVE_VPTt1 specialised to take its condition as a pred_basic_s operand.
// Only Inst{12} is overridden (forced to 1); Inst{0} keeps the fc{1}
// assignment inherited from MVE_VPTt1 and Inst{7} keeps fc{0} from MVE_VPT,
// so two condition bits remain variable here.
// NOTE(review): pred_basic_s presumably restricts $fc to the signed
// comparison conditions -- confirm at its definition.
class MVE_VPTt1s<string suffix, bits<2> size>
|
|
|
|
: MVE_VPTt1<suffix, size,
|
2019-09-16 21:02:41 +08:00
|
|
|
(ins vpt_mask:$Mk, MQPR:$Qn, MQPR:$Qm, pred_basic_s:$fc)> {
|
[ARM] Set up infrastructure for MVE vector instructions.
This commit prepares the way to start adding the main collection of
MVE instructions, which operate on the 128-bit vector registers.
The most obvious thing that's needed, and the simplest, is to add the
MQPR register class, which is like the existing QPR except that it has
fewer registers in it.
The more complicated part: MVE defines a system of vector predication,
in which instructions operating on 128-bit vector registers can be
constrained to operate on only a subset of the lanes, using a system
of prefix instructions similar to the existing Thumb IT, in that you
have one prefix instruction which designates up to 4 following
instructions as subject to predication, and within that sequence, the
predicate can be inverted by means of T/E suffixes ('Then' / 'Else').
To support instructions of this type, we've added two new Tablegen
classes `vpred_n` and `vpred_r` for standard clusters of MC operands
to add to a predicated instruction. Both include a flag indicating how
the instruction is predicated at all (options are T, E and 'not
predicated'), and an input register field for the register controlling
the set of active lanes. They differ from each other in that `vpred_r`
also includes an input operand for the previous value of the output
register, for instructions that leave inactive lanes unchanged.
`vpred_n` lacks that extra operand; it will be used for instructions
that don't preserve inactive lanes in their output register (either
because inactive lanes are zeroed, as the MVE load instructions do, or
because the output register isn't a vector at all).
This commit also adds the family of prefix instructions themselves
(VPT / VPST), and all the machinery needed to work with them in
assembly and disassembly (e.g. generating the 't' and 'e' mnemonic
suffixes on disassembled instructions within a predicated block)
I've added a couple of demo instructions that derive from the new
Tablegen base classes and use those two operand clusters. The bulk of
the vector instructions will come in followup commits small enough to
be manageable. (One exception is that I've added the full version of
`isMnemonicVPTPredicable` in the AsmParser, because it seemed
pointless to carefully split it up.)
Reviewers: dmgreen, samparker, SjoerdMeijer, t.p.northover
Subscribers: javed.absar, kristof.beyls, hiraditya, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D62669
llvm-svn: 363258
2019-06-13 21:11:13 +08:00
|
|
|
// Replaces the inherited `Inst{12} = fc{2}` with a fixed 1.
let Inst{12} = 0b1;
|
|
|
|
}
|
|
|
|
|
2019-06-18 23:05:42 +08:00
|
|
|
// Concrete records instantiating MVE_VPTt1s. Each one passes a type-suffix
// string and a 2-bit size value:
//   "s32" -> 0b10, "s16" -> 0b01, "s8" -> 0b00.
def MVE_VPTv4s32 : MVE_VPTt1s<"s32", 0b10>;
|
|
|
|
def MVE_VPTv8s16 : MVE_VPTt1s<"s16", 0b01>;
|
|
|
|
def MVE_VPTv16s8 : MVE_VPTt1s<"s8", 0b00>;
|
[ARM] Set up infrastructure for MVE vector instructions.
This commit prepares the way to start adding the main collection of
MVE instructions, which operate on the 128-bit vector registers.
The most obvious thing that's needed, and the simplest, is to add the
MQPR register class, which is like the existing QPR except that it has
fewer registers in it.
The more complicated part: MVE defines a system of vector predication,
in which instructions operating on 128-bit vector registers can be
constrained to operate on only a subset of the lanes, using a system
of prefix instructions similar to the existing Thumb IT, in that you
have one prefix instruction which designates up to 4 following
instructions as subject to predication, and within that sequence, the
predicate can be inverted by means of T/E suffixes ('Then' / 'Else').
To support instructions of this type, we've added two new Tablegen
classes `vpred_n` and `vpred_r` for standard clusters of MC operands
to add to a predicated instruction. Both include a flag indicating
whether and how the instruction is predicated (options are T, E and 'not
predicated'), and an input register field for the register controlling
the set of active lanes. They differ from each other in that `vpred_r`
also includes an input operand for the previous value of the output
register, for instructions that leave inactive lanes unchanged.
`vpred_n` lacks that extra operand; it will be used for instructions
that don't preserve inactive lanes in their output register (either
because inactive lanes are zeroed, as the MVE load instructions do, or
because the output register isn't a vector at all).
This commit also adds the family of prefix instructions themselves
(VPT / VPST), and all the machinery needed to work with them in
assembly and disassembly (e.g. generating the 't' and 'e' mnemonic
suffixes on disassembled instructions within a predicated block).
I've added a couple of demo instructions that derive from the new
Tablegen base classes and use those two operand clusters. The bulk of
the vector instructions will come in followup commits small enough to
be manageable. (One exception is that I've added the full version of
`isMnemonicVPTPredicable` in the AsmParser, because it seemed
pointless to carefully split it up.)
Reviewers: dmgreen, samparker, SjoerdMeijer, t.p.northover
Subscribers: javed.absar, kristof.beyls, hiraditya, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D62669
llvm-svn: 363258
2019-06-13 21:11:13 +08:00
|
|
|
|
2019-06-18 23:05:42 +08:00
|
|
|
// MVE_VPTt2: intermediate class for VPT forms whose assembly operand string
// is "$fc, $Qn, $Rm". It forwards `suffix`, `size` and `iops` to MVE_VPT and
// adds the encoding bits listed below.
//   suffix - type-suffix string, passed through to MVE_VPT.
//   size   - 2-bit size value, passed through to MVE_VPT.
//   iops   - input operand dag, supplied by the subclasses.
// NOTE(review): MVE_VPT itself is defined outside this excerpt; `Mk` is
// declared here but not placed into Inst by this class, so it is presumably
// encoded by the parent -- confirm against MVE_VPT.
class MVE_VPTt2<string suffix, bits<2> size, dag iops>
|
|
|
|
: MVE_VPT<suffix, size, iops,
|
[ARM] Set up infrastructure for MVE vector instructions.
This commit prepares the way to start adding the main collection of
MVE instructions, which operate on the 128-bit vector registers.
The most obvious thing that's needed, and the simplest, is to add the
MQPR register class, which is like the existing QPR except that it has
fewer registers in it.
The more complicated part: MVE defines a system of vector predication,
in which instructions operating on 128-bit vector registers can be
constrained to operate on only a subset of the lanes, using a system
of prefix instructions similar to the existing Thumb IT, in that you
have one prefix instruction which designates up to 4 following
instructions as subject to predication, and within that sequence, the
predicate can be inverted by means of T/E suffixes ('Then' / 'Else').
To support instructions of this type, we've added two new Tablegen
classes `vpred_n` and `vpred_r` for standard clusters of MC operands
to add to a predicated instruction. Both include a flag indicating how
the instruction is predicated at all (options are T, E and 'not
predicated'), and an input register field for the register controlling
the set of active lanes. They differ from each other in that `vpred_r`
also includes an input operand for the previous value of the output
register, for instructions that leave inactive lanes unchanged.
`vpred_n` lacks that extra operand; it will be used for instructions
that don't preserve inactive lanes in their output register (either
because inactive lanes are zeroed, as the MVE load instructions do, or
because the output register isn't a vector at all).
This commit also adds the family of prefix instructions themselves
(VPT / VPST), and all the machinery needed to work with them in
assembly and disassembly (e.g. generating the 't' and 'e' mnemonic
suffixes on disassembled instructions within a predicated block)
I've added a couple of demo instructions that derive from the new
Tablegen base classes and use those two operand clusters. The bulk of
the vector instructions will come in followup commits small enough to
be manageable. (One exception is that I've added the full version of
`isMnemonicVPTPredicable` in the AsmParser, because it seemed
pointless to carefully split it up.)
Reviewers: dmgreen, samparker, SjoerdMeijer, t.p.northover
Subscribers: javed.absar, kristof.beyls, hiraditya, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D62669
llvm-svn: 363258
2019-06-13 21:11:13 +08:00
|
|
|
"$fc, $Qn, $Rm"> {
|
|
|
|
// 4-bit field for the $Rm operand.
bits<4> Rm;
|
|
|
|
// 3-bit field for the $fc operand.
bits<3> fc;
|
|
|
|
// 4-bit field for the $Mk operand.
bits<4> Mk;
|
|
|
|
|
|
|
|
// Bit 6 is fixed to 1 for every class derived from MVE_VPTt2.
let Inst{6} = 0b1;
|
|
|
|
// Default: bit 5 carries fc{1}. MVE_VPTt2i and MVE_VPTt2u re-assign this
// bit to a constant; MVE_VPTt2s leaves this default in place.
let Inst{5} = fc{1};
|
|
|
|
// The low four bits hold Rm.
let Inst{3-0} = Rm{3-0};
|
|
|
|
|
}
|
|
|
|
|
2019-06-18 23:05:42 +08:00
|
|
|
// MVE_VPTt2i: MVE_VPTt2 specialised with the operand list
// (vpt_mask:$Mk, MQPR:$Qn, GPRwithZR:$Rm, pred_basic_i:$fc).
// It fixes Inst{12} to 0 and replaces the inherited Inst{5} = fc{1}
// with the constant 0.
//   suffix - type-suffix string, forwarded to MVE_VPTt2.
//   size   - 2-bit size value, forwarded to MVE_VPTt2.
class MVE_VPTt2i<string suffix, bits<2> size>
|
|
|
|
: MVE_VPTt2<suffix, size,
|
2019-09-16 21:02:41 +08:00
|
|
|
(ins vpt_mask:$Mk, MQPR:$Qn, GPRwithZR:$Rm, pred_basic_i:$fc)> {
|
[ARM] Set up infrastructure for MVE vector instructions.
This commit prepares the way to start adding the main collection of
MVE instructions, which operate on the 128-bit vector registers.
The most obvious thing that's needed, and the simplest, is to add the
MQPR register class, which is like the existing QPR except that it has
fewer registers in it.
The more complicated part: MVE defines a system of vector predication,
in which instructions operating on 128-bit vector registers can be
constrained to operate on only a subset of the lanes, using a system
of prefix instructions similar to the existing Thumb IT, in that you
have one prefix instruction which designates up to 4 following
instructions as subject to predication, and within that sequence, the
predicate can be inverted by means of T/E suffixes ('Then' / 'Else').
To support instructions of this type, we've added two new Tablegen
classes `vpred_n` and `vpred_r` for standard clusters of MC operands
to add to a predicated instruction. Both include a flag indicating how
the instruction is predicated at all (options are T, E and 'not
predicated'), and an input register field for the register controlling
the set of active lanes. They differ from each other in that `vpred_r`
also includes an input operand for the previous value of the output
register, for instructions that leave inactive lanes unchanged.
`vpred_n` lacks that extra operand; it will be used for instructions
that don't preserve inactive lanes in their output register (either
because inactive lanes are zeroed, as the MVE load instructions do, or
because the output register isn't a vector at all).
This commit also adds the family of prefix instructions themselves
(VPT / VPST), and all the machinery needed to work with them in
assembly and disassembly (e.g. generating the 't' and 'e' mnemonic
suffixes on disassembled instructions within a predicated block)
I've added a couple of demo instructions that derive from the new
Tablegen base classes and use those two operand clusters. The bulk of
the vector instructions will come in followup commits small enough to
be manageable. (One exception is that I've added the full version of
`isMnemonicVPTPredicable` in the AsmParser, because it seemed
pointless to carefully split it up.)
Reviewers: dmgreen, samparker, SjoerdMeijer, t.p.northover
Subscribers: javed.absar, kristof.beyls, hiraditya, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D62669
llvm-svn: 363258
2019-06-13 21:11:13 +08:00
|
|
|
// Bit 12 is a constant 0 for this form.
let Inst{12} = 0b0;
|
|
|
|
// Overrides the fc{1} default inherited from MVE_VPTt2.
let Inst{5} = 0b0;
|
|
|
|
}
|
|
|
|
|
2019-06-18 23:05:42 +08:00
|
|
|
// Concrete records instantiating MVE_VPTt2i (the form taking a
// GPRwithZR:$Rm operand). Each one passes a type-suffix string and a
// 2-bit size value:
//   "i32" -> 0b10, "i16" -> 0b01, "i8" -> 0b00.
def MVE_VPTv4i32r : MVE_VPTt2i<"i32", 0b10>;
|
|
|
|
def MVE_VPTv8i16r : MVE_VPTt2i<"i16", 0b01>;
|
|
|
|
def MVE_VPTv16i8r : MVE_VPTt2i<"i8", 0b00>;
|
[ARM] Set up infrastructure for MVE vector instructions.
This commit prepares the way to start adding the main collection of
MVE instructions, which operate on the 128-bit vector registers.
The most obvious thing that's needed, and the simplest, is to add the
MQPR register class, which is like the existing QPR except that it has
fewer registers in it.
The more complicated part: MVE defines a system of vector predication,
in which instructions operating on 128-bit vector registers can be
constrained to operate on only a subset of the lanes, using a system
of prefix instructions similar to the existing Thumb IT, in that you
have one prefix instruction which designates up to 4 following
instructions as subject to predication, and within that sequence, the
predicate can be inverted by means of T/E suffixes ('Then' / 'Else').
To support instructions of this type, we've added two new Tablegen
classes `vpred_n` and `vpred_r` for standard clusters of MC operands
to add to a predicated instruction. Both include a flag indicating
whether and how the instruction is predicated (options are T, E and 'not
predicated'), and an input register field for the register controlling
the set of active lanes. They differ from each other in that `vpred_r`
also includes an input operand for the previous value of the output
register, for instructions that leave inactive lanes unchanged.
`vpred_n` lacks that extra operand; it will be used for instructions
that don't preserve inactive lanes in their output register (either
because inactive lanes are zeroed, as the MVE load instructions do, or
because the output register isn't a vector at all).
This commit also adds the family of prefix instructions themselves
(VPT / VPST), and all the machinery needed to work with them in
assembly and disassembly (e.g. generating the 't' and 'e' mnemonic
suffixes on disassembled instructions within a predicated block).
I've added a couple of demo instructions that derive from the new
Tablegen base classes and use those two operand clusters. The bulk of
the vector instructions will come in followup commits small enough to
be manageable. (One exception is that I've added the full version of
`isMnemonicVPTPredicable` in the AsmParser, because it seemed
pointless to carefully split it up.)
Reviewers: dmgreen, samparker, SjoerdMeijer, t.p.northover
Subscribers: javed.absar, kristof.beyls, hiraditya, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D62669
llvm-svn: 363258
2019-06-13 21:11:13 +08:00
|
|
|
|
2019-06-18 23:05:42 +08:00
|
|
|
// MVE_VPTt2u: MVE_VPTt2 specialised with the operand list
// (vpt_mask:$Mk, MQPR:$Qn, GPRwithZR:$Rm, pred_basic_u:$fc).
// It fixes Inst{12} to 0 and replaces the inherited Inst{5} = fc{1}
// with the constant 1.
//   suffix - type-suffix string, forwarded to MVE_VPTt2.
//   size   - 2-bit size value, forwarded to MVE_VPTt2.
class MVE_VPTt2u<string suffix, bits<2> size>
|
|
|
|
: MVE_VPTt2<suffix, size,
|
2019-09-16 21:02:41 +08:00
|
|
|
(ins vpt_mask:$Mk, MQPR:$Qn, GPRwithZR:$Rm, pred_basic_u:$fc)> {
|
[ARM] Set up infrastructure for MVE vector instructions.
This commit prepares the way to start adding the main collection of
MVE instructions, which operate on the 128-bit vector registers.
The most obvious thing that's needed, and the simplest, is to add the
MQPR register class, which is like the existing QPR except that it has
fewer registers in it.
The more complicated part: MVE defines a system of vector predication,
in which instructions operating on 128-bit vector registers can be
constrained to operate on only a subset of the lanes, using a system
of prefix instructions similar to the existing Thumb IT, in that you
have one prefix instruction which designates up to 4 following
instructions as subject to predication, and within that sequence, the
predicate can be inverted by means of T/E suffixes ('Then' / 'Else').
To support instructions of this type, we've added two new Tablegen
classes `vpred_n` and `vpred_r` for standard clusters of MC operands
to add to a predicated instruction. Both include a flag indicating how
the instruction is predicated at all (options are T, E and 'not
predicated'), and an input register field for the register controlling
the set of active lanes. They differ from each other in that `vpred_r`
also includes an input operand for the previous value of the output
register, for instructions that leave inactive lanes unchanged.
`vpred_n` lacks that extra operand; it will be used for instructions
that don't preserve inactive lanes in their output register (either
because inactive lanes are zeroed, as the MVE load instructions do, or
because the output register isn't a vector at all).
This commit also adds the family of prefix instructions themselves
(VPT / VPST), and all the machinery needed to work with them in
assembly and disassembly (e.g. generating the 't' and 'e' mnemonic
suffixes on disassembled instructions within a predicated block)
I've added a couple of demo instructions that derive from the new
Tablegen base classes and use those two operand clusters. The bulk of
the vector instructions will come in followup commits small enough to
be manageable. (One exception is that I've added the full version of
`isMnemonicVPTPredicable` in the AsmParser, because it seemed
pointless to carefully split it up.)
Reviewers: dmgreen, samparker, SjoerdMeijer, t.p.northover
Subscribers: javed.absar, kristof.beyls, hiraditya, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D62669
llvm-svn: 363258
2019-06-13 21:11:13 +08:00
|
|
|
// Bit 12 is a constant 0 for this form.
let Inst{12} = 0b0;
|
|
|
|
// Overrides the fc{1} default inherited from MVE_VPTt2.
let Inst{5} = 0b1;
|
|
|
|
}
|
|
|
|
|
2019-06-18 23:05:42 +08:00
|
|
|
// Concrete records instantiating MVE_VPTt2u (the form taking a
// GPRwithZR:$Rm operand). Each one passes a type-suffix string and a
// 2-bit size value:
//   "u32" -> 0b10, "u16" -> 0b01, "u8" -> 0b00.
def MVE_VPTv4u32r : MVE_VPTt2u<"u32", 0b10>;
|
|
|
|
def MVE_VPTv8u16r : MVE_VPTt2u<"u16", 0b01>;
|
|
|
|
def MVE_VPTv16u8r : MVE_VPTt2u<"u8", 0b00>;
|
[ARM] Set up infrastructure for MVE vector instructions.
This commit prepares the way to start adding the main collection of
MVE instructions, which operate on the 128-bit vector registers.
The most obvious thing that's needed, and the simplest, is to add the
MQPR register class, which is like the existing QPR except that it has
fewer registers in it.
The more complicated part: MVE defines a system of vector predication,
in which instructions operating on 128-bit vector registers can be
constrained to operate on only a subset of the lanes, using a system
of prefix instructions similar to the existing Thumb IT, in that you
have one prefix instruction which designates up to 4 following
instructions as subject to predication, and within that sequence, the
predicate can be inverted by means of T/E suffixes ('Then' / 'Else').
To support instructions of this type, we've added two new Tablegen
classes `vpred_n` and `vpred_r` for standard clusters of MC operands
to add to a predicated instruction. Both include a flag indicating
whether and how the instruction is predicated (options are T, E and 'not
predicated'), and an input register field for the register controlling
the set of active lanes. They differ from each other in that `vpred_r`
also includes an input operand for the previous value of the output
register, for instructions that leave inactive lanes unchanged.
`vpred_n` lacks that extra operand; it will be used for instructions
that don't preserve inactive lanes in their output register (either
because inactive lanes are zeroed, as the MVE load instructions do, or
because the output register isn't a vector at all).
This commit also adds the family of prefix instructions themselves
(VPT / VPST), and all the machinery needed to work with them in
assembly and disassembly (e.g. generating the 't' and 'e' mnemonic
suffixes on disassembled instructions within a predicated block).
I've added a couple of demo instructions that derive from the new
Tablegen base classes and use those two operand clusters. The bulk of
the vector instructions will come in followup commits small enough to
be manageable. (One exception is that I've added the full version of
`isMnemonicVPTPredicable` in the AsmParser, because it seemed
pointless to carefully split it up.)
Reviewers: dmgreen, samparker, SjoerdMeijer, t.p.northover
Subscribers: javed.absar, kristof.beyls, hiraditya, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D62669
llvm-svn: 363258
2019-06-13 21:11:13 +08:00
|
|
|
|
2019-06-18 23:05:42 +08:00
|
|
|
// MVE_VPTt2s: MVE_VPTt2 specialised with the operand list
// (vpt_mask:$Mk, MQPR:$Qn, GPRwithZR:$Rm, pred_basic_s:$fc).
// It fixes Inst{12} to 1. Unlike MVE_VPTt2i and MVE_VPTt2u it does not
// re-assign Inst{5}, so the inherited Inst{5} = fc{1} stays in effect.
//   suffix - type-suffix string, forwarded to MVE_VPTt2.
//   size   - 2-bit size value, forwarded to MVE_VPTt2.
class MVE_VPTt2s<string suffix, bits<2> size>
|
|
|
|
: MVE_VPTt2<suffix, size,
|
2019-09-16 21:02:41 +08:00
|
|
|
(ins vpt_mask:$Mk, MQPR:$Qn, GPRwithZR:$Rm, pred_basic_s:$fc)> {
|
[ARM] Set up infrastructure for MVE vector instructions.
This commit prepares the way to start adding the main collection of
MVE instructions, which operate on the 128-bit vector registers.
The most obvious thing that's needed, and the simplest, is to add the
MQPR register class, which is like the existing QPR except that it has
fewer registers in it.
The more complicated part: MVE defines a system of vector predication,
in which instructions operating on 128-bit vector registers can be
constrained to operate on only a subset of the lanes, using a system
of prefix instructions similar to the existing Thumb IT, in that you
have one prefix instruction which designates up to 4 following
instructions as subject to predication, and within that sequence, the
predicate can be inverted by means of T/E suffixes ('Then' / 'Else').
To support instructions of this type, we've added two new Tablegen
classes `vpred_n` and `vpred_r` for standard clusters of MC operands
to add to a predicated instruction. Both include a flag indicating how
the instruction is predicated at all (options are T, E and 'not
predicated'), and an input register field for the register controlling
the set of active lanes. They differ from each other in that `vpred_r`
also includes an input operand for the previous value of the output
register, for instructions that leave inactive lanes unchanged.
`vpred_n` lacks that extra operand; it will be used for instructions
that don't preserve inactive lanes in their output register (either
because inactive lanes are zeroed, as the MVE load instructions do, or
because the output register isn't a vector at all).
This commit also adds the family of prefix instructions themselves
(VPT / VPST), and all the machinery needed to work with them in
assembly and disassembly (e.g. generating the 't' and 'e' mnemonic
suffixes on disassembled instructions within a predicated block)
I've added a couple of demo instructions that derive from the new
Tablegen base classes and use those two operand clusters. The bulk of
the vector instructions will come in followup commits small enough to
be manageable. (One exception is that I've added the full version of
`isMnemonicVPTPredicable` in the AsmParser, because it seemed
pointless to carefully split it up.)
Reviewers: dmgreen, samparker, SjoerdMeijer, t.p.northover
Subscribers: javed.absar, kristof.beyls, hiraditya, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D62669
llvm-svn: 363258
2019-06-13 21:11:13 +08:00
|
|
|
// Bit 12 is a constant 1 for this form.
let Inst{12} = 0b1;
|
|
|
|
}
|
|
|
|
|
2019-06-18 23:05:42 +08:00
|
|
|
// Concrete records instantiating MVE_VPTt2s (the form taking a
// GPRwithZR:$Rm operand). Each one passes a type-suffix string and a
// 2-bit size value:
//   "s32" -> 0b10, "s16" -> 0b01, "s8" -> 0b00.
def MVE_VPTv4s32r : MVE_VPTt2s<"s32", 0b10>;
|
|
|
|
def MVE_VPTv8s16r : MVE_VPTt2s<"s16", 0b01>;
|
|
|
|
def MVE_VPTv16s8r : MVE_VPTt2s<"s8", 0b00>;
|
[ARM] Set up infrastructure for MVE vector instructions.
This commit prepares the way to start adding the main collection of
MVE instructions, which operate on the 128-bit vector registers.
The most obvious thing that's needed, and the simplest, is to add the
MQPR register class, which is like the existing QPR except that it has
fewer registers in it.
The more complicated part: MVE defines a system of vector predication,
in which instructions operating on 128-bit vector registers can be
constrained to operate on only a subset of the lanes, using a system
of prefix instructions similar to the existing Thumb IT, in that you
have one prefix instruction which designates up to 4 following
instructions as subject to predication, and within that sequence, the
predicate can be inverted by means of T/E suffixes ('Then' / 'Else').
To support instructions of this type, we've added two new Tablegen
classes `vpred_n` and `vpred_r` for standard clusters of MC operands
to add to a predicated instruction. Both include a flag indicating
whether and how the instruction is predicated (options are T, E and 'not
predicated'), and an input register field for the register controlling
the set of active lanes. They differ from each other in that `vpred_r`
also includes an input operand for the previous value of the output
register, for instructions that leave inactive lanes unchanged.
`vpred_n` lacks that extra operand; it will be used for instructions
that don't preserve inactive lanes in their output register (either
because inactive lanes are zeroed, as the MVE load instructions do, or
because the output register isn't a vector at all).
This commit also adds the family of prefix instructions themselves
(VPT / VPST), and all the machinery needed to work with them in
assembly and disassembly (e.g. generating the 't' and 'e' mnemonic
suffixes on disassembled instructions within a predicated block).
I've added a couple of demo instructions that derive from the new
Tablegen base classes and use those two operand clusters. The bulk of
the vector instructions will come in followup commits small enough to
be manageable. (One exception is that I've added the full version of
`isMnemonicVPTPredicable` in the AsmParser, because it seemed
pointless to carefully split it up.)
Reviewers: dmgreen, samparker, SjoerdMeijer, t.p.northover
Subscribers: javed.absar, kristof.beyls, hiraditya, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D62669
llvm-svn: 363258
2019-06-13 21:11:13 +08:00
|
|
|
|
|
|
|
|
2019-06-18 23:05:42 +08:00
|
|
|
// MVE_VPTf: base class for the "vpt" instruction forms that are gated on
// HasMVEFloat. It derives from MVE_MI with an empty output list and builds
// the mnemonic as "vpt" # "${Mk}" # "." # suffix.
//   suffix  - type-suffix string appended to the mnemonic.
//   size    - single bit placed in Inst{28}.
//   iops    - input operand dag, supplied by subclasses.
//   asm     - assembly operand string, supplied by subclasses.
//   pattern - optional pattern list, defaults to empty.
// This class assigns Inst{31-7} and Inst{4}; it does not assign Inst{6-5}
// or Inst{3-0}, and it uses fc{2} and fc{0} but not fc{1}.
// NOTE(review): the remaining bits are presumably filled in by subclasses
// such as MVE_VPTft1 -- confirm against their definitions.
class MVE_VPTf<string suffix, bit size, dag iops, string asm, list<dag> pattern=[]>
|
[ARM] Set up infrastructure for MVE vector instructions.
This commit prepares the way to start adding the main collection of
MVE instructions, which operate on the 128-bit vector registers.
The most obvious thing that's needed, and the simplest, is to add the
MQPR register class, which is like the existing QPR except that it has
fewer registers in it.
The more complicated part: MVE defines a system of vector predication,
in which instructions operating on 128-bit vector registers can be
constrained to operate on only a subset of the lanes, using a system
of prefix instructions similar to the existing Thumb IT, in that you
have one prefix instruction which designates up to 4 following
instructions as subject to predication, and within that sequence, the
predicate can be inverted by means of T/E suffixes ('Then' / 'Else').
To support instructions of this type, we've added two new Tablegen
classes `vpred_n` and `vpred_r` for standard clusters of MC operands
to add to a predicated instruction. Both include a flag indicating how
the instruction is predicated at all (options are T, E and 'not
predicated'), and an input register field for the register controlling
the set of active lanes. They differ from each other in that `vpred_r`
also includes an input operand for the previous value of the output
register, for instructions that leave inactive lanes unchanged.
`vpred_n` lacks that extra operand; it will be used for instructions
that don't preserve inactive lanes in their output register (either
because inactive lanes are zeroed, as the MVE load instructions do, or
because the output register isn't a vector at all).
This commit also adds the family of prefix instructions themselves
(VPT / VPST), and all the machinery needed to work with them in
assembly and disassembly (e.g. generating the 't' and 'e' mnemonic
suffixes on disassembled instructions within a predicated block)
I've added a couple of demo instructions that derive from the new
Tablegen base classes and use those two operand clusters. The bulk of
the vector instructions will come in followup commits small enough to
be manageable. (One exception is that I've added the full version of
`isMnemonicVPTPredicable` in the AsmParser, because it seemed
pointless to carefully split it up.)
Reviewers: dmgreen, samparker, SjoerdMeijer, t.p.northover
Subscribers: javed.absar, kristof.beyls, hiraditya, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D62669
llvm-svn: 363258
2019-06-13 21:11:13 +08:00
|
|
|
: MVE_MI<(outs ), iops, NoItinerary, !strconcat("vpt", "${Mk}", ".", suffix), asm,
|
|
|
|
"", pattern> {
|
|
|
|
// 3-bit field for the $fc operand.
bits<3> fc;
|
|
|
|
// 4-bit field for the $Mk operand.
bits<4> Mk;
|
|
|
|
// 3-bit field for the $Qn operand.
bits<3> Qn;
|
|
|
|
|
|
|
|
// Fixed opcode bits, with the `size` parameter in bit 28.
let Inst{31-29} = 0b111;
|
|
|
|
let Inst{28} = size;
|
|
|
|
let Inst{27-23} = 0b11100;
|
|
|
|
// Mk is split across the encoding: its top bit goes here, the low three
// bits go in Inst{15-13}.
let Inst{22} = Mk{3};
|
|
|
|
let Inst{21-20} = 0b11;
|
|
|
|
// Qn occupies bits 19-17; bit 16 is a constant 1.
let Inst{19-17} = Qn{2-0};
|
|
|
|
let Inst{16} = 0b1;
|
|
|
|
let Inst{15-13} = Mk{2-0};
|
|
|
|
// fc is also split: fc{2} in bit 12 and fc{0} in bit 7.
let Inst{12} = fc{2};
|
|
|
|
let Inst{11-8} = 0b1111;
|
|
|
|
let Inst{7} = fc{0};
|
|
|
|
let Inst{4} = 0b0;
|
|
|
|
|
2019-09-16 21:02:41 +08:00
|
|
|
// The instruction is declared as defining VPR.
let Defs = [VPR];
|
[ARM] Set up infrastructure for MVE vector instructions.
This commit prepares the way to start adding the main collection of
MVE instructions, which operate on the 128-bit vector registers.
The most obvious thing that's needed, and the simplest, is to add the
MQPR register class, which is like the existing QPR except that it has
fewer registers in it.
The more complicated part: MVE defines a system of vector predication,
in which instructions operating on 128-bit vector registers can be
constrained to operate on only a subset of the lanes, using a system
of prefix instructions similar to the existing Thumb IT, in that you
have one prefix instruction which designates up to 4 following
instructions as subject to predication, and within that sequence, the
predicate can be inverted by means of T/E suffixes ('Then' / 'Else').
To support instructions of this type, we've added two new Tablegen
classes `vpred_n` and `vpred_r` for standard clusters of MC operands
to add to a predicated instruction. Both include a flag indicating how
the instruction is predicated at all (options are T, E and 'not
predicated'), and an input register field for the register controlling
the set of active lanes. They differ from each other in that `vpred_r`
also includes an input operand for the previous value of the output
register, for instructions that leave inactive lanes unchanged.
`vpred_n` lacks that extra operand; it will be used for instructions
that don't preserve inactive lanes in their output register (either
because inactive lanes are zeroed, as the MVE load instructions do, or
because the output register isn't a vector at all).
This commit also adds the family of prefix instructions themselves
(VPT / VPST), and all the machinery needed to work with them in
assembly and disassembly (e.g. generating the 't' and 'e' mnemonic
suffixes on disassembled instructions within a predicated block)
I've added a couple of demo instructions that derive from the new
Tablegen base classes and use those two operand clusters. The bulk of
the vector instructions will come in followup commits small enough to
be manageable. (One exception is that I've added the full version of
`isMnemonicVPTPredicable` in the AsmParser, because it seemed
pointless to carefully split it up.)
Reviewers: dmgreen, samparker, SjoerdMeijer, t.p.northover
Subscribers: javed.absar, kristof.beyls, hiraditya, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D62669
llvm-svn: 363258
2019-06-13 21:11:13 +08:00
|
|
|
// Only available when the HasMVEFloat predicate holds.
let Predicates = [HasMVEFloat];
|
2019-10-15 21:12:51 +08:00
|
|
|
// Marks the instruction as valid for tail predication.
let validForTailPredication = 1;
|
[ARM] Set up infrastructure for MVE vector instructions.
This commit prepares the way to start adding the main collection of
MVE instructions, which operate on the 128-bit vector registers.
The most obvious thing that's needed, and the simplest, is to add the
MQPR register class, which is like the existing QPR except that it has
fewer registers in it.
The more complicated part: MVE defines a system of vector predication,
in which instructions operating on 128-bit vector registers can be
constrained to operate on only a subset of the lanes, using a system
of prefix instructions similar to the existing Thumb IT, in that you
have one prefix instruction which designates up to 4 following
instructions as subject to predication, and within that sequence, the
predicate can be inverted by means of T/E suffixes ('Then' / 'Else').
To support instructions of this type, we've added two new Tablegen
classes `vpred_n` and `vpred_r` for standard clusters of MC operands
to add to a predicated instruction. Both include a flag indicating how
the instruction is predicated at all (options are T, E and 'not
predicated'), and an input register field for the register controlling
the set of active lanes. They differ from each other in that `vpred_r`
also includes an input operand for the previous value of the output
register, for instructions that leave inactive lanes unchanged.
`vpred_n` lacks that extra operand; it will be used for instructions
that don't preserve inactive lanes in their output register (either
because inactive lanes are zeroed, as the MVE load instructions do, or
because the output register isn't a vector at all).
This commit also adds the family of prefix instructions themselves
(VPT / VPST), and all the machinery needed to work with them in
assembly and disassembly (e.g. generating the 't' and 'e' mnemonic
suffixes on disassembled instructions within a predicated block)
I've added a couple of demo instructions that derive from the new
Tablegen base classes and use those two operand clusters. The bulk of
the vector instructions will come in followup commits small enough to
be manageable. (One exception is that I've added the full version of
`isMnemonicVPTPredicable` in the AsmParser, because it seemed
pointless to carefully split it up.)
Reviewers: dmgreen, samparker, SjoerdMeijer, t.p.northover
Subscribers: javed.absar, kristof.beyls, hiraditya, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D62669
llvm-svn: 363258
2019-06-13 21:11:13 +08:00
|
|
|
}
|
|
|
|
|
2019-06-18 23:05:42 +08:00
|
|
|
class MVE_VPTft1<string suffix, bit size>
|
2019-09-16 21:02:41 +08:00
|
|
|
: MVE_VPTf<suffix, size, (ins vpt_mask:$Mk, MQPR:$Qn, MQPR:$Qm, pred_basic_fp:$fc),
|
[ARM] Set up infrastructure for MVE vector instructions.
This commit prepares the way to start adding the main collection of
MVE instructions, which operate on the 128-bit vector registers.
The most obvious thing that's needed, and the simplest, is to add the
MQPR register class, which is like the existing QPR except that it has
fewer registers in it.
The more complicated part: MVE defines a system of vector predication,
in which instructions operating on 128-bit vector registers can be
constrained to operate on only a subset of the lanes, using a system
of prefix instructions similar to the existing Thumb IT, in that you
have one prefix instruction which designates up to 4 following
instructions as subject to predication, and within that sequence, the
predicate can be inverted by means of T/E suffixes ('Then' / 'Else').
To support instructions of this type, we've added two new Tablegen
classes `vpred_n` and `vpred_r` for standard clusters of MC operands
to add to a predicated instruction. Both include a flag indicating how
the instruction is predicated at all (options are T, E and 'not
predicated'), and an input register field for the register controlling
the set of active lanes. They differ from each other in that `vpred_r`
also includes an input operand for the previous value of the output
register, for instructions that leave inactive lanes unchanged.
`vpred_n` lacks that extra operand; it will be used for instructions
that don't preserve inactive lanes in their output register (either
because inactive lanes are zeroed, as the MVE load instructions do, or
because the output register isn't a vector at all).
This commit also adds the family of prefix instructions themselves
(VPT / VPST), and all the machinery needed to work with them in
assembly and disassembly (e.g. generating the 't' and 'e' mnemonic
suffixes on disassembled instructions within a predicated block)
I've added a couple of demo instructions that derive from the new
Tablegen base classes and use those two operand clusters. The bulk of
the vector instructions will come in followup commits small enough to
be manageable. (One exception is that I've added the full version of
`isMnemonicVPTPredicable` in the AsmParser, because it seemed
pointless to carefully split it up.)
Reviewers: dmgreen, samparker, SjoerdMeijer, t.p.northover
Subscribers: javed.absar, kristof.beyls, hiraditya, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D62669
llvm-svn: 363258
2019-06-13 21:11:13 +08:00
|
|
|
"$fc, $Qn, $Qm"> {
|
|
|
|
bits<3> fc;
|
|
|
|
bits<4> Qm;
|
|
|
|
|
|
|
|
let Inst{6} = 0b0;
|
|
|
|
let Inst{5} = Qm{3};
|
|
|
|
let Inst{3-1} = Qm{2-0};
|
|
|
|
let Inst{0} = fc{1};
|
|
|
|
}
|
|
|
|
|
2019-06-18 23:05:42 +08:00
|
|
|
def MVE_VPTv4f32 : MVE_VPTft1<"f32", 0b0>;
|
|
|
|
def MVE_VPTv8f16 : MVE_VPTft1<"f16", 0b1>;
|
[ARM] Set up infrastructure for MVE vector instructions.
This commit prepares the way to start adding the main collection of
MVE instructions, which operate on the 128-bit vector registers.
The most obvious thing that's needed, and the simplest, is to add the
MQPR register class, which is like the existing QPR except that it has
fewer registers in it.
The more complicated part: MVE defines a system of vector predication,
in which instructions operating on 128-bit vector registers can be
constrained to operate on only a subset of the lanes, using a system
of prefix instructions similar to the existing Thumb IT, in that you
have one prefix instruction which designates up to 4 following
instructions as subject to predication, and within that sequence, the
predicate can be inverted by means of T/E suffixes ('Then' / 'Else').
To support instructions of this type, we've added two new Tablegen
classes `vpred_n` and `vpred_r` for standard clusters of MC operands
to add to a predicated instruction. Both include a flag indicating how
the instruction is predicated at all (options are T, E and 'not
predicated'), and an input register field for the register controlling
the set of active lanes. They differ from each other in that `vpred_r`
also includes an input operand for the previous value of the output
register, for instructions that leave inactive lanes unchanged.
`vpred_n` lacks that extra operand; it will be used for instructions
that don't preserve inactive lanes in their output register (either
because inactive lanes are zeroed, as the MVE load instructions do, or
because the output register isn't a vector at all).
This commit also adds the family of prefix instructions themselves
(VPT / VPST), and all the machinery needed to work with them in
assembly and disassembly (e.g. generating the 't' and 'e' mnemonic
suffixes on disassembled instructions within a predicated block)
I've added a couple of demo instructions that derive from the new
Tablegen base classes and use those two operand clusters. The bulk of
the vector instructions will come in followup commits small enough to
be manageable. (One exception is that I've added the full version of
`isMnemonicVPTPredicable` in the AsmParser, because it seemed
pointless to carefully split it up.)
Reviewers: dmgreen, samparker, SjoerdMeijer, t.p.northover
Subscribers: javed.absar, kristof.beyls, hiraditya, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D62669
llvm-svn: 363258
2019-06-13 21:11:13 +08:00
|
|
|
|
2019-06-18 23:05:42 +08:00
|
|
|
class MVE_VPTft2<string suffix, bit size>
|
2019-09-16 21:02:41 +08:00
|
|
|
: MVE_VPTf<suffix, size, (ins vpt_mask:$Mk, MQPR:$Qn, GPRwithZR:$Rm, pred_basic_fp:$fc),
|
[ARM] Set up infrastructure for MVE vector instructions.
This commit prepares the way to start adding the main collection of
MVE instructions, which operate on the 128-bit vector registers.
The most obvious thing that's needed, and the simplest, is to add the
MQPR register class, which is like the existing QPR except that it has
fewer registers in it.
The more complicated part: MVE defines a system of vector predication,
in which instructions operating on 128-bit vector registers can be
constrained to operate on only a subset of the lanes, using a system
of prefix instructions similar to the existing Thumb IT, in that you
have one prefix instruction which designates up to 4 following
instructions as subject to predication, and within that sequence, the
predicate can be inverted by means of T/E suffixes ('Then' / 'Else').
To support instructions of this type, we've added two new Tablegen
classes `vpred_n` and `vpred_r` for standard clusters of MC operands
to add to a predicated instruction. Both include a flag indicating how
the instruction is predicated at all (options are T, E and 'not
predicated'), and an input register field for the register controlling
the set of active lanes. They differ from each other in that `vpred_r`
also includes an input operand for the previous value of the output
register, for instructions that leave inactive lanes unchanged.
`vpred_n` lacks that extra operand; it will be used for instructions
that don't preserve inactive lanes in their output register (either
because inactive lanes are zeroed, as the MVE load instructions do, or
because the output register isn't a vector at all).
This commit also adds the family of prefix instructions themselves
(VPT / VPST), and all the machinery needed to work with them in
assembly and disassembly (e.g. generating the 't' and 'e' mnemonic
suffixes on disassembled instructions within a predicated block)
I've added a couple of demo instructions that derive from the new
Tablegen base classes and use those two operand clusters. The bulk of
the vector instructions will come in followup commits small enough to
be manageable. (One exception is that I've added the full version of
`isMnemonicVPTPredicable` in the AsmParser, because it seemed
pointless to carefully split it up.)
Reviewers: dmgreen, samparker, SjoerdMeijer, t.p.northover
Subscribers: javed.absar, kristof.beyls, hiraditya, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D62669
llvm-svn: 363258
2019-06-13 21:11:13 +08:00
|
|
|
"$fc, $Qn, $Rm"> {
|
|
|
|
bits<3> fc;
|
|
|
|
bits<4> Rm;
|
|
|
|
|
|
|
|
let Inst{6} = 0b1;
|
|
|
|
let Inst{5} = fc{1};
|
|
|
|
let Inst{3-0} = Rm{3-0};
|
|
|
|
}
|
|
|
|
|
2019-06-18 23:05:42 +08:00
|
|
|
def MVE_VPTv4f32r : MVE_VPTft2<"f32", 0b0>;
|
|
|
|
def MVE_VPTv8f16r : MVE_VPTft2<"f16", 0b1>;
|
[ARM] Set up infrastructure for MVE vector instructions.
This commit prepares the way to start adding the main collection of
MVE instructions, which operate on the 128-bit vector registers.
The most obvious thing that's needed, and the simplest, is to add the
MQPR register class, which is like the existing QPR except that it has
fewer registers in it.
The more complicated part: MVE defines a system of vector predication,
in which instructions operating on 128-bit vector registers can be
constrained to operate on only a subset of the lanes, using a system
of prefix instructions similar to the existing Thumb IT, in that you
have one prefix instruction which designates up to 4 following
instructions as subject to predication, and within that sequence, the
predicate can be inverted by means of T/E suffixes ('Then' / 'Else').
To support instructions of this type, we've added two new Tablegen
classes `vpred_n` and `vpred_r` for standard clusters of MC operands
to add to a predicated instruction. Both include a flag indicating how
the instruction is predicated at all (options are T, E and 'not
predicated'), and an input register field for the register controlling
the set of active lanes. They differ from each other in that `vpred_r`
also includes an input operand for the previous value of the output
register, for instructions that leave inactive lanes unchanged.
`vpred_n` lacks that extra operand; it will be used for instructions
that don't preserve inactive lanes in their output register (either
because inactive lanes are zeroed, as the MVE load instructions do, or
because the output register isn't a vector at all).
This commit also adds the family of prefix instructions themselves
(VPT / VPST), and all the machinery needed to work with them in
assembly and disassembly (e.g. generating the 't' and 'e' mnemonic
suffixes on disassembled instructions within a predicated block)
I've added a couple of demo instructions that derive from the new
Tablegen base classes and use those two operand clusters. The bulk of
the vector instructions will come in followup commits small enough to
be manageable. (One exception is that I've added the full version of
`isMnemonicVPTPredicable` in the AsmParser, because it seemed
pointless to carefully split it up.)
Reviewers: dmgreen, samparker, SjoerdMeijer, t.p.northover
Subscribers: javed.absar, kristof.beyls, hiraditya, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D62669
llvm-svn: 363258
2019-06-13 21:11:13 +08:00
|
|
|
|
2019-06-18 23:05:42 +08:00
|
|
|
def MVE_VPST : MVE_MI<(outs ), (ins vpt_mask:$Mk), NoItinerary,
|
[ARM] Set up infrastructure for MVE vector instructions.
This commit prepares the way to start adding the main collection of
MVE instructions, which operate on the 128-bit vector registers.
The most obvious thing that's needed, and the simplest, is to add the
MQPR register class, which is like the existing QPR except that it has
fewer registers in it.
The more complicated part: MVE defines a system of vector predication,
in which instructions operating on 128-bit vector registers can be
constrained to operate on only a subset of the lanes, using a system
of prefix instructions similar to the existing Thumb IT, in that you
have one prefix instruction which designates up to 4 following
instructions as subject to predication, and within that sequence, the
predicate can be inverted by means of T/E suffixes ('Then' / 'Else').
To support instructions of this type, we've added two new Tablegen
classes `vpred_n` and `vpred_r` for standard clusters of MC operands
to add to a predicated instruction. Both include a flag indicating how
the instruction is predicated at all (options are T, E and 'not
predicated'), and an input register field for the register controlling
the set of active lanes. They differ from each other in that `vpred_r`
also includes an input operand for the previous value of the output
register, for instructions that leave inactive lanes unchanged.
`vpred_n` lacks that extra operand; it will be used for instructions
that don't preserve inactive lanes in their output register (either
because inactive lanes are zeroed, as the MVE load instructions do, or
because the output register isn't a vector at all).
This commit also adds the family of prefix instructions themselves
(VPT / VPST), and all the machinery needed to work with them in
assembly and disassembly (e.g. generating the 't' and 'e' mnemonic
suffixes on disassembled instructions within a predicated block)
I've added a couple of demo instructions that derive from the new
Tablegen base classes and use those two operand clusters. The bulk of
the vector instructions will come in followup commits small enough to
be manageable. (One exception is that I've added the full version of
`isMnemonicVPTPredicable` in the AsmParser, because it seemed
pointless to carefully split it up.)
Reviewers: dmgreen, samparker, SjoerdMeijer, t.p.northover
Subscribers: javed.absar, kristof.beyls, hiraditya, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D62669
llvm-svn: 363258
2019-06-13 21:11:13 +08:00
|
|
|
!strconcat("vpst", "${Mk}"), "", "", []> {
|
|
|
|
bits<4> Mk;
|
|
|
|
|
|
|
|
let Inst{31-23} = 0b111111100;
|
|
|
|
let Inst{22} = Mk{3};
|
|
|
|
let Inst{21-16} = 0b110001;
|
|
|
|
let Inst{15-13} = Mk{2-0};
|
|
|
|
let Inst{12-0} = 0b0111101001101;
|
|
|
|
let Unpredictable{12} = 0b1;
|
|
|
|
let Unpredictable{7} = 0b1;
|
|
|
|
let Unpredictable{5} = 0b1;
|
|
|
|
|
2019-10-17 16:46:31 +08:00
|
|
|
let Uses = [VPR];
|
2019-10-15 21:12:51 +08:00
|
|
|
let validForTailPredication = 1;
|
[ARM] Set up infrastructure for MVE vector instructions.
This commit prepares the way to start adding the main collection of
MVE instructions, which operate on the 128-bit vector registers.
The most obvious thing that's needed, and the simplest, is to add the
MQPR register class, which is like the existing QPR except that it has
fewer registers in it.
The more complicated part: MVE defines a system of vector predication,
in which instructions operating on 128-bit vector registers can be
constrained to operate on only a subset of the lanes, using a system
of prefix instructions similar to the existing Thumb IT, in that you
have one prefix instruction which designates up to 4 following
instructions as subject to predication, and within that sequence, the
predicate can be inverted by means of T/E suffixes ('Then' / 'Else').
To support instructions of this type, we've added two new Tablegen
classes `vpred_n` and `vpred_r` for standard clusters of MC operands
to add to a predicated instruction. Both include a flag indicating how
the instruction is predicated at all (options are T, E and 'not
predicated'), and an input register field for the register controlling
the set of active lanes. They differ from each other in that `vpred_r`
also includes an input operand for the previous value of the output
register, for instructions that leave inactive lanes unchanged.
`vpred_n` lacks that extra operand; it will be used for instructions
that don't preserve inactive lanes in their output register (either
because inactive lanes are zeroed, as the MVE load instructions do, or
because the output register isn't a vector at all).
This commit also adds the family of prefix instructions themselves
(VPT / VPST), and all the machinery needed to work with them in
assembly and disassembly (e.g. generating the 't' and 'e' mnemonic
suffixes on disassembled instructions within a predicated block)
I've added a couple of demo instructions that derive from the new
Tablegen base classes and use those two operand clusters. The bulk of
the vector instructions will come in followup commits small enough to
be manageable. (One exception is that I've added the full version of
`isMnemonicVPTPredicable` in the AsmParser, because it seemed
pointless to carefully split it up.)
Reviewers: dmgreen, samparker, SjoerdMeijer, t.p.northover
Subscribers: javed.absar, kristof.beyls, hiraditya, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D62669
llvm-svn: 363258
2019-06-13 21:11:13 +08:00
|
|
|
}
|
2019-06-25 19:24:33 +08:00
|
|
|
|
|
|
|
def MVE_VPSEL : MVE_p<(outs MQPR:$Qd), (ins MQPR:$Qn, MQPR:$Qm), NoItinerary,
|
|
|
|
"vpsel", "", "$Qd, $Qn, $Qm", vpred_n, "", []> {
|
|
|
|
bits<4> Qn;
|
|
|
|
bits<4> Qd;
|
|
|
|
bits<4> Qm;
|
|
|
|
|
|
|
|
let Inst{28} = 0b1;
|
|
|
|
let Inst{25-23} = 0b100;
|
|
|
|
let Inst{22} = Qd{3};
|
|
|
|
let Inst{21-20} = 0b11;
|
|
|
|
let Inst{19-17} = Qn{2-0};
|
|
|
|
let Inst{16} = 0b1;
|
|
|
|
let Inst{15-13} = Qd{2-0};
|
|
|
|
let Inst{12-9} = 0b0111;
|
|
|
|
let Inst{8} = 0b1;
|
|
|
|
let Inst{7} = Qn{3};
|
|
|
|
let Inst{6} = 0b0;
|
|
|
|
let Inst{5} = Qm{3};
|
|
|
|
let Inst{4} = 0b0;
|
|
|
|
let Inst{3-1} = Qm{2-0};
|
|
|
|
let Inst{0} = 0b1;
|
2019-10-15 21:12:51 +08:00
|
|
|
let validForTailPredication = 1;
|
2019-06-25 19:24:33 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
foreach suffix = ["s8", "s16", "s32", "u8", "u16", "u32",
|
|
|
|
"i8", "i16", "i32", "f16", "f32"] in
|
|
|
|
def : MVEInstAlias<"vpsel${vp}." # suffix # "\t$Qd, $Qn, $Qm",
|
|
|
|
(MVE_VPSEL MQPR:$Qd, MQPR:$Qn, MQPR:$Qm, vpred_n:$vp)>;
|
|
|
|
|
2019-07-24 19:08:14 +08:00
|
|
|
let Predicates = [HasMVEInt] in {
|
|
|
|
def : Pat<(v16i8 (vselect (v16i1 VCCR:$pred), (v16i8 MQPR:$v1), (v16i8 MQPR:$v2))),
|
|
|
|
(v16i8 (MVE_VPSEL MQPR:$v1, MQPR:$v2, 0, VCCR:$pred))>;
|
|
|
|
def : Pat<(v8i16 (vselect (v8i1 VCCR:$pred), (v8i16 MQPR:$v1), (v8i16 MQPR:$v2))),
|
|
|
|
(v8i16 (MVE_VPSEL MQPR:$v1, MQPR:$v2, 0, VCCR:$pred))>;
|
|
|
|
def : Pat<(v4i32 (vselect (v4i1 VCCR:$pred), (v4i32 MQPR:$v1), (v4i32 MQPR:$v2))),
|
|
|
|
(v4i32 (MVE_VPSEL MQPR:$v1, MQPR:$v2, 0, VCCR:$pred))>;
|
2019-07-24 19:51:36 +08:00
|
|
|
|
2019-07-24 22:28:22 +08:00
|
|
|
def : Pat<(v8f16 (vselect (v8i1 VCCR:$pred), (v8f16 MQPR:$v1), (v8f16 MQPR:$v2))),
|
|
|
|
(v8f16 (MVE_VPSEL MQPR:$v1, MQPR:$v2, 0, VCCR:$pred))>;
|
|
|
|
def : Pat<(v4f32 (vselect (v4i1 VCCR:$pred), (v4f32 MQPR:$v1), (v4f32 MQPR:$v2))),
|
|
|
|
(v4f32 (MVE_VPSEL MQPR:$v1, MQPR:$v2, 0, VCCR:$pred))>;
|
|
|
|
|
2019-07-24 19:51:36 +08:00
|
|
|
def : Pat<(v16i8 (vselect (v16i8 MQPR:$pred), (v16i8 MQPR:$v1), (v16i8 MQPR:$v2))),
|
|
|
|
(v16i8 (MVE_VPSEL MQPR:$v1, MQPR:$v2, 0,
|
|
|
|
(MVE_VCMPi8 (v16i8 MQPR:$pred), (MVE_VMOVimmi8 0), 1)))>;
|
|
|
|
def : Pat<(v8i16 (vselect (v8i16 MQPR:$pred), (v8i16 MQPR:$v1), (v8i16 MQPR:$v2))),
|
|
|
|
(v8i16 (MVE_VPSEL MQPR:$v1, MQPR:$v2, 0,
|
|
|
|
(MVE_VCMPi16 (v8i16 MQPR:$pred), (MVE_VMOVimmi16 0), 1)))>;
|
|
|
|
def : Pat<(v4i32 (vselect (v4i32 MQPR:$pred), (v4i32 MQPR:$v1), (v4i32 MQPR:$v2))),
|
|
|
|
(v4i32 (MVE_VPSEL MQPR:$v1, MQPR:$v2, 0,
|
|
|
|
(MVE_VCMPi32 (v4i32 MQPR:$pred), (MVE_VMOVimmi32 0), 1)))>;
|
|
|
|
|
2019-07-24 22:28:22 +08:00
|
|
|
def : Pat<(v8f16 (vselect (v8i16 MQPR:$pred), (v8f16 MQPR:$v1), (v8f16 MQPR:$v2))),
|
|
|
|
(v8f16 (MVE_VPSEL MQPR:$v1, MQPR:$v2, 0,
|
|
|
|
(MVE_VCMPi16 (v8i16 MQPR:$pred), (MVE_VMOVimmi16 0), 1)))>;
|
|
|
|
def : Pat<(v4f32 (vselect (v4i32 MQPR:$pred), (v4f32 MQPR:$v1), (v4f32 MQPR:$v2))),
|
|
|
|
(v4f32 (MVE_VPSEL MQPR:$v1, MQPR:$v2, 0,
|
|
|
|
(MVE_VCMPi32 (v4i32 MQPR:$pred), (MVE_VMOVimmi32 0), 1)))>;
|
|
|
|
|
2019-07-28 21:53:39 +08:00
|
|
|
// Pred <-> Int
|
2019-07-24 19:51:36 +08:00
|
|
|
def : Pat<(v16i8 (zext (v16i1 VCCR:$pred))),
|
|
|
|
(v16i8 (MVE_VPSEL (MVE_VMOVimmi8 1), (MVE_VMOVimmi8 0), 0, VCCR:$pred))>;
|
|
|
|
def : Pat<(v8i16 (zext (v8i1 VCCR:$pred))),
|
|
|
|
(v8i16 (MVE_VPSEL (MVE_VMOVimmi16 1), (MVE_VMOVimmi16 0), 0, VCCR:$pred))>;
|
|
|
|
def : Pat<(v4i32 (zext (v4i1 VCCR:$pred))),
|
|
|
|
(v4i32 (MVE_VPSEL (MVE_VMOVimmi32 1), (MVE_VMOVimmi32 0), 0, VCCR:$pred))>;
|
|
|
|
|
|
|
|
def : Pat<(v16i8 (sext (v16i1 VCCR:$pred))),
|
|
|
|
(v16i8 (MVE_VPSEL (MVE_VMOVimmi8 255), (MVE_VMOVimmi8 0), 0, VCCR:$pred))>;
|
|
|
|
def : Pat<(v8i16 (sext (v8i1 VCCR:$pred))),
|
|
|
|
(v8i16 (MVE_VPSEL (MVE_VMOVimmi8 255), (MVE_VMOVimmi16 0), 0, VCCR:$pred))>;
|
|
|
|
def : Pat<(v4i32 (sext (v4i1 VCCR:$pred))),
|
|
|
|
(v4i32 (MVE_VPSEL (MVE_VMOVimmi8 255), (MVE_VMOVimmi32 0), 0, VCCR:$pred))>;
|
|
|
|
|
|
|
|
def : Pat<(v16i8 (anyext (v16i1 VCCR:$pred))),
|
|
|
|
(v16i8 (MVE_VPSEL (MVE_VMOVimmi8 1), (MVE_VMOVimmi8 0), 0, VCCR:$pred))>;
|
|
|
|
def : Pat<(v8i16 (anyext (v8i1 VCCR:$pred))),
|
|
|
|
(v8i16 (MVE_VPSEL (MVE_VMOVimmi16 1), (MVE_VMOVimmi16 0), 0, VCCR:$pred))>;
|
|
|
|
def : Pat<(v4i32 (anyext (v4i1 VCCR:$pred))),
|
|
|
|
(v4i32 (MVE_VPSEL (MVE_VMOVimmi32 1), (MVE_VMOVimmi32 0), 0, VCCR:$pred))>;
|
2019-08-15 17:26:51 +08:00
|
|
|
|
|
|
|
def : Pat<(v16i1 (trunc (v16i8 MQPR:$v1))),
|
|
|
|
(v16i1 (MVE_VCMPi32r (v16i8 MQPR:$v1), ZR, 1))>;
|
|
|
|
def : Pat<(v8i1 (trunc (v8i16 MQPR:$v1))),
|
|
|
|
(v8i1 (MVE_VCMPi32r (v8i16 MQPR:$v1), ZR, 1))>;
|
|
|
|
def : Pat<(v4i1 (trunc (v4i32 MQPR:$v1))),
|
|
|
|
(v4i1 (MVE_VCMPi32r (v4i32 MQPR:$v1), ZR, 1))>;
|
2019-07-24 19:08:14 +08:00
|
|
|
}
|
|
|
|
|
2019-07-28 21:53:39 +08:00
|
|
|
let Predicates = [HasMVEFloat] in {
|
|
|
|
// Pred <-> Float
|
|
|
|
// 112 is 1.0 in float
|
|
|
|
def : Pat<(v4f32 (uint_to_fp (v4i1 VCCR:$pred))),
|
|
|
|
(v4f32 (MVE_VPSEL (v4f32 (MVE_VMOVimmf32 112)), (v4f32 (MVE_VMOVimmi32 0)), 0, VCCR:$pred))>;
|
|
|
|
// 2620 is 1.0 in half
|
|
|
|
def : Pat<(v8f16 (uint_to_fp (v8i1 VCCR:$pred))),
|
|
|
|
(v8f16 (MVE_VPSEL (v8f16 (MVE_VMOVimmi16 2620)), (v8f16 (MVE_VMOVimmi16 0)), 0, VCCR:$pred))>;
|
|
|
|
// 240 is -1.0 in float
|
|
|
|
def : Pat<(v4f32 (sint_to_fp (v4i1 VCCR:$pred))),
|
|
|
|
(v4f32 (MVE_VPSEL (v4f32 (MVE_VMOVimmf32 240)), (v4f32 (MVE_VMOVimmi32 0)), 0, VCCR:$pred))>;
|
|
|
|
// 2748 is -1.0 in half
|
|
|
|
def : Pat<(v8f16 (sint_to_fp (v8i1 VCCR:$pred))),
|
|
|
|
(v8f16 (MVE_VPSEL (v8f16 (MVE_VMOVimmi16 2748)), (v8f16 (MVE_VMOVimmi16 0)), 0, VCCR:$pred))>;
|
|
|
|
|
|
|
|
def : Pat<(v4i1 (fp_to_uint (v4f32 MQPR:$v1))),
|
|
|
|
(v4i1 (MVE_VCMPf32r (v4f32 MQPR:$v1), ZR, 1))>;
|
|
|
|
def : Pat<(v8i1 (fp_to_uint (v8f16 MQPR:$v1))),
|
|
|
|
(v8i1 (MVE_VCMPf16r (v8f16 MQPR:$v1), ZR, 1))>;
|
|
|
|
def : Pat<(v4i1 (fp_to_sint (v4f32 MQPR:$v1))),
|
|
|
|
(v4i1 (MVE_VCMPf32r (v4f32 MQPR:$v1), ZR, 1))>;
|
|
|
|
def : Pat<(v8i1 (fp_to_sint (v8f16 MQPR:$v1))),
|
|
|
|
(v8i1 (MVE_VCMPf16r (v8f16 MQPR:$v1), ZR, 1))>;
|
|
|
|
}
|
|
|
|
|
2019-07-28 22:07:48 +08:00
|
|
|
def MVE_VPNOT : MVE_p<(outs VCCR:$P0), (ins VCCR:$P0_in), NoItinerary,
|
2019-06-25 19:24:33 +08:00
|
|
|
"vpnot", "", "", vpred_n, "", []> {
|
|
|
|
let Inst{31-0} = 0b11111110001100010000111101001101;
|
|
|
|
let Unpredictable{19-17} = 0b111;
|
|
|
|
let Unpredictable{12} = 0b1;
|
|
|
|
let Unpredictable{7} = 0b1;
|
|
|
|
let Unpredictable{5} = 0b1;
|
|
|
|
|
|
|
|
let Constraints = "";
|
2019-07-28 22:07:48 +08:00
|
|
|
let DecoderMethod = "DecodeMVEVPNOT";
|
2019-06-25 19:24:33 +08:00
|
|
|
}
|
|
|
|
|
2019-07-28 22:07:48 +08:00
|
|
|
let Predicates = [HasMVEInt] in {
|
|
|
|
def : Pat<(v4i1 (xor (v4i1 VCCR:$pred), (v4i1 (predicate_cast (i32 65535))))),
|
|
|
|
(v4i1 (MVE_VPNOT (v4i1 VCCR:$pred)))>;
|
|
|
|
def : Pat<(v8i1 (xor (v8i1 VCCR:$pred), (v8i1 (predicate_cast (i32 65535))))),
|
|
|
|
(v8i1 (MVE_VPNOT (v8i1 VCCR:$pred)))>;
|
|
|
|
def : Pat<(v16i1 (xor (v16i1 VCCR:$pred), (v16i1 (predicate_cast (i32 65535))))),
|
|
|
|
(v16i1 (MVE_VPNOT (v16i1 VCCR:$pred)))>;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
2019-06-25 19:24:33 +08:00
|
|
|
// Shared base class for the DLSTP / WLSTP instruction definitions below.
// Derives from t2LOL, always produces GPRlr:$LR as its only output, and is
// gated on HasMVEInt.
//   iops - input operand dag supplied by the subclass
//   asm  - mnemonic string
//   ops  - assembly operand string
//   size - 2-bit value placed in Inst{21-20}
class MVE_loltp_start<dag iops, string asm, string ops, bits<2> size>
|
|
|
|
: t2LOL<(outs GPRlr:$LR), iops, asm, ops> {
|
|
|
|
// Register operand; encoded in Inst{19-16} below.
bits<4> Rn;
|
|
|
|
let Predicates = [HasMVEInt];
|
|
|
|
let Inst{22} = 0b0;
|
|
|
|
// Element-size selector taken from the class parameter.
let Inst{21-20} = size;
|
|
|
|
let Inst{19-16} = Rn{3-0};
|
|
|
|
let Inst{12} = 0b0;
|
|
|
|
}
|
|
|
|
|
|
|
|
// DLSTP instruction class: an MVE_loltp_start with a single rGPR:$Rn input
// and no label operand. Assembly operands are "$LR, $Rn".
//   asm  - mnemonic string (e.g. "dlstp.8")
//   size - 2-bit size field forwarded to MVE_loltp_start
class MVE_DLSTP<string asm, bits<2> size>
|
|
|
|
: MVE_loltp_start<(ins rGPR:$Rn), asm, "$LR, $Rn", size> {
|
|
|
|
let Inst{13} = 0b1;
|
|
|
|
// No label is encoded, so the whole 11-bit field is fixed to zero.
let Inst{11-1} = 0b00000000000;
|
|
|
|
// Bits 10-1 are flagged in the Unpredictable mask; bit 11 is not.
// NOTE(review): presumably this lets the decoder soft-fail rather than
// reject encodings with non-zero bits here - confirm against the decoder.
let Unpredictable{10-1} = 0b1111111111;
|
|
|
|
}
|
|
|
|
|
|
|
|
// WLSTP instruction class: an MVE_loltp_start that takes rGPR:$Rn plus a
// wlslabel_u11:$label operand. Assembly operands are "$LR, $Rn, $label".
//   asm  - mnemonic string (e.g. "wlstp.8")
//   size - 2-bit size field forwarded to MVE_loltp_start
class MVE_WLSTP<string asm, bits<2> size>
|
|
|
|
: MVE_loltp_start<(ins rGPR:$Rn, wlslabel_u11:$label),
|
|
|
|
asm, "$LR, $Rn, $label", size> {
|
|
|
|
// 11-bit encoded label value.
bits<11> label;
|
|
|
|
let Inst{13} = 0b0;
|
|
|
|
// The label is split: its bit 0 goes to Inst{11}, and its bits 10-1 go
// to Inst{10-1}.
let Inst{11} = label{0};
|
|
|
|
let Inst{10-1} = label{10-1};
|
|
|
|
}
|
|
|
|
|
|
|
|
def MVE_DLSTP_8 : MVE_DLSTP<"dlstp.8", 0b00>;
|
|
|
|
def MVE_DLSTP_16 : MVE_DLSTP<"dlstp.16", 0b01>;
|
|
|
|
def MVE_DLSTP_32 : MVE_DLSTP<"dlstp.32", 0b10>;
|
|
|
|
def MVE_DLSTP_64 : MVE_DLSTP<"dlstp.64", 0b11>;
|
|
|
|
|
|
|
|
def MVE_WLSTP_8 : MVE_WLSTP<"wlstp.8", 0b00>;
|
|
|
|
def MVE_WLSTP_16 : MVE_WLSTP<"wlstp.16", 0b01>;
|
|
|
|
def MVE_WLSTP_32 : MVE_WLSTP<"wlstp.32", 0b10>;
|
|
|
|
def MVE_WLSTP_64 : MVE_WLSTP<"wlstp.64", 0b11>;
|
|
|
|
|
|
|
|
// Shared base class for the LETP / LCTP definitions below. Derives from
// t2LOL, is gated on HasMVEInt, and fixes Inst{22-21}, Inst{19-16} and
// Inst{12}; the remaining bits are set by each def.
//   oops - output operand dag
//   iops - input operand dag
//   asm  - mnemonic string
//   ops  - assembly operand string
class MVE_loltp_end<dag oops, dag iops, string asm, string ops>
|
|
|
|
: t2LOL<oops, iops, asm, ops> {
|
|
|
|
let Predicates = [HasMVEInt];
|
|
|
|
let Inst{22-21} = 0b00;
|
|
|
|
// No register is encoded in this field for these instructions; it is
// fixed to all-ones.
let Inst{19-16} = 0b1111;
|
|
|
|
let Inst{12} = 0b0;
|
|
|
|
}
|
|
|
|
|
|
|
|
// LETP: takes GPRlr:$LRin and a lelabel_u11:$label, and produces
// GPRlr:$LRout. Only $LRin and $label appear in the assembly string.
def MVE_LETP : MVE_loltp_end<(outs GPRlr:$LRout),
|
|
|
|
(ins GPRlr:$LRin, lelabel_u11:$label),
|
|
|
|
"letp", "$LRin, $label"> {
|
|
|
|
// 11-bit encoded label value.
bits<11> label;
|
|
|
|
let Inst{20} = 0b1;
|
|
|
|
let Inst{13} = 0b0;
|
|
|
|
// Same split label layout as MVE_WLSTP: label bit 0 in Inst{11},
// label bits 10-1 in Inst{10-1}.
let Inst{11} = label{0};
|
|
|
|
let Inst{10-1} = label{10-1};
|
|
|
|
}
|
|
|
|
|
|
|
|
// LCTP: no outputs and no register or label operands; its only input is the
// pred:$p operand, which is appended to the mnemonic as "lctp${p}".
def MVE_LCTP : MVE_loltp_end<(outs), (ins pred:$p), "lctp${p}", ""> {
|
|
|
|
let Inst{20} = 0b0;
|
|
|
|
let Inst{13} = 0b1;
|
|
|
|
// No label is encoded, so the whole 11-bit field is fixed to zero.
let Inst{11-1} = 0b00000000000;
|
|
|
|
// Bits 21-20 and 11-1 are flagged in the Unpredictable mask.
// NOTE(review): presumably this lets the decoder soft-fail rather than
// reject encodings that differ in these bits - confirm against the decoder.
let Unpredictable{21-20} = 0b11;
|
|
|
|
let Unpredictable{11-1} = 0b11111111111;
|
|
|
|
}
|
[ARM] Code-generation infrastructure for MVE.
This provides the low-level support to start using MVE vector types in
LLVM IR, loading and storing them, passing them to __asm__ statements
containing hand-written MVE vector instructions, and *if* you have the
hard-float ABI turned on, using them as function parameters.
(In the soft-float ABI, vector types are passed in integer registers,
and combining all those 32-bit integers into a q-reg requires support
for selection DAG nodes like insert_vector_elt and build_vector which
aren't implemented yet for MVE. In fact I've also had to add
`arm_aapcs_vfpcc` to a couple of existing tests to avoid that
problem.)
Specifically, this commit adds support for:
* spills, reloads and register moves for MVE vector registers
* ditto for the VPT predication mask that lives in VPR.P0
* make all the MVE vector types legal in ISel, and provide selection
DAG patterns for BITCAST, LOAD and STORE
* make loads and stores of scalar FP types conditional on
`hasFPRegs()` rather than `hasVFP2Base()`. As a result a few
existing tests needed their llc command lines updating to use
`-mattr=-fpregs` as their method of turning off all hardware FP
support.
Reviewers: dmgreen, samparker, SjoerdMeijer
Subscribers: javed.absar, kristof.beyls, hiraditya, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D60708
llvm-svn: 364329
2019-06-26 00:48:46 +08:00
|
|
|
|
|
|
|
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
// Patterns
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
|
2019-11-27 00:18:58 +08:00
|
|
|
// PatFrags for loads and stores. Often trying to keep semi-consistent names.
|
2019-08-22 00:20:35 +08:00
|
|
|
|
2019-08-08 23:27:58 +08:00
|
|
|
def aligned32_pre_store : PatFrag<(ops node:$val, node:$ptr, node:$offset),
|
|
|
|
(pre_store node:$val, node:$ptr, node:$offset), [{
|
|
|
|
return cast<StoreSDNode>(N)->getAlignment() >= 4;
|
|
|
|
}]>;
|
|
|
|
def aligned32_post_store : PatFrag<(ops node:$val, node:$ptr, node:$offset),
|
2019-08-22 00:20:35 +08:00
|
|
|
(post_store node:$val, node:$ptr, node:$offset), [{
|
2019-08-08 23:27:58 +08:00
|
|
|
return cast<StoreSDNode>(N)->getAlignment() >= 4;
|
|
|
|
}]>;
|
|
|
|
def aligned16_pre_store : PatFrag<(ops node:$val, node:$ptr, node:$offset),
|
|
|
|
(pre_store node:$val, node:$ptr, node:$offset), [{
|
2019-08-15 20:54:47 +08:00
|
|
|
return cast<StoreSDNode>(N)->getAlignment() >= 2;
|
2019-08-08 23:27:58 +08:00
|
|
|
}]>;
|
|
|
|
def aligned16_post_store : PatFrag<(ops node:$val, node:$ptr, node:$offset),
|
2019-08-22 00:20:35 +08:00
|
|
|
(post_store node:$val, node:$ptr, node:$offset), [{
|
2019-08-15 20:54:47 +08:00
|
|
|
return cast<StoreSDNode>(N)->getAlignment() >= 2;
|
2019-08-08 23:27:58 +08:00
|
|
|
}]>;
|
|
|
|
|
2019-10-17 15:55:55 +08:00
|
|
|
|
2019-11-27 00:18:58 +08:00
|
|
|
def aligned_maskedloadvi8 : PatFrag<(ops node:$ptr, node:$pred, node:$passthru),
|
|
|
|
(masked_ld node:$ptr, undef, node:$pred, node:$passthru), [{
|
2019-10-17 15:55:55 +08:00
|
|
|
auto *Ld = cast<MaskedLoadSDNode>(N);
|
|
|
|
return Ld->getMemoryVT().getScalarType() == MVT::i8;
|
|
|
|
}]>;
|
2019-11-27 00:18:58 +08:00
|
|
|
def aligned_sextmaskedloadvi8 : PatFrag<(ops node:$ptr, node:$pred, node:$passthru),
|
|
|
|
(aligned_maskedloadvi8 node:$ptr, node:$pred, node:$passthru), [{
|
2019-10-17 15:55:55 +08:00
|
|
|
return cast<MaskedLoadSDNode>(N)->getExtensionType() == ISD::SEXTLOAD;
|
|
|
|
}]>;
|
2019-11-27 00:18:58 +08:00
|
|
|
def aligned_zextmaskedloadvi8 : PatFrag<(ops node:$ptr, node:$pred, node:$passthru),
|
|
|
|
(aligned_maskedloadvi8 node:$ptr, node:$pred, node:$passthru), [{
|
2019-10-17 15:55:55 +08:00
|
|
|
return cast<MaskedLoadSDNode>(N)->getExtensionType() == ISD::ZEXTLOAD;
|
|
|
|
}]>;
|
2019-11-27 00:18:58 +08:00
|
|
|
def aligned_extmaskedloadvi8 : PatFrag<(ops node:$ptr, node:$pred, node:$passthru),
|
|
|
|
(aligned_maskedloadvi8 node:$ptr, node:$pred, node:$passthru), [{
|
2019-10-17 15:55:55 +08:00
|
|
|
auto *Ld = cast<MaskedLoadSDNode>(N);
|
|
|
|
EVT ScalarVT = Ld->getMemoryVT().getScalarType();
|
|
|
|
return ScalarVT.isInteger() && Ld->getExtensionType() == ISD::EXTLOAD;
|
|
|
|
}]>;
|
2019-11-27 00:18:58 +08:00
|
|
|
def aligned_maskedloadvi16: PatFrag<(ops node:$ptr, node:$pred, node:$passthru),
|
|
|
|
(masked_ld node:$ptr, undef, node:$pred, node:$passthru), [{
|
2019-10-17 15:55:55 +08:00
|
|
|
auto *Ld = cast<MaskedLoadSDNode>(N);
|
|
|
|
EVT ScalarVT = Ld->getMemoryVT().getScalarType();
|
|
|
|
return (ScalarVT == MVT::i16 || ScalarVT == MVT::f16) && Ld->getAlignment() >= 2;
|
2019-09-15 22:14:47 +08:00
|
|
|
}]>;
|
2019-11-27 00:18:58 +08:00
|
|
|
def aligned_sextmaskedloadvi16 : PatFrag<(ops node:$ptr, node:$pred, node:$passthru),
|
|
|
|
(aligned_maskedloadvi16 node:$ptr, node:$pred, node:$passthru), [{
|
2019-10-17 15:55:55 +08:00
|
|
|
return cast<MaskedLoadSDNode>(N)->getExtensionType() == ISD::SEXTLOAD;
|
|
|
|
}]>;
|
2019-11-27 00:18:58 +08:00
|
|
|
def aligned_zextmaskedloadvi16 : PatFrag<(ops node:$ptr, node:$pred, node:$passthru),
|
|
|
|
(aligned_maskedloadvi16 node:$ptr, node:$pred, node:$passthru), [{
|
2019-10-17 15:55:55 +08:00
|
|
|
return cast<MaskedLoadSDNode>(N)->getExtensionType() == ISD::ZEXTLOAD;
|
|
|
|
}]>;
|
2019-11-27 00:18:58 +08:00
|
|
|
def aligned_extmaskedloadvi16 : PatFrag<(ops node:$ptr, node:$pred, node:$passthru),
|
|
|
|
(aligned_maskedloadvi16 node:$ptr, node:$pred, node:$passthru), [{
|
2019-10-17 15:55:55 +08:00
|
|
|
auto *Ld = cast<MaskedLoadSDNode>(N);
|
|
|
|
EVT ScalarVT = Ld->getMemoryVT().getScalarType();
|
|
|
|
return ScalarVT.isInteger() && Ld->getExtensionType() == ISD::EXTLOAD;
|
|
|
|
}]>;
|
2019-11-27 00:18:58 +08:00
|
|
|
// Masked load (undef offset operand) of i32 or f32 memory elements whose
// recorded alignment is at least 4 bytes.
def aligned_maskedloadvi32 : PatFrag<(ops node:$ptr, node:$pred, node:$passthru),
                                     (masked_ld node:$ptr, undef, node:$pred, node:$passthru), [{
  auto *MaskedLd = cast<MaskedLoadSDNode>(N);
  if (MaskedLd->getAlignment() < 4)
    return false;
  EVT EltVT = MaskedLd->getMemoryVT().getScalarType();
  return EltVT == MVT::i32 || EltVT == MVT::f32;
}]>;
|
|
|
|
|
2019-11-27 00:18:58 +08:00
|
|
|
// Masked store (undef offset operand) whose memory element type is i8.
// No alignment check is made for byte elements.
def aligned_maskedstvi8 : PatFrag<(ops node:$val, node:$ptr, node:$pred),
                                  (masked_st node:$val, node:$ptr, undef, node:$pred), [{
  EVT EltVT = cast<MaskedStoreSDNode>(N)->getMemoryVT().getScalarType();
  return EltVT == MVT::i8;
}]>;
|
2019-11-27 00:18:58 +08:00
|
|
|
// Masked store (undef offset operand) of i16 or f16 memory elements whose
// recorded alignment is at least 2 bytes.
def aligned_maskedstvi16 : PatFrag<(ops node:$val, node:$ptr, node:$pred),
                                   (masked_st node:$val, node:$ptr, undef, node:$pred), [{
  auto *MaskedSt = cast<MaskedStoreSDNode>(N);
  if (MaskedSt->getAlignment() < 2)
    return false;
  EVT EltVT = MaskedSt->getMemoryVT().getScalarType();
  return EltVT == MVT::i16 || EltVT == MVT::f16;
}]>;
|
2019-11-27 00:18:58 +08:00
|
|
|
// Masked store (undef offset operand) of i32 or f32 memory elements whose
// recorded alignment is at least 4 bytes.
def aligned_maskedstvi32 : PatFrag<(ops node:$val, node:$ptr, node:$pred),
                                   (masked_st node:$val, node:$ptr, undef, node:$pred), [{
  auto *MaskedSt = cast<MaskedStoreSDNode>(N);
  if (MaskedSt->getAlignment() < 4)
    return false;
  EVT EltVT = MaskedSt->getMemoryVT().getScalarType();
  return EltVT == MVT::i32 || EltVT == MVT::f32;
}]>;
|
2019-09-15 22:14:47 +08:00
|
|
|
|
2019-11-21 22:56:37 +08:00
|
|
|
// Masked store whose addressing mode is pre-indexed (ISD::PRE_INC or
// ISD::PRE_DEC).
def pre_maskedstore : PatFrag<(ops node:$val, node:$base, node:$offset, node:$mask),
                              (masked_st node:$val, node:$base, node:$offset, node:$mask), [{
  switch (cast<MaskedStoreSDNode>(N)->getAddressingMode()) {
  case ISD::PRE_INC:
  case ISD::PRE_DEC:
    return true;
  default:
    return false;
  }
}]>;
|
|
|
|
// Masked store whose addressing mode is post-indexed (ISD::POST_INC or
// ISD::POST_DEC).
def post_maskedstore : PatFrag<(ops node:$val, node:$base, node:$offset, node:$mask),
                               (masked_st node:$val, node:$base, node:$offset, node:$mask), [{
  switch (cast<MaskedStoreSDNode>(N)->getAddressingMode()) {
  case ISD::POST_INC:
  case ISD::POST_DEC:
    return true;
  default:
    return false;
  }
}]>;
|
2019-11-27 00:18:58 +08:00
|
|
|
// pre_maskedstore restricted to i8 memory elements.
def aligned_pre_maskedstorevi8 : PatFrag<(ops node:$val, node:$ptr, node:$offset, node:$mask),
                                         (pre_maskedstore node:$val, node:$ptr, node:$offset, node:$mask), [{
  EVT EltVT = cast<MaskedStoreSDNode>(N)->getMemoryVT().getScalarType();
  return EltVT == MVT::i8;
}]>;
|
2019-11-27 00:18:58 +08:00
|
|
|
// post_maskedstore restricted to i8 memory elements.
def aligned_post_maskedstorevi8 : PatFrag<(ops node:$val, node:$ptr, node:$offset, node:$mask),
                                          (post_maskedstore node:$val, node:$ptr, node:$offset, node:$mask), [{
  EVT EltVT = cast<MaskedStoreSDNode>(N)->getMemoryVT().getScalarType();
  return EltVT == MVT::i8;
}]>;
|
2019-11-27 00:18:58 +08:00
|
|
|
// pre_maskedstore restricted to i16/f16 memory elements with a recorded
// alignment of at least 2 bytes.
def aligned_pre_maskedstorevi16 : PatFrag<(ops node:$val, node:$ptr, node:$offset, node:$mask),
                                          (pre_maskedstore node:$val, node:$ptr, node:$offset, node:$mask), [{
  auto *MaskedSt = cast<MaskedStoreSDNode>(N);
  if (MaskedSt->getAlignment() < 2)
    return false;
  EVT EltVT = MaskedSt->getMemoryVT().getScalarType();
  return EltVT == MVT::i16 || EltVT == MVT::f16;
}]>;
|
2019-11-27 00:18:58 +08:00
|
|
|
// post_maskedstore restricted to i16/f16 memory elements with a recorded
// alignment of at least 2 bytes.
def aligned_post_maskedstorevi16 : PatFrag<(ops node:$val, node:$ptr, node:$offset, node:$mask),
                                           (post_maskedstore node:$val, node:$ptr, node:$offset, node:$mask), [{
  auto *MaskedSt = cast<MaskedStoreSDNode>(N);
  if (MaskedSt->getAlignment() < 2)
    return false;
  EVT EltVT = MaskedSt->getMemoryVT().getScalarType();
  return EltVT == MVT::i16 || EltVT == MVT::f16;
}]>;
|
|
|
|
// pre_maskedstore restricted to i32/f32 memory elements with a recorded
// alignment of at least 4 bytes.
def aligned_pre_maskedstorevi32 : PatFrag<(ops node:$val, node:$ptr, node:$offset, node:$mask),
                                          (pre_maskedstore node:$val, node:$ptr, node:$offset, node:$mask), [{
  auto *MaskedSt = cast<MaskedStoreSDNode>(N);
  if (MaskedSt->getAlignment() < 4)
    return false;
  EVT EltVT = MaskedSt->getMemoryVT().getScalarType();
  return EltVT == MVT::i32 || EltVT == MVT::f32;
}]>;
|
|
|
|
// post_maskedstore restricted to i32/f32 memory elements with a recorded
// alignment of at least 4 bytes.
def aligned_post_maskedstorevi32 : PatFrag<(ops node:$val, node:$ptr, node:$offset, node:$mask),
                                           (post_maskedstore node:$val, node:$ptr, node:$offset, node:$mask), [{
  auto *MaskedSt = cast<MaskedStoreSDNode>(N);
  if (MaskedSt->getAlignment() < 4)
    return false;
  EVT EltVT = MaskedSt->getMemoryVT().getScalarType();
  return EltVT == MVT::i32 || EltVT == MVT::f32;
}]>;
|
|
|
|
|
|
|
|
|
2019-11-27 00:18:58 +08:00
|
|
|
// PatFrags for "Aligned" extending / truncating
|
|
|
|
|
|
|
|
// Byte-element extending loads. These are plain aliases of the generic
// fragments; no MinAlignment is attached to them here.
def aligned_extloadvi8  : PatFrag<(ops node:$ptr),
                                  (extloadvi8 node:$ptr)>;
def aligned_sextloadvi8 : PatFrag<(ops node:$ptr),
                                  (sextloadvi8 node:$ptr)>;
def aligned_zextloadvi8 : PatFrag<(ops node:$ptr),
                                  (zextloadvi8 node:$ptr)>;
|
|
|
|
|
|
|
|
// Byte-element truncating stores (plain, post-indexed and pre-indexed).
// These are plain aliases of the underlying fragments; no MinAlignment is
// attached to them here.
def aligned_truncstvi8
    : PatFrag<(ops node:$val, node:$ptr),
              (truncstorevi8 node:$val, node:$ptr)>;
def aligned_post_truncstvi8
    : PatFrag<(ops node:$val, node:$base, node:$offset),
              (post_truncstvi8 node:$val, node:$base, node:$offset)>;
def aligned_pre_truncstvi8
    : PatFrag<(ops node:$val, node:$base, node:$offset),
              (pre_truncstvi8 node:$val, node:$base, node:$offset)>;
|
|
|
|
|
|
|
|
// Halfword-element extending loads and truncating stores. Every fragment in
// this group has its MinAlignment field set to 2.
let MinAlignment = 2 in {
  // Extending loads.
  def aligned_extloadvi16  : PatFrag<(ops node:$ptr),
                                     (extloadvi16 node:$ptr)>;
  def aligned_sextloadvi16 : PatFrag<(ops node:$ptr),
                                     (sextloadvi16 node:$ptr)>;
  def aligned_zextloadvi16 : PatFrag<(ops node:$ptr),
                                     (zextloadvi16 node:$ptr)>;

  // Truncating stores: plain, post-indexed and pre-indexed.
  def aligned_truncstvi16
      : PatFrag<(ops node:$val, node:$ptr),
                (truncstorevi16 node:$val, node:$ptr)>;
  def aligned_post_truncstvi16
      : PatFrag<(ops node:$val, node:$base, node:$offset),
                (post_truncstvi16 node:$val, node:$base, node:$offset)>;
  def aligned_pre_truncstvi16
      : PatFrag<(ops node:$val, node:$base, node:$offset),
                (pre_truncstvi16 node:$val, node:$base, node:$offset)>;
}
|
|
|
|
|
|
|
|
// Masked store (undef offset operand) that is a truncating store.
def truncmaskedst : PatFrag<(ops node:$val, node:$base, node:$pred),
                            (masked_st node:$val, node:$base, undef, node:$pred), [{
  auto *MaskedSt = cast<MaskedStoreSDNode>(N);
  return MaskedSt->isTruncatingStore();
}]>;
|
|
|
|
// truncmaskedst restricted to i8 memory elements.
def aligned_truncmaskedstvi8 : PatFrag<(ops node:$val, node:$base, node:$pred),
                                       (truncmaskedst node:$val, node:$base, node:$pred), [{
  EVT EltVT = cast<MaskedStoreSDNode>(N)->getMemoryVT().getScalarType();
  return EltVT == MVT::i8;
}]>;
|
|
|
|
// truncmaskedst restricted to i16/f16 memory elements with a recorded
// alignment of at least 2 bytes.
def aligned_truncmaskedstvi16 : PatFrag<(ops node:$val, node:$base, node:$pred),
                                        (truncmaskedst node:$val, node:$base, node:$pred), [{
  auto *MaskedSt = cast<MaskedStoreSDNode>(N);
  if (MaskedSt->getAlignment() < 2)
    return false;
  EVT EltVT = MaskedSt->getMemoryVT().getScalarType();
  return EltVT == MVT::i16 || EltVT == MVT::f16;
}]>;
|
|
|
|
// Truncating masked store with a pre-indexed addressing mode
// (ISD::PRE_INC or ISD::PRE_DEC).
def pre_truncmaskedst : PatFrag<(ops node:$val, node:$base, node:$offset, node:$pred),
                                (masked_st node:$val, node:$base, node:$offset, node:$pred), [{
  auto *MaskedSt = cast<MaskedStoreSDNode>(N);
  if (!MaskedSt->isTruncatingStore())
    return false;
  ISD::MemIndexedMode Mode = MaskedSt->getAddressingMode();
  return Mode == ISD::PRE_INC || Mode == ISD::PRE_DEC;
}]>;
|
|
|
|
// pre_truncmaskedst restricted to i8 memory elements.
def aligned_pre_truncmaskedstvi8 : PatFrag<(ops node:$val, node:$base, node:$offset, node:$pred),
                                           (pre_truncmaskedst node:$val, node:$base, node:$offset, node:$pred), [{
  EVT EltVT = cast<MaskedStoreSDNode>(N)->getMemoryVT().getScalarType();
  return EltVT == MVT::i8;
}]>;
|
|
|
|
// pre_truncmaskedst restricted to i16/f16 memory elements with a recorded
// alignment of at least 2 bytes.
def aligned_pre_truncmaskedstvi16 : PatFrag<(ops node:$val, node:$base, node:$offset, node:$pred),
                                            (pre_truncmaskedst node:$val, node:$base, node:$offset, node:$pred), [{
  auto *MaskedSt = cast<MaskedStoreSDNode>(N);
  if (MaskedSt->getAlignment() < 2)
    return false;
  EVT EltVT = MaskedSt->getMemoryVT().getScalarType();
  return EltVT == MVT::i16 || EltVT == MVT::f16;
}]>;
|
|
|
|
// Truncating masked store with a post-indexed addressing mode
// (ISD::POST_INC or ISD::POST_DEC).
//
// The fourth operand is the predicate mask. It is named $pred, as in
// pre_truncmaskedst; PatFrag operand names are local to the fragment, so
// users of this fragment are unaffected by the name.
def post_truncmaskedst : PatFrag<(ops node:$val, node:$base, node:$offset, node:$pred),
                                 (masked_st node:$val, node:$base, node:$offset, node:$pred), [{
  ISD::MemIndexedMode AM = cast<MaskedStoreSDNode>(N)->getAddressingMode();
  return cast<MaskedStoreSDNode>(N)->isTruncatingStore() && (AM == ISD::POST_INC || AM == ISD::POST_DEC);
}]>;
|
|
|
|
// post_truncmaskedst restricted to i8 memory elements.
// The predicate-mask operand is named $pred to match the pre-indexed and
// unindexed sibling fragments; the name is local to this fragment.
def aligned_post_truncmaskedstvi8 : PatFrag<(ops node:$val, node:$base, node:$offset, node:$pred),
                                            (post_truncmaskedst node:$val, node:$base, node:$offset, node:$pred), [{
  return cast<MaskedStoreSDNode>(N)->getMemoryVT().getScalarType() == MVT::i8;
}]>;
|
|
|
|
// post_truncmaskedst restricted to i16/f16 memory elements with a recorded
// alignment of at least 2 bytes.
// The predicate-mask operand is named $pred to match the pre-indexed and
// unindexed sibling fragments; the name is local to this fragment.
def aligned_post_truncmaskedstvi16 : PatFrag<(ops node:$val, node:$base, node:$offset, node:$pred),
                                             (post_truncmaskedst node:$val, node:$base, node:$offset, node:$pred), [{
  auto *St = cast<MaskedStoreSDNode>(N);
  EVT ScalarVT = St->getMemoryVT().getScalarType();
  return (ScalarVT == MVT::i16 || ScalarVT == MVT::f16) && St->getAlignment() >= 2;
}]>;
|
|
|
|
|
|
|
|
// Load/store patterns
|
|
|
|
|
|
|
|
// Selects RegImmInst for a StoreKind store of a Ty vector held in MQPR,
// addressed through t2addrmode_imm7<shift>.
class MVE_vector_store_typed<ValueType Ty, Instruction RegImmInst,
                             PatFrag StoreKind, int shift>
    : Pat<(StoreKind (Ty MQPR:$val), t2addrmode_imm7<shift>:$addr),
          (RegImmInst (Ty MQPR:$val), t2addrmode_imm7<shift>:$addr)>;
|
|
|
|
|
|
|
|
// Predicated form of MVE_vector_store_typed: the source fragment carries an
// extra VCCR:$pred operand, and the selected instruction receives the two
// trailing operands (i32 1) and VCCR:$pred.
class MVE_vector_maskedstore_typed<ValueType Ty, Instruction RegImmInst,
                                   PatFrag StoreKind, int shift>
    : Pat<(StoreKind (Ty MQPR:$val), t2addrmode_imm7<shift>:$addr,
                     VCCR:$pred),
          (RegImmInst (Ty MQPR:$val), t2addrmode_imm7<shift>:$addr,
                      (i32 1), VCCR:$pred)>;
|
|
|
|
|
|
|
|
// Instantiates MVE_vector_store_typed once per 128-bit vector type, all
// sharing the same instruction, store fragment and immediate shift.
multiclass MVE_vector_store<Instruction RegImmInst, PatFrag StoreKind,
                            int shift> {
  foreach VT = [v16i8, v8i16, v8f16, v4i32, v4f32, v2i64, v2f64] in
    def : MVE_vector_store_typed<VT, RegImmInst, StoreKind, shift>;
}
|
|
|
|
|
|
|
|
// Selects RegImmInst for a LoadKind load producing a Ty vector, addressed
// through t2addrmode_imm7<shift>.
class MVE_vector_load_typed<ValueType Ty, Instruction RegImmInst,
                            PatFrag LoadKind, int shift>
    : Pat<(Ty (LoadKind t2addrmode_imm7<shift>:$addr)),
          (Ty (RegImmInst t2addrmode_imm7<shift>:$addr))>;
|
|
|
|
|
|
|
|
// Predicated form of MVE_vector_load_typed. The source pattern only matches
// when the passthru operand is (Ty NEONimmAllZerosV); the selected
// instruction receives the two trailing operands (i32 1) and VCCR:$pred.
class MVE_vector_maskedload_typed<ValueType Ty, Instruction RegImmInst,
                                  PatFrag LoadKind, int shift>
    : Pat<(Ty (LoadKind t2addrmode_imm7<shift>:$addr, VCCR:$pred,
                        (Ty NEONimmAllZerosV))),
          (Ty (RegImmInst t2addrmode_imm7<shift>:$addr,
                          (i32 1), VCCR:$pred))>;
|
|
|
|
|
|
|
|
// Instantiates MVE_vector_load_typed once per 128-bit vector type, all
// sharing the same instruction, load fragment and immediate shift.
multiclass MVE_vector_load<Instruction RegImmInst, PatFrag LoadKind,
                           int shift> {
  foreach VT = [v16i8, v8i16, v8f16, v4i32, v4f32, v2i64, v2f64] in
    def : MVE_vector_load_typed<VT, RegImmInst, LoadKind, shift>;
}
|
|
|
|
|
|
|
|
// Selects Opcode for a StoreKind store of a Ty vector that takes a tGPR base
// register ($Rn) and a t2am_imm7_offset<shift> offset ($addr).
class MVE_vector_offset_store_typed<ValueType Ty, Instruction Opcode,
                                    PatFrag StoreKind, int shift>
    : Pat<(StoreKind (Ty MQPR:$Rt), tGPR:$Rn,
                     t2am_imm7_offset<shift>:$addr),
          (Opcode MQPR:$Rt, tGPR:$Rn,
                  t2am_imm7_offset<shift>:$addr)>;
|
|
|
|
|
|
|
|
// Predicated form of MVE_vector_offset_store_typed: the source fragment
// carries an extra VCCR:$pred operand, and the selected instruction receives
// the two trailing operands (i32 1) and VCCR:$pred.
class MVE_vector_offset_maskedstore_typed<ValueType Ty, Instruction Opcode,
                                          PatFrag StoreKind, int shift>
    : Pat<(StoreKind (Ty MQPR:$Rt), tGPR:$Rn,
                     t2am_imm7_offset<shift>:$addr, VCCR:$pred),
          (Opcode MQPR:$Rt, tGPR:$Rn,
                  t2am_imm7_offset<shift>:$addr, (i32 1), VCCR:$pred)>;
|
|
|
|
|
|
|
|
// Instantiates MVE_vector_offset_store_typed once per 128-bit vector type,
// all sharing the same instruction, store fragment and immediate shift.
multiclass MVE_vector_offset_store<Instruction RegImmInst, PatFrag StoreKind,
                                   int shift> {
  foreach VT = [v16i8, v8i16, v8f16, v4i32, v4f32, v2i64, v2f64] in
    def : MVE_vector_offset_store_typed<VT, RegImmInst, StoreKind, shift>;
}
|
|
|
|
|
|
|
|
|
[ARM] Code-generation infrastructure for MVE.
This provides the low-level support to start using MVE vector types in
LLVM IR, loading and storing them, passing them to __asm__ statements
containing hand-written MVE vector instructions, and *if* you have the
hard-float ABI turned on, using them as function parameters.
(In the soft-float ABI, vector types are passed in integer registers,
and combining all those 32-bit integers into a q-reg requires support
for selection DAG nodes like insert_vector_elt and build_vector which
aren't implemented yet for MVE. In fact I've also had to add
`arm_aapcs_vfpcc` to a couple of existing tests to avoid that
problem.)
Specifically, this commit adds support for:
* spills, reloads and register moves for MVE vector registers
* ditto for the VPT predication mask that lives in VPR.P0
* make all the MVE vector types legal in ISel, and provide selection
DAG patterns for BITCAST, LOAD and STORE
* make loads and stores of scalar FP types conditional on
`hasFPRegs()` rather than `hasVFP2Base()`. As a result a few
existing tests needed their llc command lines updating to use
`-mattr=-fpregs` as their method of turning off all hardware FP
support.
Reviewers: dmgreen, samparker, SjoerdMeijer
Subscribers: javed.absar, kristof.beyls, hiraditya, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D60708
llvm-svn: 364329
2019-06-26 00:48:46 +08:00
|
|
|
// Little-endian load/store selection. Each defm covers all seven 128-bit
// vector types; the instruction is picked by the store/load fragment used.
let Predicates = [HasMVEInt, IsLE] in {
  // Stores
  defm : MVE_vector_store<MVE_VSTRBU8,  byte_alignedstore,  0>;
  defm : MVE_vector_store<MVE_VSTRHU16, hword_alignedstore, 1>;
  defm : MVE_vector_store<MVE_VSTRWU32, alignedstore32,     2>;

  // Loads
  defm : MVE_vector_load<MVE_VLDRBU8,  byte_alignedload,  0>;
  defm : MVE_vector_load<MVE_VLDRHU16, hword_alignedload, 1>;
  defm : MVE_vector_load<MVE_VLDRWU32, alignedload32,     2>;

  // Pre/post inc stores
  defm : MVE_vector_offset_store<MVE_VSTRBU8_pre,   pre_store,            0>;
  defm : MVE_vector_offset_store<MVE_VSTRBU8_post,  post_store,           0>;
  defm : MVE_vector_offset_store<MVE_VSTRHU16_pre,  aligned16_pre_store,  1>;
  defm : MVE_vector_offset_store<MVE_VSTRHU16_post, aligned16_post_store, 1>;
  defm : MVE_vector_offset_store<MVE_VSTRWU32_pre,  aligned32_pre_store,  2>;
  defm : MVE_vector_offset_store<MVE_VSTRWU32_post, aligned32_post_store, 2>;
}
|
|
|
|
|
|
|
|
// Big-endian load/store selection. Each vector type is paired explicitly
// with the instruction of matching element width.
let Predicates = [HasMVEInt, IsBE] in {
  // Aligned Stores
  def : MVE_vector_store_typed<v16i8, MVE_VSTRBU8,  store,          0>;
  def : MVE_vector_store_typed<v8i16, MVE_VSTRHU16, alignedstore16, 1>;
  def : MVE_vector_store_typed<v8f16, MVE_VSTRHU16, alignedstore16, 1>;
  def : MVE_vector_store_typed<v4i32, MVE_VSTRWU32, alignedstore32, 2>;
  def : MVE_vector_store_typed<v4f32, MVE_VSTRWU32, alignedstore32, 2>;

  // Aligned Loads
  def : MVE_vector_load_typed<v16i8, MVE_VLDRBU8,  load,          0>;
  def : MVE_vector_load_typed<v8i16, MVE_VLDRHU16, alignedload16, 1>;
  def : MVE_vector_load_typed<v8f16, MVE_VLDRHU16, alignedload16, 1>;
  def : MVE_vector_load_typed<v4i32, MVE_VLDRWU32, alignedload32, 2>;
  def : MVE_vector_load_typed<v4f32, MVE_VLDRWU32, alignedload32, 2>;

  // Other unaligned loads/stores need to go through a VREV: a byte load
  // followed by a VREV of the element width, or a VREV followed by a byte
  // store.
  def : Pat<(v2f64 (load t2addrmode_imm7<0>:$addr)),
            (v2f64 (MVE_VREV64_8 (MVE_VLDRBU8 t2addrmode_imm7<0>:$addr)))>;
  def : Pat<(v2i64 (load t2addrmode_imm7<0>:$addr)),
            (v2i64 (MVE_VREV64_8 (MVE_VLDRBU8 t2addrmode_imm7<0>:$addr)))>;
  def : Pat<(v4i32 (load t2addrmode_imm7<0>:$addr)),
            (v4i32 (MVE_VREV32_8 (MVE_VLDRBU8 t2addrmode_imm7<0>:$addr)))>;
  def : Pat<(v4f32 (load t2addrmode_imm7<0>:$addr)),
            (v4f32 (MVE_VREV32_8 (MVE_VLDRBU8 t2addrmode_imm7<0>:$addr)))>;
  def : Pat<(v8i16 (load t2addrmode_imm7<0>:$addr)),
            (v8i16 (MVE_VREV16_8 (MVE_VLDRBU8 t2addrmode_imm7<0>:$addr)))>;
  def : Pat<(v8f16 (load t2addrmode_imm7<0>:$addr)),
            (v8f16 (MVE_VREV16_8 (MVE_VLDRBU8 t2addrmode_imm7<0>:$addr)))>;
  def : Pat<(store (v2f64 MQPR:$val), t2addrmode_imm7<0>:$addr),
            (MVE_VSTRBU8 (MVE_VREV64_8 MQPR:$val), t2addrmode_imm7<0>:$addr)>;
  def : Pat<(store (v2i64 MQPR:$val), t2addrmode_imm7<0>:$addr),
            (MVE_VSTRBU8 (MVE_VREV64_8 MQPR:$val), t2addrmode_imm7<0>:$addr)>;
  def : Pat<(store (v4i32 MQPR:$val), t2addrmode_imm7<0>:$addr),
            (MVE_VSTRBU8 (MVE_VREV32_8 MQPR:$val), t2addrmode_imm7<0>:$addr)>;
  def : Pat<(store (v4f32 MQPR:$val), t2addrmode_imm7<0>:$addr),
            (MVE_VSTRBU8 (MVE_VREV32_8 MQPR:$val), t2addrmode_imm7<0>:$addr)>;
  def : Pat<(store (v8i16 MQPR:$val), t2addrmode_imm7<0>:$addr),
            (MVE_VSTRBU8 (MVE_VREV16_8 MQPR:$val), t2addrmode_imm7<0>:$addr)>;
  def : Pat<(store (v8f16 MQPR:$val), t2addrmode_imm7<0>:$addr),
            (MVE_VSTRBU8 (MVE_VREV16_8 MQPR:$val), t2addrmode_imm7<0>:$addr)>;

  // Pre/Post inc stores
  def : MVE_vector_offset_store_typed<v16i8, MVE_VSTRBU8_pre,   pre_store,            0>;
  def : MVE_vector_offset_store_typed<v16i8, MVE_VSTRBU8_post,  post_store,           0>;
  def : MVE_vector_offset_store_typed<v8i16, MVE_VSTRHU16_pre,  aligned16_pre_store,  1>;
  def : MVE_vector_offset_store_typed<v8i16, MVE_VSTRHU16_post, aligned16_post_store, 1>;
  def : MVE_vector_offset_store_typed<v8f16, MVE_VSTRHU16_pre,  aligned16_pre_store,  1>;
  def : MVE_vector_offset_store_typed<v8f16, MVE_VSTRHU16_post, aligned16_post_store, 1>;
  def : MVE_vector_offset_store_typed<v4i32, MVE_VSTRWU32_pre,  aligned32_pre_store,  2>;
  def : MVE_vector_offset_store_typed<v4i32, MVE_VSTRWU32_post, aligned32_post_store, 2>;
  def : MVE_vector_offset_store_typed<v4f32, MVE_VSTRWU32_pre,  aligned32_pre_store,  2>;
  def : MVE_vector_offset_store_typed<v4f32, MVE_VSTRWU32_post, aligned32_post_store, 2>;
}
|
|
|
|
|
2019-09-15 22:14:47 +08:00
|
|
|
// Masked (predicated) load/store selection. This block carries no endianness
// predicate, so the patterns apply to both LE and BE.
let Predicates = [HasMVEInt] in {
  // Aligned masked store, shared between LE and BE
  def : MVE_vector_maskedstore_typed<v16i8, MVE_VSTRBU8,  aligned_maskedstvi8,  0>;
  def : MVE_vector_maskedstore_typed<v8i16, MVE_VSTRHU16, aligned_maskedstvi16, 1>;
  def : MVE_vector_maskedstore_typed<v8f16, MVE_VSTRHU16, aligned_maskedstvi16, 1>;
  def : MVE_vector_maskedstore_typed<v4i32, MVE_VSTRWU32, aligned_maskedstvi32, 2>;
  def : MVE_vector_maskedstore_typed<v4f32, MVE_VSTRWU32, aligned_maskedstvi32, 2>;

  // Pre/Post inc masked stores
  def : MVE_vector_offset_maskedstore_typed<v16i8, MVE_VSTRBU8_pre,   aligned_pre_maskedstorevi8,   0>;
  def : MVE_vector_offset_maskedstore_typed<v16i8, MVE_VSTRBU8_post,  aligned_post_maskedstorevi8,  0>;
  def : MVE_vector_offset_maskedstore_typed<v8i16, MVE_VSTRHU16_pre,  aligned_pre_maskedstorevi16,  1>;
  def : MVE_vector_offset_maskedstore_typed<v8i16, MVE_VSTRHU16_post, aligned_post_maskedstorevi16, 1>;
  def : MVE_vector_offset_maskedstore_typed<v8f16, MVE_VSTRHU16_pre,  aligned_pre_maskedstorevi16,  1>;
  def : MVE_vector_offset_maskedstore_typed<v8f16, MVE_VSTRHU16_post, aligned_post_maskedstorevi16, 1>;
  def : MVE_vector_offset_maskedstore_typed<v4i32, MVE_VSTRWU32_pre,  aligned_pre_maskedstorevi32,  2>;
  def : MVE_vector_offset_maskedstore_typed<v4i32, MVE_VSTRWU32_post, aligned_post_maskedstorevi32, 2>;
  def : MVE_vector_offset_maskedstore_typed<v4f32, MVE_VSTRWU32_pre,  aligned_pre_maskedstorevi32,  2>;
  def : MVE_vector_offset_maskedstore_typed<v4f32, MVE_VSTRWU32_post, aligned_post_maskedstorevi32, 2>;

  // Aligned masked loads
  def : MVE_vector_maskedload_typed<v16i8, MVE_VLDRBU8,  aligned_maskedloadvi8,  0>;
  def : MVE_vector_maskedload_typed<v8i16, MVE_VLDRHU16, aligned_maskedloadvi16, 1>;
  def : MVE_vector_maskedload_typed<v8f16, MVE_VLDRHU16, aligned_maskedloadvi16, 1>;
  def : MVE_vector_maskedload_typed<v4i32, MVE_VLDRWU32, aligned_maskedloadvi32, 2>;
  def : MVE_vector_maskedload_typed<v4f32, MVE_VLDRWU32, aligned_maskedloadvi32, 2>;
}
|
2019-06-28 17:47:55 +08:00
|
|
|
|
|
|
|
// Widening/Narrowing Loads/Stores
|
|
|
|
|
2019-11-27 00:18:58 +08:00
|
|
|
// Patterns for widening loads and narrowing (truncating) stores of one
// vector type VT.
//   LoadSInst - instruction selected for sign-extending loads.
//   LoadUInst - instruction selected for zero-extending and any-extending
//               loads.
//   StoreInst - name of the truncating-store instruction; its "_post" and
//               "_pre" suffixed records are used for the indexed forms.
//   Amble     - suffix appended to the "aligned_*" PatFrag names that are
//               looked up with !cast (e.g. "vi8", "vi16").
//   Shift     - immediate shift given to the addressing-mode operands.
multiclass MVEExtLoadStore<Instruction LoadSInst, Instruction LoadUInst, string StoreInst,
                           string Amble, ValueType VT, int Shift> {
  // Trunc stores
  def : Pat<(!cast<PatFrag>("aligned_truncst"#Amble)
                (VT MQPR:$val), taddrmode_imm7<Shift>:$addr),
            (!cast<Instruction>(StoreInst)
                MQPR:$val, taddrmode_imm7<Shift>:$addr)>;
  def : Pat<(!cast<PatFrag>("aligned_post_truncst"#Amble)
                (VT MQPR:$Rt), tGPR:$Rn, t2am_imm7_offset<Shift>:$addr),
            (!cast<Instruction>(StoreInst#"_post")
                MQPR:$Rt, tGPR:$Rn, t2am_imm7_offset<Shift>:$addr)>;
  def : Pat<(!cast<PatFrag>("aligned_pre_truncst"#Amble)
                (VT MQPR:$Rt), tGPR:$Rn, t2am_imm7_offset<Shift>:$addr),
            (!cast<Instruction>(StoreInst#"_pre")
                MQPR:$Rt, tGPR:$Rn, t2am_imm7_offset<Shift>:$addr)>;

  // Masked trunc stores
  def : Pat<(!cast<PatFrag>("aligned_truncmaskedst"#Amble)
                (VT MQPR:$val), taddrmode_imm7<Shift>:$addr, VCCR:$pred),
            (!cast<Instruction>(StoreInst)
                MQPR:$val, taddrmode_imm7<Shift>:$addr, (i32 1), VCCR:$pred)>;
  def : Pat<(!cast<PatFrag>("aligned_post_truncmaskedst"#Amble)
                (VT MQPR:$Rt), tGPR:$Rn, t2am_imm7_offset<Shift>:$addr, VCCR:$pred),
            (!cast<Instruction>(StoreInst#"_post")
                MQPR:$Rt, tGPR:$Rn, t2am_imm7_offset<Shift>:$addr, (i32 1), VCCR:$pred)>;
  def : Pat<(!cast<PatFrag>("aligned_pre_truncmaskedst"#Amble)
                (VT MQPR:$Rt), tGPR:$Rn, t2am_imm7_offset<Shift>:$addr, VCCR:$pred),
            (!cast<Instruction>(StoreInst#"_pre")
                MQPR:$Rt, tGPR:$Rn, t2am_imm7_offset<Shift>:$addr, (i32 1), VCCR:$pred)>;

  // Ext loads
  def : Pat<(VT (!cast<PatFrag>("aligned_extload"#Amble)
                    taddrmode_imm7<Shift>:$addr)),
            (VT (LoadUInst taddrmode_imm7<Shift>:$addr))>;
  def : Pat<(VT (!cast<PatFrag>("aligned_sextload"#Amble)
                    taddrmode_imm7<Shift>:$addr)),
            (VT (LoadSInst taddrmode_imm7<Shift>:$addr))>;
  def : Pat<(VT (!cast<PatFrag>("aligned_zextload"#Amble)
                    taddrmode_imm7<Shift>:$addr)),
            (VT (LoadUInst taddrmode_imm7<Shift>:$addr))>;

  // Masked ext loads
  def : Pat<(VT (!cast<PatFrag>("aligned_extmaskedload"#Amble)
                    taddrmode_imm7<Shift>:$addr, VCCR:$pred, (VT NEONimmAllZerosV))),
            (VT (LoadUInst taddrmode_imm7<Shift>:$addr, (i32 1), VCCR:$pred))>;
  def : Pat<(VT (!cast<PatFrag>("aligned_sextmaskedload"#Amble)
                    taddrmode_imm7<Shift>:$addr, VCCR:$pred, (VT NEONimmAllZerosV))),
            (VT (LoadSInst taddrmode_imm7<Shift>:$addr, (i32 1), VCCR:$pred))>;
  def : Pat<(VT (!cast<PatFrag>("aligned_zextmaskedload"#Amble)
                    taddrmode_imm7<Shift>:$addr, VCCR:$pred, (VT NEONimmAllZerosV))),
            (VT (LoadUInst taddrmode_imm7<Shift>:$addr, (i32 1), VCCR:$pred))>;
}
|
|
|
|
|
|
|
|
// Widening-load / narrowing-store instantiations: byte elements in v8i16,
// byte elements in v4i32, and halfword elements in v4i32.
let Predicates = [HasMVEInt] in {
  defm : MVEExtLoadStore<MVE_VLDRBS16, MVE_VLDRBU16, "MVE_VSTRB16",
                         "vi8", v8i16, 0>;
  defm : MVEExtLoadStore<MVE_VLDRBS32, MVE_VLDRBU32, "MVE_VSTRB32",
                         "vi8", v4i32, 0>;
  defm : MVEExtLoadStore<MVE_VLDRHS32, MVE_VLDRHU32, "MVE_VSTRH32",
                         "vi16", v4i32, 1>;
}
|
|
|
|
|
|
|
|
|
[ARM] Code-generation infrastructure for MVE.
This provides the low-level support to start using MVE vector types in
LLVM IR, loading and storing them, passing them to __asm__ statements
containing hand-written MVE vector instructions, and *if* you have the
hard-float ABI turned on, using them as function parameters.
(In the soft-float ABI, vector types are passed in integer registers,
and combining all those 32-bit integers into a q-reg requires support
for selection DAG nodes like insert_vector_elt and build_vector which
aren't implemented yet for MVE. In fact I've also had to add
`arm_aapcs_vfpcc` to a couple of existing tests to avoid that
problem.)
Specifically, this commit adds support for:
* spills, reloads and register moves for MVE vector registers
* ditto for the VPT predication mask that lives in VPR.P0
* make all the MVE vector types legal in ISel, and provide selection
DAG patterns for BITCAST, LOAD and STORE
* make loads and stores of scalar FP types conditional on
`hasFPRegs()` rather than `hasVFP2Base()`. As a result a few
existing tests needed their llc command lines updating to use
`-mattr=-fpregs` as their method of turning off all hardware FP
support.
Reviewers: dmgreen, samparker, SjoerdMeijer
Subscribers: javed.absar, kristof.beyls, hiraditya, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D60708
llvm-svn: 364329
2019-06-26 00:48:46 +08:00
|
|
|
// Bit convert patterns
|
|
|
|
|
|
|
|
let Predicates = [HasMVEInt] in {
|
2019-09-03 01:18:23 +08:00
|
|
|
def : Pat<(v2f64 (bitconvert (v2i64 MQPR:$src))), (v2f64 MQPR:$src)>;
|
|
|
|
def : Pat<(v2i64 (bitconvert (v2f64 MQPR:$src))), (v2i64 MQPR:$src)>;
|
[ARM] Code-generation infrastructure for MVE.
This provides the low-level support to start using MVE vector types in
LLVM IR, loading and storing them, passing them to __asm__ statements
containing hand-written MVE vector instructions, and *if* you have the
hard-float ABI turned on, using them as function parameters.
(In the soft-float ABI, vector types are passed in integer registers,
and combining all those 32-bit integers into a q-reg requires support
for selection DAG nodes like insert_vector_elt and build_vector which
aren't implemented yet for MVE. In fact I've also had to add
`arm_aapcs_vfpcc` to a couple of existing tests to avoid that
problem.)
Specifically, this commit adds support for:
* spills, reloads and register moves for MVE vector registers
* ditto for the VPT predication mask that lives in VPR.P0
* make all the MVE vector types legal in ISel, and provide selection
DAG patterns for BITCAST, LOAD and STORE
* make loads and stores of scalar FP types conditional on
`hasFPRegs()` rather than `hasVFP2Base()`. As a result a few
existing tests needed their llc command lines updating to use
`-mattr=-fpregs` as their method of turning off all hardware FP
support.
Reviewers: dmgreen, samparker, SjoerdMeijer
Subscribers: javed.absar, kristof.beyls, hiraditya, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D60708
llvm-svn: 364329
2019-06-26 00:48:46 +08:00
|
|
|
|
2019-09-03 01:18:23 +08:00
|
|
|
def : Pat<(v4i32 (bitconvert (v4f32 MQPR:$src))), (v4i32 MQPR:$src)>;
|
|
|
|
def : Pat<(v4f32 (bitconvert (v4i32 MQPR:$src))), (v4f32 MQPR:$src)>;
|
[ARM] Code-generation infrastructure for MVE.
This provides the low-level support to start using MVE vector types in
LLVM IR, loading and storing them, passing them to __asm__ statements
containing hand-written MVE vector instructions, and *if* you have the
hard-float ABI turned on, using them as function parameters.
(In the soft-float ABI, vector types are passed in integer registers,
and combining all those 32-bit integers into a q-reg requires support
for selection DAG nodes like insert_vector_elt and build_vector which
aren't implemented yet for MVE. In fact I've also had to add
`arm_aapcs_vfpcc` to a couple of existing tests to avoid that
problem.)
Specifically, this commit adds support for:
* spills, reloads and register moves for MVE vector registers
* ditto for the VPT predication mask that lives in VPR.P0
* make all the MVE vector types legal in ISel, and provide selection
DAG patterns for BITCAST, LOAD and STORE
* make loads and stores of scalar FP types conditional on
`hasFPRegs()` rather than `hasVFP2Base()`. As a result a few
existing tests needed their llc command lines updating to use
`-mattr=-fpregs` as their method of turning off all hardware FP
support.
Reviewers: dmgreen, samparker, SjoerdMeijer
Subscribers: javed.absar, kristof.beyls, hiraditya, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D60708
llvm-svn: 364329
2019-06-26 00:48:46 +08:00
|
|
|
|
2019-09-03 01:18:23 +08:00
|
|
|
// Bitcast v8f16 -> v8i16: the result is the same MQPR source register,
// retyped; the output pattern names no instruction.
def : Pat<(v8i16 (bitconvert (v8f16 MQPR:$src))),
          (v8i16 MQPR:$src)>;
|
|
|
|
// Bitcast v8i16 -> v8f16: the result is the same MQPR source register,
// retyped; the output pattern names no instruction.
def : Pat<(v8f16 (bitconvert (v8i16 MQPR:$src))),
          (v8f16 MQPR:$src)>;
|
[ARM] Code-generation infrastructure for MVE.
This provides the low-level support to start using MVE vector types in
LLVM IR, loading and storing them, passing them to __asm__ statements
containing hand-written MVE vector instructions, and *if* you have the
hard-float ABI turned on, using them as function parameters.
(In the soft-float ABI, vector types are passed in integer registers,
and combining all those 32-bit integers into a q-reg requires support
for selection DAG nodes like insert_vector_elt and build_vector which
aren't implemented yet for MVE. In fact I've also had to add
`arm_aapcs_vfpcc` to a couple of existing tests to avoid that
problem.)
Specifically, this commit adds support for:
* spills, reloads and register moves for MVE vector registers
* ditto for the VPT predication mask that lives in VPR.P0
* make all the MVE vector types legal in ISel, and provide selection
DAG patterns for BITCAST, LOAD and STORE
* make loads and stores of scalar FP types conditional on
`hasFPRegs()` rather than `hasVFP2Base()`. As a result a few
existing tests needed their llc command lines updating to use
`-mattr=-fpregs` as their method of turning off all hardware FP
support.
Reviewers: dmgreen, samparker, SjoerdMeijer
Subscribers: javed.absar, kristof.beyls, hiraditya, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D60708
llvm-svn: 364329
2019-06-26 00:48:46 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
// Bitconvert patterns that apply only when both the IsLE and HasMVEInt
// predicates hold. Every pattern in this block maps a bitconvert between two
// vector types with different element widths, both held in MQPR, onto the
// unchanged source register retyped to the destination type: no output
// pattern here names an instruction. The patterns are grouped by destination
// type, one group per type, in the order v2f64, v2i64, v4f32, v4i32, v8f16,
// v8i16, v16i8.
let Predicates = [IsLE,HasMVEInt] in {
|
2019-09-03 01:18:23 +08:00
|
|
|
// Destination type v2f64.
def : Pat<(v2f64 (bitconvert (v4f32 MQPR:$src))), (v2f64 MQPR:$src)>;
|
|
|
|
def : Pat<(v2f64 (bitconvert (v4i32 MQPR:$src))), (v2f64 MQPR:$src)>;
|
|
|
|
def : Pat<(v2f64 (bitconvert (v8f16 MQPR:$src))), (v2f64 MQPR:$src)>;
|
|
|
|
def : Pat<(v2f64 (bitconvert (v8i16 MQPR:$src))), (v2f64 MQPR:$src)>;
|
|
|
|
def : Pat<(v2f64 (bitconvert (v16i8 MQPR:$src))), (v2f64 MQPR:$src)>;
|
|
|
|
|
|
|
|
// Destination type v2i64.
def : Pat<(v2i64 (bitconvert (v4f32 MQPR:$src))), (v2i64 MQPR:$src)>;
|
|
|
|
def : Pat<(v2i64 (bitconvert (v4i32 MQPR:$src))), (v2i64 MQPR:$src)>;
|
|
|
|
def : Pat<(v2i64 (bitconvert (v8f16 MQPR:$src))), (v2i64 MQPR:$src)>;
|
|
|
|
def : Pat<(v2i64 (bitconvert (v8i16 MQPR:$src))), (v2i64 MQPR:$src)>;
|
|
|
|
def : Pat<(v2i64 (bitconvert (v16i8 MQPR:$src))), (v2i64 MQPR:$src)>;
|
|
|
|
|
|
|
|
// Destination type v4f32.
def : Pat<(v4f32 (bitconvert (v2f64 MQPR:$src))), (v4f32 MQPR:$src)>;
|
|
|
|
def : Pat<(v4f32 (bitconvert (v2i64 MQPR:$src))), (v4f32 MQPR:$src)>;
|
|
|
|
def : Pat<(v4f32 (bitconvert (v8f16 MQPR:$src))), (v4f32 MQPR:$src)>;
|
|
|
|
def : Pat<(v4f32 (bitconvert (v8i16 MQPR:$src))), (v4f32 MQPR:$src)>;
|
|
|
|
def : Pat<(v4f32 (bitconvert (v16i8 MQPR:$src))), (v4f32 MQPR:$src)>;
|
|
|
|
|
|
|
|
// Destination type v4i32.
def : Pat<(v4i32 (bitconvert (v2f64 MQPR:$src))), (v4i32 MQPR:$src)>;
|
|
|
|
def : Pat<(v4i32 (bitconvert (v2i64 MQPR:$src))), (v4i32 MQPR:$src)>;
|
|
|
|
def : Pat<(v4i32 (bitconvert (v8f16 MQPR:$src))), (v4i32 MQPR:$src)>;
|
|
|
|
def : Pat<(v4i32 (bitconvert (v8i16 MQPR:$src))), (v4i32 MQPR:$src)>;
|
|
|
|
def : Pat<(v4i32 (bitconvert (v16i8 MQPR:$src))), (v4i32 MQPR:$src)>;
|
|
|
|
|
|
|
|
// Destination type v8f16.
def : Pat<(v8f16 (bitconvert (v2f64 MQPR:$src))), (v8f16 MQPR:$src)>;
|
|
|
|
def : Pat<(v8f16 (bitconvert (v2i64 MQPR:$src))), (v8f16 MQPR:$src)>;
|
|
|
|
def : Pat<(v8f16 (bitconvert (v4f32 MQPR:$src))), (v8f16 MQPR:$src)>;
|
|
|
|
def : Pat<(v8f16 (bitconvert (v4i32 MQPR:$src))), (v8f16 MQPR:$src)>;
|
|
|
|
def : Pat<(v8f16 (bitconvert (v16i8 MQPR:$src))), (v8f16 MQPR:$src)>;
|
|
|
|
|
|
|
|
// Destination type v8i16.
def : Pat<(v8i16 (bitconvert (v2f64 MQPR:$src))), (v8i16 MQPR:$src)>;
|
|
|
|
def : Pat<(v8i16 (bitconvert (v2i64 MQPR:$src))), (v8i16 MQPR:$src)>;
|
|
|
|
def : Pat<(v8i16 (bitconvert (v4f32 MQPR:$src))), (v8i16 MQPR:$src)>;
|
|
|
|
def : Pat<(v8i16 (bitconvert (v4i32 MQPR:$src))), (v8i16 MQPR:$src)>;
|
|
|
|
def : Pat<(v8i16 (bitconvert (v16i8 MQPR:$src))), (v8i16 MQPR:$src)>;
|
|
|
|
|
|
|
|
// Destination type v16i8.
def : Pat<(v16i8 (bitconvert (v2f64 MQPR:$src))), (v16i8 MQPR:$src)>;
|
|
|
|
def : Pat<(v16i8 (bitconvert (v2i64 MQPR:$src))), (v16i8 MQPR:$src)>;
|
|
|
|
def : Pat<(v16i8 (bitconvert (v4f32 MQPR:$src))), (v16i8 MQPR:$src)>;
|
|
|
|
def : Pat<(v16i8 (bitconvert (v4i32 MQPR:$src))), (v16i8 MQPR:$src)>;
|
|
|
|
def : Pat<(v16i8 (bitconvert (v8f16 MQPR:$src))), (v16i8 MQPR:$src)>;
|
|
|
|
def : Pat<(v16i8 (bitconvert (v8i16 MQPR:$src))), (v16i8 MQPR:$src)>;
|
[ARM] Code-generation infrastructure for MVE.
This provides the low-level support to start using MVE vector types in
LLVM IR, loading and storing them, passing them to __asm__ statements
containing hand-written MVE vector instructions, and *if* you have the
hard-float ABI turned on, using them as function parameters.
(In the soft-float ABI, vector types are passed in integer registers,
and combining all those 32-bit integers into a q-reg requires support
for selection DAG nodes like insert_vector_elt and build_vector which
aren't implemented yet for MVE. In fact I've also had to add
`arm_aapcs_vfpcc` to a couple of existing tests to avoid that
problem.)
Specifically, this commit adds support for:
* spills, reloads and register moves for MVE vector registers
* ditto for the VPT predication mask that lives in VPR.P0
* make all the MVE vector types legal in ISel, and provide selection
DAG patterns for BITCAST, LOAD and STORE
* make loads and stores of scalar FP types conditional on
`hasFPRegs()` rather than `hasVFP2Base()`. As a result a few
existing tests needed their llc command lines updating to use
`-mattr=-fpregs` as their method of turning off all hardware FP
support.
Reviewers: dmgreen, samparker, SjoerdMeijer
Subscribers: javed.absar, kristof.beyls, hiraditya, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D60708
llvm-svn: 364329
2019-06-26 00:48:46 +08:00
|
|
|
}
|
2019-08-04 18:18:15 +08:00
|
|
|
|
|
|
|
// Bitconvert patterns that apply only when both the IsBE and HasMVEInt
// predicates hold. Unlike a plain retype of the source register, every
// pattern in this block selects an MVE_VREV<A>_<B> instruction applied to
// $src. In each pattern <A> is the wider and <B> the narrower of the two
// element widths involved in the conversion (e.g. v4f32 <-> v8i16 uses
// MVE_VREV32_16, v2i64 <-> v16i8 uses MVE_VREV64_8).
// NOTE(review): presumably MVE_VREV<A>_<B> reverses the order of <B>-bit
// elements within each <A>-bit container, which would fix up the lane order
// for big-endian; confirm against the MVE_VREV instruction definitions.
// The patterns are grouped by destination type, one group per type, in the
// order v2f64, v2i64, v4f32, v4i32, v8f16, v8i16, v16i8.
let Predicates = [IsBE,HasMVEInt] in {
|
2019-09-03 01:18:23 +08:00
|
|
|
// Destination type v2f64 (64-bit elements): always a VREV64 variant.
def : Pat<(v2f64 (bitconvert (v4f32 MQPR:$src))), (v2f64 (MVE_VREV64_32 MQPR:$src))>;
|
|
|
|
def : Pat<(v2f64 (bitconvert (v4i32 MQPR:$src))), (v2f64 (MVE_VREV64_32 MQPR:$src))>;
|
|
|
|
def : Pat<(v2f64 (bitconvert (v8f16 MQPR:$src))), (v2f64 (MVE_VREV64_16 MQPR:$src))>;
|
|
|
|
def : Pat<(v2f64 (bitconvert (v8i16 MQPR:$src))), (v2f64 (MVE_VREV64_16 MQPR:$src))>;
|
|
|
|
def : Pat<(v2f64 (bitconvert (v16i8 MQPR:$src))), (v2f64 (MVE_VREV64_8 MQPR:$src))>;
|
|
|
|
|
|
|
|
// Destination type v2i64 (64-bit elements): always a VREV64 variant.
def : Pat<(v2i64 (bitconvert (v4f32 MQPR:$src))), (v2i64 (MVE_VREV64_32 MQPR:$src))>;
|
|
|
|
def : Pat<(v2i64 (bitconvert (v4i32 MQPR:$src))), (v2i64 (MVE_VREV64_32 MQPR:$src))>;
|
|
|
|
def : Pat<(v2i64 (bitconvert (v8f16 MQPR:$src))), (v2i64 (MVE_VREV64_16 MQPR:$src))>;
|
|
|
|
def : Pat<(v2i64 (bitconvert (v8i16 MQPR:$src))), (v2i64 (MVE_VREV64_16 MQPR:$src))>;
|
|
|
|
def : Pat<(v2i64 (bitconvert (v16i8 MQPR:$src))), (v2i64 (MVE_VREV64_8 MQPR:$src))>;
|
|
|
|
|
|
|
|
// Destination type v4f32 (32-bit elements).
def : Pat<(v4f32 (bitconvert (v2f64 MQPR:$src))), (v4f32 (MVE_VREV64_32 MQPR:$src))>;
|
|
|
|
def : Pat<(v4f32 (bitconvert (v2i64 MQPR:$src))), (v4f32 (MVE_VREV64_32 MQPR:$src))>;
|
|
|
|
def : Pat<(v4f32 (bitconvert (v8f16 MQPR:$src))), (v4f32 (MVE_VREV32_16 MQPR:$src))>;
|
|
|
|
def : Pat<(v4f32 (bitconvert (v8i16 MQPR:$src))), (v4f32 (MVE_VREV32_16 MQPR:$src))>;
|
|
|
|
def : Pat<(v4f32 (bitconvert (v16i8 MQPR:$src))), (v4f32 (MVE_VREV32_8 MQPR:$src))>;
|
|
|
|
|
|
|
|
// Destination type v4i32 (32-bit elements).
def : Pat<(v4i32 (bitconvert (v2f64 MQPR:$src))), (v4i32 (MVE_VREV64_32 MQPR:$src))>;
|
|
|
|
def : Pat<(v4i32 (bitconvert (v2i64 MQPR:$src))), (v4i32 (MVE_VREV64_32 MQPR:$src))>;
|
|
|
|
def : Pat<(v4i32 (bitconvert (v8f16 MQPR:$src))), (v4i32 (MVE_VREV32_16 MQPR:$src))>;
|
|
|
|
def : Pat<(v4i32 (bitconvert (v8i16 MQPR:$src))), (v4i32 (MVE_VREV32_16 MQPR:$src))>;
|
|
|
|
def : Pat<(v4i32 (bitconvert (v16i8 MQPR:$src))), (v4i32 (MVE_VREV32_8 MQPR:$src))>;
|
|
|
|
|
|
|
|
// Destination type v8f16 (16-bit elements).
def : Pat<(v8f16 (bitconvert (v2f64 MQPR:$src))), (v8f16 (MVE_VREV64_16 MQPR:$src))>;
|
|
|
|
def : Pat<(v8f16 (bitconvert (v2i64 MQPR:$src))), (v8f16 (MVE_VREV64_16 MQPR:$src))>;
|
|
|
|
def : Pat<(v8f16 (bitconvert (v4f32 MQPR:$src))), (v8f16 (MVE_VREV32_16 MQPR:$src))>;
|
|
|
|
def : Pat<(v8f16 (bitconvert (v4i32 MQPR:$src))), (v8f16 (MVE_VREV32_16 MQPR:$src))>;
|
|
|
|
def : Pat<(v8f16 (bitconvert (v16i8 MQPR:$src))), (v8f16 (MVE_VREV16_8 MQPR:$src))>;
|
|
|
|
|
|
|
|
// Destination type v8i16 (16-bit elements).
def : Pat<(v8i16 (bitconvert (v2f64 MQPR:$src))), (v8i16 (MVE_VREV64_16 MQPR:$src))>;
|
|
|
|
def : Pat<(v8i16 (bitconvert (v2i64 MQPR:$src))), (v8i16 (MVE_VREV64_16 MQPR:$src))>;
|
|
|
|
def : Pat<(v8i16 (bitconvert (v4f32 MQPR:$src))), (v8i16 (MVE_VREV32_16 MQPR:$src))>;
|
|
|
|
def : Pat<(v8i16 (bitconvert (v4i32 MQPR:$src))), (v8i16 (MVE_VREV32_16 MQPR:$src))>;
|
|
|
|
def : Pat<(v8i16 (bitconvert (v16i8 MQPR:$src))), (v8i16 (MVE_VREV16_8 MQPR:$src))>;
|
|
|
|
|
|
|
|
// Destination type v16i8 (8-bit elements): always a VREV<A>_8 variant.
def : Pat<(v16i8 (bitconvert (v2f64 MQPR:$src))), (v16i8 (MVE_VREV64_8 MQPR:$src))>;
|
|
|
|
def : Pat<(v16i8 (bitconvert (v2i64 MQPR:$src))), (v16i8 (MVE_VREV64_8 MQPR:$src))>;
|
|
|
|
def : Pat<(v16i8 (bitconvert (v4f32 MQPR:$src))), (v16i8 (MVE_VREV32_8 MQPR:$src))>;
|
|
|
|
def : Pat<(v16i8 (bitconvert (v4i32 MQPR:$src))), (v16i8 (MVE_VREV32_8 MQPR:$src))>;
|
|
|
|
def : Pat<(v16i8 (bitconvert (v8f16 MQPR:$src))), (v16i8 (MVE_VREV16_8 MQPR:$src))>;
|
|
|
|
def : Pat<(v16i8 (bitconvert (v8i16 MQPR:$src))), (v16i8 (MVE_VREV16_8 MQPR:$src))>;
|
2019-08-04 18:18:15 +08:00
|
|
|
}
|