forked from OSchip/llvm-project
[AArch64][SVE] Implement sdot and udot (lane) intrinsics
Summary: Implements the following arithmetic intrinsics: int_aarch64_sve_sdot, int_aarch64_sve_sdot_lane, int_aarch64_sve_udot, and int_aarch64_sve_udot_lane. This patch includes tests for the Subdivide4Argument type added by D67549. Reviewers: sdesmalen, SjoerdMeijer, greened, rengolin, rovka; Reviewed By: sdesmalen; Subscribers: tschuett, kristof.beyls, rkruppe, psnobl, cfe-commits, llvm-commits; Differential Revision: https://reviews.llvm.org/D67551 ; llvm-svn: 374566
This commit is contained in:
parent
39d4c9fd56
commit
ee0a0a3464
|
@ -780,6 +780,21 @@ let TargetPrefix = "aarch64" in { // All intrinsics start with "llvm.aarch64.".
|
|||
[llvm_anyvector_ty],
|
||||
[IntrNoMem]>;
|
||||
|
||||
class AdvSIMD_SVE_DOT_Intrinsic
|
||||
: Intrinsic<[llvm_anyvector_ty],
|
||||
[LLVMMatchType<0>,
|
||||
LLVMSubdivide4VectorType<0>,
|
||||
LLVMSubdivide4VectorType<0>],
|
||||
[IntrNoMem]>;
|
||||
|
||||
class AdvSIMD_SVE_DOT_Indexed_Intrinsic
|
||||
: Intrinsic<[llvm_anyvector_ty],
|
||||
[LLVMMatchType<0>,
|
||||
LLVMSubdivide4VectorType<0>,
|
||||
LLVMSubdivide4VectorType<0>,
|
||||
llvm_i32_ty],
|
||||
[IntrNoMem]>;
|
||||
|
||||
// This class of intrinsics is not intended to be useful within LLVM IR but
|
||||
// are instead here to support some of the more rigid parts of the ACLE.
|
||||
class Builtin_SVCVT<string name, LLVMType OUT, LLVMType IN>
|
||||
|
@ -799,6 +814,12 @@ let TargetPrefix = "aarch64" in { // All intrinsics start with "llvm.aarch64.".
|
|||
def int_aarch64_sve_abs : AdvSIMD_Merged1VectorArg_Intrinsic;
|
||||
def int_aarch64_sve_neg : AdvSIMD_Merged1VectorArg_Intrinsic;
|
||||
|
||||
def int_aarch64_sve_sdot : AdvSIMD_SVE_DOT_Intrinsic;
|
||||
def int_aarch64_sve_sdot_lane : AdvSIMD_SVE_DOT_Indexed_Intrinsic;
|
||||
|
||||
def int_aarch64_sve_udot : AdvSIMD_SVE_DOT_Intrinsic;
|
||||
def int_aarch64_sve_udot_lane : AdvSIMD_SVE_DOT_Indexed_Intrinsic;
|
||||
|
||||
//
|
||||
// Counting bits
|
||||
//
|
||||
|
|
|
@ -1011,8 +1011,8 @@ class AsmVectorIndex<int Min, int Max, string NamePrefix=""> : AsmOperandClass {
|
|||
let RenderMethod = "addVectorIndexOperands";
|
||||
}
|
||||
|
||||
class AsmVectorIndexOpnd<AsmOperandClass mc, code pred>
|
||||
: Operand<i64>, ImmLeaf<i64, pred> {
|
||||
class AsmVectorIndexOpnd<ValueType ty, AsmOperandClass mc, code pred>
|
||||
: Operand<ty>, ImmLeaf<ty, pred> {
|
||||
let ParserMatchClass = mc;
|
||||
let PrintMethod = "printVectorIndex";
|
||||
}
|
||||
|
@ -1023,11 +1023,17 @@ def VectorIndexHOperand : AsmVectorIndex<0, 7>;
|
|||
def VectorIndexSOperand : AsmVectorIndex<0, 3>;
|
||||
def VectorIndexDOperand : AsmVectorIndex<0, 1>;
|
||||
|
||||
def VectorIndex1 : AsmVectorIndexOpnd<VectorIndex1Operand, [{ return ((uint64_t)Imm) == 1; }]>;
|
||||
def VectorIndexB : AsmVectorIndexOpnd<VectorIndexBOperand, [{ return ((uint64_t)Imm) < 16; }]>;
|
||||
def VectorIndexH : AsmVectorIndexOpnd<VectorIndexHOperand, [{ return ((uint64_t)Imm) < 8; }]>;
|
||||
def VectorIndexS : AsmVectorIndexOpnd<VectorIndexSOperand, [{ return ((uint64_t)Imm) < 4; }]>;
|
||||
def VectorIndexD : AsmVectorIndexOpnd<VectorIndexDOperand, [{ return ((uint64_t)Imm) < 2; }]>;
|
||||
def VectorIndex1 : AsmVectorIndexOpnd<i64, VectorIndex1Operand, [{ return ((uint64_t)Imm) == 1; }]>;
|
||||
def VectorIndexB : AsmVectorIndexOpnd<i64, VectorIndexBOperand, [{ return ((uint64_t)Imm) < 16; }]>;
|
||||
def VectorIndexH : AsmVectorIndexOpnd<i64, VectorIndexHOperand, [{ return ((uint64_t)Imm) < 8; }]>;
|
||||
def VectorIndexS : AsmVectorIndexOpnd<i64, VectorIndexSOperand, [{ return ((uint64_t)Imm) < 4; }]>;
|
||||
def VectorIndexD : AsmVectorIndexOpnd<i64, VectorIndexDOperand, [{ return ((uint64_t)Imm) < 2; }]>;
|
||||
|
||||
def VectorIndex132b : AsmVectorIndexOpnd<i32, VectorIndex1Operand, [{ return ((uint64_t)Imm) == 1; }]>;
|
||||
def VectorIndexB32b : AsmVectorIndexOpnd<i32, VectorIndexBOperand, [{ return ((uint64_t)Imm) < 16; }]>;
|
||||
def VectorIndexH32b : AsmVectorIndexOpnd<i32, VectorIndexHOperand, [{ return ((uint64_t)Imm) < 8; }]>;
|
||||
def VectorIndexS32b : AsmVectorIndexOpnd<i32, VectorIndexSOperand, [{ return ((uint64_t)Imm) < 4; }]>;
|
||||
def VectorIndexD32b : AsmVectorIndexOpnd<i32, VectorIndexDOperand, [{ return ((uint64_t)Imm) < 2; }]>;
|
||||
|
||||
def SVEVectorIndexExtDupBOperand : AsmVectorIndex<0, 63, "SVE">;
|
||||
def SVEVectorIndexExtDupHOperand : AsmVectorIndex<0, 31, "SVE">;
|
||||
|
@ -1036,15 +1042,15 @@ def SVEVectorIndexExtDupDOperand : AsmVectorIndex<0, 7, "SVE">;
|
|||
def SVEVectorIndexExtDupQOperand : AsmVectorIndex<0, 3, "SVE">;
|
||||
|
||||
def sve_elm_idx_extdup_b
|
||||
: AsmVectorIndexOpnd<SVEVectorIndexExtDupBOperand, [{ return ((uint64_t)Imm) < 64; }]>;
|
||||
: AsmVectorIndexOpnd<i64, SVEVectorIndexExtDupBOperand, [{ return ((uint64_t)Imm) < 64; }]>;
|
||||
def sve_elm_idx_extdup_h
|
||||
: AsmVectorIndexOpnd<SVEVectorIndexExtDupHOperand, [{ return ((uint64_t)Imm) < 32; }]>;
|
||||
: AsmVectorIndexOpnd<i64, SVEVectorIndexExtDupHOperand, [{ return ((uint64_t)Imm) < 32; }]>;
|
||||
def sve_elm_idx_extdup_s
|
||||
: AsmVectorIndexOpnd<SVEVectorIndexExtDupSOperand, [{ return ((uint64_t)Imm) < 16; }]>;
|
||||
: AsmVectorIndexOpnd<i64, SVEVectorIndexExtDupSOperand, [{ return ((uint64_t)Imm) < 16; }]>;
|
||||
def sve_elm_idx_extdup_d
|
||||
: AsmVectorIndexOpnd<SVEVectorIndexExtDupDOperand, [{ return ((uint64_t)Imm) < 8; }]>;
|
||||
: AsmVectorIndexOpnd<i64, SVEVectorIndexExtDupDOperand, [{ return ((uint64_t)Imm) < 8; }]>;
|
||||
def sve_elm_idx_extdup_q
|
||||
: AsmVectorIndexOpnd<SVEVectorIndexExtDupQOperand, [{ return ((uint64_t)Imm) < 4; }]>;
|
||||
: AsmVectorIndexOpnd<i64, SVEVectorIndexExtDupQOperand, [{ return ((uint64_t)Imm) < 4; }]>;
|
||||
|
||||
// 8-bit immediate for AdvSIMD where 64-bit values of the form:
|
||||
// aaaaaaaa bbbbbbbb cccccccc dddddddd eeeeeeee ffffffff gggggggg hhhhhhhh
|
||||
|
|
|
@ -82,11 +82,11 @@ let Predicates = [HasSVE] in {
|
|||
defm SDIVR_ZPmZ : sve_int_bin_pred_arit_2_div<0b110, "sdivr">;
|
||||
defm UDIVR_ZPmZ : sve_int_bin_pred_arit_2_div<0b111, "udivr">;
|
||||
|
||||
defm SDOT_ZZZ : sve_intx_dot<0b0, "sdot">;
|
||||
defm UDOT_ZZZ : sve_intx_dot<0b1, "udot">;
|
||||
defm SDOT_ZZZ : sve_intx_dot<0b0, "sdot", int_aarch64_sve_sdot>;
|
||||
defm UDOT_ZZZ : sve_intx_dot<0b1, "udot", int_aarch64_sve_udot>;
|
||||
|
||||
defm SDOT_ZZZI : sve_intx_dot_by_indexed_elem<0b0, "sdot">;
|
||||
defm UDOT_ZZZI : sve_intx_dot_by_indexed_elem<0b1, "udot">;
|
||||
defm SDOT_ZZZI : sve_intx_dot_by_indexed_elem<0b0, "sdot", int_aarch64_sve_sdot_lane>;
|
||||
defm UDOT_ZZZI : sve_intx_dot_by_indexed_elem<0b1, "udot", int_aarch64_sve_udot_lane>;
|
||||
|
||||
defm SXTB_ZPmZ : sve_int_un_pred_arit_0_h<0b000, "sxtb">;
|
||||
defm UXTB_ZPmZ : sve_int_un_pred_arit_0_h<0b001, "uxtb">;
|
||||
|
|
|
@ -2024,12 +2024,14 @@ class sve_intx_dot<bit sz, bit U, string asm, ZPRRegOp zprty1,
|
|||
|
||||
let Constraints = "$Zda = $_Zda";
|
||||
let DestructiveInstType = Destructive;
|
||||
let ElementSize = zprty1.ElementSize;
|
||||
}
|
||||
|
||||
multiclass sve_intx_dot<bit opc, string asm> {
|
||||
multiclass sve_intx_dot<bit opc, string asm, SDPatternOperator op> {
|
||||
def _S : sve_intx_dot<0b0, opc, asm, ZPR32, ZPR8>;
|
||||
def _D : sve_intx_dot<0b1, opc, asm, ZPR64, ZPR16>;
|
||||
|
||||
def : SVE_3_Op_Pat<nxv4i32, op, nxv4i32, nxv16i8, nxv16i8, !cast<Instruction>(NAME # _S)>;
|
||||
def : SVE_3_Op_Pat<nxv2i64, op, nxv2i64, nxv8i16, nxv8i16, !cast<Instruction>(NAME # _D)>;
|
||||
}
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
@ -2054,22 +2056,27 @@ class sve_intx_dot_by_indexed_elem<bit sz, bit U, string asm,
|
|||
|
||||
let Constraints = "$Zda = $_Zda";
|
||||
let DestructiveInstType = Destructive;
|
||||
let ElementSize = ElementSizeNone;
|
||||
}
|
||||
|
||||
multiclass sve_intx_dot_by_indexed_elem<bit opc, string asm> {
|
||||
def _S : sve_intx_dot_by_indexed_elem<0b0, opc, asm, ZPR32, ZPR8, ZPR3b8, VectorIndexS> {
|
||||
multiclass sve_intx_dot_by_indexed_elem<bit opc, string asm,
|
||||
SDPatternOperator op> {
|
||||
def _S : sve_intx_dot_by_indexed_elem<0b0, opc, asm, ZPR32, ZPR8, ZPR3b8, VectorIndexS32b> {
|
||||
bits<2> iop;
|
||||
bits<3> Zm;
|
||||
let Inst{20-19} = iop;
|
||||
let Inst{18-16} = Zm;
|
||||
}
|
||||
def _D : sve_intx_dot_by_indexed_elem<0b1, opc, asm, ZPR64, ZPR16, ZPR4b16, VectorIndexD> {
|
||||
def _D : sve_intx_dot_by_indexed_elem<0b1, opc, asm, ZPR64, ZPR16, ZPR4b16, VectorIndexD32b> {
|
||||
bits<1> iop;
|
||||
bits<4> Zm;
|
||||
let Inst{20} = iop;
|
||||
let Inst{19-16} = Zm;
|
||||
}
|
||||
|
||||
def : Pat<(nxv4i32 (op nxv4i32:$Op1, nxv16i8:$Op2, nxv16i8:$Op3, (i32 VectorIndexS32b:$idx))),
|
||||
(!cast<Instruction>(NAME # _S) $Op1, $Op2, $Op3, VectorIndexS32b:$idx)>;
|
||||
def : Pat<(nxv2i64 (op nxv2i64:$Op1, nxv8i16:$Op2, nxv8i16:$Op3, (i32 VectorIndexD32b:$idx))),
|
||||
(!cast<Instruction>(NAME # _D) $Op1, $Op2, $Op3, VectorIndexD32b:$idx)>;
|
||||
}
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
|
|
@ -88,6 +88,87 @@ define <vscale x 2 x i64> @neg_i64(<vscale x 2 x i64> %a, <vscale x 2 x i1> %pg,
|
|||
ret <vscale x 2 x i64> %out
|
||||
}
|
||||
|
||||
; SDOT
|
||||
|
||||
define <vscale x 4 x i32> @sdot_i32(<vscale x 4 x i32> %a, <vscale x 16 x i8> %b, <vscale x 16 x i8> %c) {
|
||||
; CHECK-LABEL: sdot_i32:
|
||||
; CHECK: sdot z0.s, z1.b, z2.b
|
||||
; CHECK-NEXT: ret
|
||||
%out = call <vscale x 4 x i32> @llvm.aarch64.sve.sdot.nxv4i32(<vscale x 4 x i32> %a,
|
||||
<vscale x 16 x i8> %b,
|
||||
<vscale x 16 x i8> %c)
|
||||
ret <vscale x 4 x i32> %out
|
||||
}
|
||||
|
||||
define <vscale x 2 x i64> @sdot_i64(<vscale x 2 x i64> %a, <vscale x 8 x i16> %b, <vscale x 8 x i16> %c) {
|
||||
; CHECK-LABEL: sdot_i64:
|
||||
; CHECK: sdot z0.d, z1.h, z2.h
|
||||
; CHECK-NEXT: ret
|
||||
%out = call <vscale x 2 x i64> @llvm.aarch64.sve.sdot.nxv2i64(<vscale x 2 x i64> %a,
|
||||
<vscale x 8 x i16> %b,
|
||||
<vscale x 8 x i16> %c)
|
||||
ret <vscale x 2 x i64> %out
|
||||
}
|
||||
|
||||
; SDOT (Indexed)
|
||||
|
||||
define <vscale x 4 x i32> @sdot_lane_i32(<vscale x 4 x i32> %a, <vscale x 16 x i8> %b, <vscale x 16 x i8> %c) {
|
||||
; CHECK-LABEL: sdot_lane_i32:
|
||||
; CHECK: sdot z0.s, z1.b, z2.b[2]
|
||||
; CHECK-NEXT: ret
|
||||
%out = call <vscale x 4 x i32> @llvm.aarch64.sve.sdot.lane.nxv4i32(<vscale x 4 x i32> %a,
|
||||
<vscale x 16 x i8> %b,
|
||||
<vscale x 16 x i8> %c,
|
||||
i32 2)
|
||||
ret <vscale x 4 x i32> %out
|
||||
}
|
||||
|
||||
define <vscale x 2 x i64> @sdot_lane_i64(<vscale x 2 x i64> %a, <vscale x 8 x i16> %b, <vscale x 8 x i16> %c) {
|
||||
; CHECK-LABEL: sdot_lane_i64:
|
||||
; CHECK: sdot z0.d, z1.h, z2.h[1]
|
||||
; CHECK-NEXT: ret
|
||||
%out = call <vscale x 2 x i64> @llvm.aarch64.sve.sdot.lane.nxv2i64(<vscale x 2 x i64> %a,
|
||||
<vscale x 8 x i16> %b,
|
||||
<vscale x 8 x i16> %c,
|
||||
i32 1)
|
||||
ret <vscale x 2 x i64> %out
|
||||
}
|
||||
|
||||
; UDOT
|
||||
|
||||
define <vscale x 4 x i32> @udot_i32(<vscale x 4 x i32> %a, <vscale x 16 x i8> %b, <vscale x 16 x i8> %c) {
|
||||
; CHECK-LABEL: udot_i32:
|
||||
; CHECK: udot z0.s, z1.b, z2.b
|
||||
; CHECK-NEXT: ret
|
||||
%out = call <vscale x 4 x i32> @llvm.aarch64.sve.udot.nxv4i32(<vscale x 4 x i32> %a,
|
||||
<vscale x 16 x i8> %b,
|
||||
<vscale x 16 x i8> %c)
|
||||
ret <vscale x 4 x i32> %out
|
||||
}
|
||||
|
||||
define <vscale x 2 x i64> @udot_i64(<vscale x 2 x i64> %a, <vscale x 8 x i16> %b, <vscale x 8 x i16> %c) {
|
||||
; CHECK-LABEL: udot_i64:
|
||||
; CHECK: udot z0.d, z1.h, z2.h
|
||||
; CHECK-NEXT: ret
|
||||
%out = call <vscale x 2 x i64> @llvm.aarch64.sve.udot.nxv2i64(<vscale x 2 x i64> %a,
|
||||
<vscale x 8 x i16> %b,
|
||||
<vscale x 8 x i16> %c)
|
||||
ret <vscale x 2 x i64> %out
|
||||
}
|
||||
|
||||
; UDOT (Indexed)
|
||||
|
||||
define <vscale x 4 x i32> @udot_lane_i32(<vscale x 4 x i32> %a, <vscale x 16 x i8> %b, <vscale x 16 x i8> %c) {
|
||||
; CHECK-LABEL: udot_lane_i32:
|
||||
; CHECK: udot z0.s, z1.b, z2.b[2]
|
||||
; CHECK-NEXT: ret
|
||||
%out = call <vscale x 4 x i32> @llvm.aarch64.sve.udot.lane.nxv4i32(<vscale x 4 x i32> %a,
|
||||
<vscale x 16 x i8> %b,
|
||||
<vscale x 16 x i8> %c,
|
||||
i32 2)
|
||||
ret <vscale x 4 x i32> %out
|
||||
}
|
||||
|
||||
declare <vscale x 16 x i8> @llvm.aarch64.sve.abs.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i1>, <vscale x 16 x i8>)
|
||||
declare <vscale x 8 x i16> @llvm.aarch64.sve.abs.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i1>, <vscale x 8 x i16>)
|
||||
declare <vscale x 4 x i32> @llvm.aarch64.sve.abs.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i1>, <vscale x 4 x i32>)
|
||||
|
@ -97,3 +178,15 @@ declare <vscale x 16 x i8> @llvm.aarch64.sve.neg.nxv16i8(<vscale x 16 x i8>, <vs
|
|||
declare <vscale x 8 x i16> @llvm.aarch64.sve.neg.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i1>, <vscale x 8 x i16>)
|
||||
declare <vscale x 4 x i32> @llvm.aarch64.sve.neg.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i1>, <vscale x 4 x i32>)
|
||||
declare <vscale x 2 x i64> @llvm.aarch64.sve.neg.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i1>, <vscale x 2 x i64>)
|
||||
|
||||
declare <vscale x 4 x i32> @llvm.aarch64.sve.sdot.nxv4i32(<vscale x 4 x i32>, <vscale x 16 x i8>, <vscale x 16 x i8>)
|
||||
declare <vscale x 2 x i64> @llvm.aarch64.sve.sdot.nxv2i64(<vscale x 2 x i64>, <vscale x 8 x i16>, <vscale x 8 x i16>)
|
||||
|
||||
declare <vscale x 4 x i32> @llvm.aarch64.sve.sdot.lane.nxv4i32(<vscale x 4 x i32>, <vscale x 16 x i8>, <vscale x 16 x i8>, i32)
|
||||
declare <vscale x 2 x i64> @llvm.aarch64.sve.sdot.lane.nxv2i64(<vscale x 2 x i64>, <vscale x 8 x i16>, <vscale x 8 x i16>, i32)
|
||||
|
||||
declare <vscale x 4 x i32> @llvm.aarch64.sve.udot.nxv4i32(<vscale x 4 x i32>, <vscale x 16 x i8>, <vscale x 16 x i8>)
|
||||
declare <vscale x 2 x i64> @llvm.aarch64.sve.udot.nxv2i64(<vscale x 2 x i64>, <vscale x 8 x i16>, <vscale x 8 x i16>)
|
||||
|
||||
declare <vscale x 4 x i32> @llvm.aarch64.sve.udot.lane.nxv4i32(<vscale x 4 x i32>, <vscale x 16 x i8>, <vscale x 16 x i8>, i32)
|
||||
declare <vscale x 2 x i64> @llvm.aarch64.sve.udot.lane.nxv2i64(<vscale x 2 x i64>, <vscale x 8 x i16>, <vscale x 8 x i16>, i32)
|
||||
|
|
Loading…
Reference in New Issue