[AArch64][SVE] Asm: Support for UDOT/SDOT instructions.

The signed/unsigned DOT instructions perform a dot-product on
quadtuplets from two source vectors and accumulate the result in
the destination register. The instructions come in two forms:

Vector form, e.g.
  sdot  z0.s, z1.b, z2.b     - signed dot product on four 8-bit quad-tuplets,
                               accumulating results in 32-bit elements.

  udot  z0.d, z1.h, z2.h     - unsigned dot product on four 16-bit quad-tuplets,
                               accumulating results in 64-bit elements.

Indexed form, e.g.
  sdot  z0.s, z1.b, z2.b[3]  - signed dot product on four 8-bit quad-tuplets
                               with specified quadtuplet from second
                               source vector, accumulating results in 32-bit
                               elements.
  udot  z0.d, z1.h, z2.h[1]  - unsigned dot product on four 16-bit quad-tuplets
                               with specified quadtuplet from second
                               source vector, accumulating results in 64-bit
                               elements.

llvm-svn: 337372
This commit is contained in:
Sander de Smalen 2018-07-18 09:37:51 +00:00
parent c1090da852
commit ccdc7ebc1d
6 changed files with 252 additions and 0 deletions

View File

@ -72,6 +72,12 @@ let Predicates = [HasSVE] in {
defm SDIVR_ZPmZ : sve_int_bin_pred_arit_2_div<0b110, "sdivr">;
defm UDIVR_ZPmZ : sve_int_bin_pred_arit_2_div<0b111, "udivr">;
// Integer dot product. _ZZZ is the vector form and _ZZZI the indexed-element
// form; the first template argument is the U encoding bit (0b0 for sdot,
// 0b1 for udot).
defm SDOT_ZZZ : sve_intx_dot<0b0, "sdot">;
defm UDOT_ZZZ : sve_intx_dot<0b1, "udot">;
defm SDOT_ZZZI : sve_intx_dot_by_indexed_elem<0b0, "sdot">;
defm UDOT_ZZZI : sve_intx_dot_by_indexed_elem<0b1, "udot">;
defm SXTB_ZPmZ : sve_int_un_pred_arit_0_h<0b000, "sxtb">;
defm UXTB_ZPmZ : sve_int_un_pred_arit_0_h<0b001, "uxtb">;
defm SXTH_ZPmZ : sve_int_un_pred_arit_0_w<0b010, "sxth">;

View File

@ -1457,6 +1457,72 @@ multiclass sve_int_mlas_vvv_pred<bits<1> opc, string asm> {
def _D : sve_int_mlas_vvv_pred<0b11, opc, asm, ZPR64>;
}
//===----------------------------------------------------------------------===//
// SVE Integer Dot Product Group
//===----------------------------------------------------------------------===//
// Vector-form dot product instruction. $Zda is both the accumulator input and
// the result (tied through $_Zda); $Zn and $Zm are the two source vectors and
// share the register operand type zprty2.
//   sz - value placed in encoding bit 22.
//   U  - value placed in encoding bit 10.
class sve_intx_dot<bit sz, bit U, string asm, ZPRRegOp zprty1,
                   ZPRRegOp zprty2>
  : I<(outs zprty1:$Zda),
      (ins zprty1:$_Zda, zprty2:$Zn, zprty2:$Zm),
      asm, "\t$Zda, $Zn, $Zm", "", []>,
    Sched<[]> {
  bits<5> Zda;
  bits<5> Zn;
  bits<5> Zm;

  // The destination register is tied to the accumulator input.
  let Constraints = "$Zda = $_Zda";

  // Encoding fields, most significant first.
  let Inst{31-23} = 0b010001001;
  let Inst{22}    = sz;
  let Inst{21}    = 0b0;
  let Inst{20-16} = Zm;
  let Inst{15-11} = 0b00000;
  let Inst{10}    = U;
  let Inst{9-5}   = Zn;
  let Inst{4-0}   = Zda;
}
// Instantiates both element-size variants of the vector form. opc is
// forwarded as the U argument of each instruction.
multiclass sve_intx_dot<bit opc, string asm> {
  // sz = 0: ZPR8 sources accumulating into ZPR32.
  def _S : sve_intx_dot<0b0, opc, asm,
                        ZPR32, ZPR8>;

  // sz = 1: ZPR16 sources accumulating into ZPR64.
  def _D : sve_intx_dot<0b1, opc, asm,
                        ZPR64, ZPR16>;
}
//===----------------------------------------------------------------------===//
// SVE Integer Dot Product Group - Indexed Group
//===----------------------------------------------------------------------===//
// Shared part of the indexed-form dot product encoding. $Zda is both the
// accumulator input and the result (tied through $_Zda); $Zm uses its own
// register operand type (zprty3) and is followed by the index operand $iop.
// Bits 20-16 are deliberately left unassigned here: this class declares no
// Zm or iop field, so each definition deriving from it has to lay out the
// register and index fields itself.
//   sz - value placed in encoding bit 22.
//   U  - value placed in encoding bit 10.
class sve_intx_dot_by_indexed_elem<bit sz, bit U, string asm,
                                   ZPRRegOp zprty1, ZPRRegOp zprty2,
                                   ZPRRegOp zprty3, Operand itype>
  : I<(outs zprty1:$Zda),
      (ins zprty1:$_Zda, zprty2:$Zn, zprty3:$Zm, itype:$iop),
      asm, "\t$Zda, $Zn, $Zm$iop", "", []>,
    Sched<[]> {
  bits<5> Zda;
  bits<5> Zn;

  // The destination register is tied to the accumulator input.
  let Constraints = "$Zda = $_Zda";

  // Encoding fields, most significant first (bits 20-16 set by derived defs).
  let Inst{31-23} = 0b010001001;
  let Inst{22}    = sz;
  let Inst{21}    = 0b1;
  let Inst{15-11} = 0b00000;
  let Inst{10}    = U;
  let Inst{9-5}   = Zn;
  let Inst{4-0}   = Zda;
}
// Instantiates both element-size variants of the indexed form and fills in
// bits 20-16, which are split differently between index and register in each
// variant. opc is forwarded as the U argument of each instruction.
multiclass sve_intx_dot_by_indexed_elem<bit opc, string asm> {
  // sz = 0: 2-bit index in bits 20-19, 3-bit Zm in bits 18-16.
  def _S : sve_intx_dot_by_indexed_elem<0b0, opc, asm, ZPR32, ZPR8, ZPR3b8,
                                        VectorIndexS> {
    bits<2> iop;
    bits<3> Zm;
    let Inst{20-19} = iop;
    let Inst{18-16} = Zm;
  }

  // sz = 1: 1-bit index in bit 20, 4-bit Zm in bits 19-16.
  def _D : sve_intx_dot_by_indexed_elem<0b1, opc, asm, ZPR64, ZPR16, ZPR4b16,
                                        VectorIndexD> {
    bits<1> iop;
    bits<4> Zm;
    let Inst{20}    = iop;
    let Inst{19-16} = Zm;
  }
}
//===----------------------------------------------------------------------===//
// SVE Integer Arithmetic - Unary Predicated Group
//===----------------------------------------------------------------------===//

View File

@ -0,0 +1,58 @@
// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sve 2>&1 < %s| FileCheck %s
// Negative tests for SDOT: every instruction below is expected to be rejected
// by the assembler with SVE enabled. Each diagnostic is matched relative to
// the source line that produced it, so an instruction and the directives
// that follow it must stay adjacent.
// ------------------------------------------------------------------------- //
// Invalid element size
// (.s accumulators are defined with .b sources, .d accumulators with .h)
sdot z0.s, z1.h, z31.h
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
// CHECK-NEXT: sdot z0.s, z1.h, z31.h
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
sdot z0.d, z1.b, z31.b
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
// CHECK-NEXT: sdot z0.d, z1.b, z31.b
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
sdot z0.d, z1.s, z31.s
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
// CHECK-NEXT: sdot z0.d, z1.s, z31.s
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
// ------------------------------------------------------------------------- //
// Invalid restricted register for indexed vector.
// (the indexed register field is 3 bits wide for .b and 4 bits wide for .h,
// so z8.b and z16.h are the first registers that cannot be encoded)
sdot z0.s, z1.b, z8.b[3]
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
// CHECK-NEXT: sdot z0.s, z1.b, z8.b[3]
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
sdot z0.d, z1.h, z16.h[1]
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
// CHECK-NEXT: sdot z0.d, z1.h, z16.h[1]
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
// ------------------------------------------------------------------------- //
// Invalid element index
// (one value below and one value above the accepted range for each form)
sdot z0.s, z1.b, z7.b[-1]
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: vector lane must be an integer in range [0, 3].
// CHECK-NEXT: sdot z0.s, z1.b, z7.b[-1]
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
sdot z0.s, z1.b, z7.b[4]
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: vector lane must be an integer in range [0, 3].
// CHECK-NEXT: sdot z0.s, z1.b, z7.b[4]
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
sdot z0.d, z1.h, z15.h[-1]
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: vector lane must be an integer in range [0, 1].
// CHECK-NEXT: sdot z0.d, z1.h, z15.h[-1]
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
sdot z0.d, z1.h, z15.h[2]
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: vector lane must be an integer in range [0, 1].
// CHECK-NEXT: sdot z0.d, z1.h, z15.h[2]
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:

View File

@ -0,0 +1,32 @@
// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve < %s \
// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \
// RUN: | FileCheck %s --check-prefix=CHECK-ERROR
// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve < %s \
// RUN: | llvm-objdump -d -mattr=+sve - | FileCheck %s --check-prefix=CHECK-INST
// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve < %s \
// RUN: | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN
// The four commands above verify, in order: the printed form and encoding
// when assembling with SVE enabled; that assembly fails without SVE; that the
// object code disassembles back to the same text with SVE enabled; and that
// the disassembler prints <unknown> for these encodings without SVE.
// Vector form, with the highest-numbered register as the second source.
sdot z0.s, z1.b, z31.b
// CHECK-INST: sdot z0.s, z1.b, z31.b
// CHECK-ENCODING: [0x20,0x00,0x9f,0x44]
// CHECK-ERROR: instruction requires: sve
// CHECK-UNKNOWN: 20 00 9f 44 <unknown>
sdot z0.d, z1.h, z31.h
// CHECK-INST: sdot z0.d, z1.h, z31.h
// CHECK-ENCODING: [0x20,0x00,0xdf,0x44]
// CHECK-ERROR: instruction requires: sve
// CHECK-UNKNOWN: 20 00 df 44 <unknown>
// Indexed form, using the last register and index that the diagnostics test
// for this instruction does not reject.
sdot z0.s, z1.b, z7.b[3]
// CHECK-INST: sdot z0.s, z1.b, z7.b[3]
// CHECK-ENCODING: [0x20,0x00,0xbf,0x44]
// CHECK-ERROR: instruction requires: sve
// CHECK-UNKNOWN: 20 00 bf 44 <unknown>
sdot z0.d, z1.h, z15.h[1]
// CHECK-INST: sdot z0.d, z1.h, z15.h[1]
// CHECK-ENCODING: [0x20,0x00,0xff,0x44]
// CHECK-ERROR: instruction requires: sve
// CHECK-UNKNOWN: 20 00 ff 44 <unknown>

View File

@ -0,0 +1,58 @@
// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sve 2>&1 < %s| FileCheck %s
// Negative tests for UDOT: every instruction below is expected to be rejected
// by the assembler with SVE enabled. Each diagnostic is matched relative to
// the source line that produced it, so an instruction and the directives
// that follow it must stay adjacent.
// ------------------------------------------------------------------------- //
// Invalid element size
// (.s accumulators are defined with .b sources, .d accumulators with .h)
udot z0.s, z1.h, z31.h
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
// CHECK-NEXT: udot z0.s, z1.h, z31.h
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
udot z0.d, z1.b, z31.b
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
// CHECK-NEXT: udot z0.d, z1.b, z31.b
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
udot z0.d, z1.s, z31.s
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
// CHECK-NEXT: udot z0.d, z1.s, z31.s
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
// ------------------------------------------------------------------------- //
// Invalid restricted register for indexed vector.
// (the indexed register field is 3 bits wide for .b and 4 bits wide for .h,
// so z8.b and z16.h are the first registers that cannot be encoded)
udot z0.s, z1.b, z8.b[3]
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
// CHECK-NEXT: udot z0.s, z1.b, z8.b[3]
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
udot z0.d, z1.h, z16.h[1]
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
// CHECK-NEXT: udot z0.d, z1.h, z16.h[1]
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
// ------------------------------------------------------------------------- //
// Invalid element index
// (one value below and one value above the accepted range for each form)
udot z0.s, z1.b, z7.b[-1]
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: vector lane must be an integer in range [0, 3].
// CHECK-NEXT: udot z0.s, z1.b, z7.b[-1]
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
udot z0.s, z1.b, z7.b[4]
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: vector lane must be an integer in range [0, 3].
// CHECK-NEXT: udot z0.s, z1.b, z7.b[4]
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
udot z0.d, z1.h, z15.h[-1]
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: vector lane must be an integer in range [0, 1].
// CHECK-NEXT: udot z0.d, z1.h, z15.h[-1]
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
udot z0.d, z1.h, z15.h[2]
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: vector lane must be an integer in range [0, 1].
// CHECK-NEXT: udot z0.d, z1.h, z15.h[2]
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:

View File

@ -0,0 +1,32 @@
// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve < %s \
// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \
// RUN: | FileCheck %s --check-prefix=CHECK-ERROR
// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve < %s \
// RUN: | llvm-objdump -d -mattr=+sve - | FileCheck %s --check-prefix=CHECK-INST
// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve < %s \
// RUN: | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN
// The four commands above verify, in order: the printed form and encoding
// when assembling with SVE enabled; that assembly fails without SVE; that the
// object code disassembles back to the same text with SVE enabled; and that
// the disassembler prints <unknown> for these encodings without SVE.
// Vector form, with the highest-numbered register as the second source.
udot z0.s, z1.b, z31.b
// CHECK-INST: udot z0.s, z1.b, z31.b
// CHECK-ENCODING: [0x20,0x04,0x9f,0x44]
// CHECK-ERROR: instruction requires: sve
// CHECK-UNKNOWN: 20 04 9f 44 <unknown>
udot z0.d, z1.h, z31.h
// CHECK-INST: udot z0.d, z1.h, z31.h
// CHECK-ENCODING: [0x20,0x04,0xdf,0x44]
// CHECK-ERROR: instruction requires: sve
// CHECK-UNKNOWN: 20 04 df 44 <unknown>
// Indexed form, using the last register and index that the diagnostics test
// for this instruction does not reject.
udot z0.s, z1.b, z7.b[3]
// CHECK-INST: udot z0.s, z1.b, z7.b[3]
// CHECK-ENCODING: [0x20,0x04,0xbf,0x44]
// CHECK-ERROR: instruction requires: sve
// CHECK-UNKNOWN: 20 04 bf 44 <unknown>
udot z0.d, z1.h, z15.h[1]
// CHECK-INST: udot z0.d, z1.h, z15.h[1]
// CHECK-ENCODING: [0x20,0x04,0xff,0x44]
// CHECK-ERROR: instruction requires: sve
// CHECK-UNKNOWN: 20 04 ff 44 <unknown>