[AArch64][SVE2] Asm: implement CDOT instruction

Summary:
The complex DOT instructions perform a dot-product on quadtuplets from
two source vectors and the resulting wide real or wide imaginary value is
accumulated into the destination register. The instructions come in two
forms:

Vector form, e.g.
  cdot z0.s, z1.b, z2.b, #90    - complex dot product on four 8-bit quad-tuplets,
                                  accumulating results in 32-bit elements. The
                                  complex numbers in the second source vector are
                                  rotated by 90 degrees.

  cdot z0.d, z1.h, z2.h, #180   - complex dot product on four 16-bit quad-tuplets,
                                  accumulating results in 64-bit elements.
                                  The complex numbers in the second source
                                  vector are rotated by 180 degrees.

Indexed form, e.g.
  cdot z0.s, z1.b, z2.b[3], #0  - complex dot product on four 8-bit quad-tuplets,
                                  with specified quadtuplet from second source vector,
                                  accumulating results in 32-bit elements.
  cdot z0.d, z1.h, z2.h[1], #0  - complex dot product on four 16-bit quad-tuplets,
                                  with specified quadtuplet from second source vector,
                                  accumulating results in 64-bit elements.

The specification can be found here:
https://developer.arm.com/docs/ddi0602/latest

Reviewed By: SjoerdMeijer, rovka

Differential Revision: https://reviews.llvm.org/D61903

llvm-svn: 360870
This commit is contained in:
Cullen Rhodes 2019-05-16 09:33:44 +00:00
parent 3cbf3c8412
commit 07eba98dd7
4 changed files with 278 additions and 0 deletions

View File

@ -1051,4 +1051,10 @@ let Predicates = [HasSVE2] in {
defm SMULH_ZZZ : sve2_int_mul<0b010, "smulh">;
defm UMULH_ZZZ : sve2_int_mul<0b011, "umulh">;
def PMUL_ZZZ_B : sve2_int_mul<0b00, 0b001, "pmul", ZPR8>;
// SVE2 complex integer dot product (indexed)
// Instantiates the _S and _D records of sve2_cintx_dot_by_indexed_elem under
// the CDOT_ZZZI prefix, using the "cdot" mnemonic.
defm CDOT_ZZZI : sve2_cintx_dot_by_indexed_elem<"cdot">;
// SVE2 complex integer dot product
// Instantiates the _S and _D records of sve2_cintx_dot under the CDOT_ZZZ
// prefix, using the "cdot" mnemonic.
defm CDOT_ZZZ : sve2_cintx_dot<"cdot">;
}

View File

@ -1837,6 +1837,79 @@ multiclass sve_intx_dot_by_indexed_elem<bit opc, string asm> {
}
}
//===----------------------------------------------------------------------===//
// SVE2 Complex Integer Dot Product Group
//===----------------------------------------------------------------------===//
// Instruction format for the unindexed (vector) form, printed as
// "<asm> $Zda, $Zn, $Zm, $rot".
//   sz     - placed in Inst{23-22}.
//   opc    - placed in Inst{15-12}.
//   asm    - mnemonic string.
//   zprty1 - register operand type of the destination/accumulator ($Zda).
//   zprty2 - register operand type shared by both sources ($Zn and $Zm).
class sve2_complex_int_arith<bits<2> sz, bits<4> opc, string asm,
ZPRRegOp zprty1, ZPRRegOp zprty2>
// $_Zda is the accumulator input; it is tied to the $Zda output below.
: I<(outs zprty1:$Zda), (ins zprty1:$_Zda, zprty2:$Zn, zprty2:$Zm,
complexrotateop:$rot),
asm, "\t$Zda, $Zn, $Zm, $rot", "", []>, Sched<[]> {
bits<5> Zda;
bits<5> Zn;
bits<5> Zm;
// 2-bit encoding of the rotation operand.
bits<2> rot;
// Fixed bits identifying this encoding group.
let Inst{31-24} = 0b01000100;
let Inst{23-22} = sz;
// Inst{21} is 0 for the vector form (the indexed format sets it to 1).
let Inst{21} = 0b0;
let Inst{20-16} = Zm;
let Inst{15-12} = opc;
let Inst{11-10} = rot;
let Inst{9-5} = Zn;
let Inst{4-0} = Zda;
// The accumulator input and the result occupy the same register.
let Constraints = "$Zda = $_Zda";
// NOTE(review): presumably consumed by the movprfx compatibility checks in
// the assembler - confirm against the users of these two fields.
let DestructiveInstType = Destructive;
let ElementSize = ElementSizeNone;
}
// Vector-form variants, both using opc 0b0001:
//   _S - sz = 0b10, ZPR32 destination with ZPR8 sources  (e.g. z0.s, z1.b, z2.b).
//   _D - sz = 0b11, ZPR64 destination with ZPR16 sources (e.g. z0.d, z1.h, z2.h).
multiclass sve2_cintx_dot<string asm> {
def _S : sve2_complex_int_arith<0b10, 0b0001, asm, ZPR32, ZPR8>;
def _D : sve2_complex_int_arith<0b11, 0b0001, asm, ZPR64, ZPR16>;
}
//===----------------------------------------------------------------------===//
// SVE2 Complex Integer Dot Product - Indexed Group
//===----------------------------------------------------------------------===//
// Instruction format for the indexed form, printed as
// "<asm> $Zda, $Zn, $Zm$iop, $rot".
//   sz     - placed in Inst{23-22}.
//   opc    - placed in Inst{15-12}.
//   asm    - mnemonic string.
//   zprty1 - register operand type of the destination/accumulator ($Zda).
//   zprty2 - register operand type of the first source ($Zn).
//   zprty3 - register operand type of the indexed second source ($Zm).
//   itype  - operand type of the element index ($iop).
// This class does not declare or encode Zm and iop: their widths differ per
// variant, so the instantiating multiclass fills in Inst{20-16}.
class sve2_complex_int_arith_indexed<bits<2> sz, bits<4> opc, string asm,
ZPRRegOp zprty1, ZPRRegOp zprty2,
ZPRRegOp zprty3, Operand itype>
// $_Zda is the accumulator input; it is tied to the $Zda output below.
: I<(outs zprty1:$Zda), (ins zprty1:$_Zda, zprty2:$Zn, zprty3:$Zm, itype:$iop,
complexrotateop:$rot),
asm, "\t$Zda, $Zn, $Zm$iop, $rot", "", []>, Sched<[]> {
bits<5> Zda;
bits<5> Zn;
// 2-bit encoding of the rotation operand.
bits<2> rot;
// Fixed bits identifying this encoding group.
let Inst{31-24} = 0b01000100;
let Inst{23-22} = sz;
// Inst{21} is 1 for the indexed form (the vector format sets it to 0).
let Inst{21} = 0b1;
let Inst{15-12} = opc;
let Inst{11-10} = rot;
let Inst{9-5} = Zn;
let Inst{4-0} = Zda;
// The accumulator input and the result occupy the same register.
let Constraints = "$Zda = $_Zda";
// NOTE(review): presumably consumed by the movprfx compatibility checks in
// the assembler - confirm against the users of these two fields.
let DestructiveInstType = Destructive;
let ElementSize = ElementSizeNone;
}
// Indexed-form variants, both using opc 0b0100. Each variant splits
// Inst{20-16} between the element index and a restricted Zm register number.
multiclass sve2_cintx_dot_by_indexed_elem<string asm> {
// _S: 2-bit index in Inst{20-19}, 3-bit Zm in Inst{18-16}.
def _S : sve2_complex_int_arith_indexed<0b10, 0b0100, asm, ZPR32, ZPR8, ZPR3b8, VectorIndexS> {
bits<2> iop;
bits<3> Zm;
let Inst{20-19} = iop;
let Inst{18-16} = Zm;
}
// _D: 1-bit index in Inst{20}, 4-bit Zm in Inst{19-16}.
def _D : sve2_complex_int_arith_indexed<0b11, 0b0100, asm, ZPR64, ZPR16, ZPR4b16, VectorIndexD> {
bit iop;
bits<4> Zm;
let Inst{20} = iop;
let Inst{19-16} = Zm;
}
}
//===----------------------------------------------------------------------===//
// SVE2 Integer Multiply - Unpredicated Group
//===----------------------------------------------------------------------===//

View File

@ -0,0 +1,103 @@
// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2 2>&1 < %s| FileCheck %s
// Negative assembler tests for the SVE2 cdot instruction. Every instruction
// below is expected to be rejected; the diagnostic text and the echoed source
// line are verified by the directives that follow each instruction.
// ------------------------------------------------------------------------- //
// Invalid element size
// Destination/source element width combinations other than .s with .b and
// .d with .h must be rejected.
cdot z0.s, z1.h, z31.h, #0
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
// CHECK-NEXT: cdot z0.s, z1.h, z31.h, #0
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
cdot z0.s, z1.s, z31.s, #0
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
// CHECK-NEXT: cdot z0.s, z1.s, z31.s, #0
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
cdot z0.s, z1.d, z31.d, #0
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
// CHECK-NEXT: cdot z0.s, z1.d, z31.d, #0
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
cdot z0.d, z1.b, z31.b, #0
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
// CHECK-NEXT: cdot z0.d, z1.b, z31.b, #0
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
cdot z0.d, z1.s, z31.s, #0
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
// CHECK-NEXT: cdot z0.d, z1.s, z31.s, #0
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
cdot z0.d, z1.d, z31.d, #0
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
// CHECK-NEXT: cdot z0.d, z1.d, z31.d, #0
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
// ------------------------------------------------------------------------- //
// Invalid restricted register for indexed vector.
// The indexed second source has a narrower register field: z8 is rejected for
// the .b form and z16 for the .h form.
cdot z0.s, z1.b, z8.b[3], #0
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
// CHECK-NEXT: cdot z0.s, z1.b, z8.b[3], #0
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
cdot z0.d, z1.h, z16.h[1], #0
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
// CHECK-NEXT: cdot z0.d, z1.h, z16.h[1], #0
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
// ------------------------------------------------------------------------- //
// Invalid element index
// One below and one above the accepted range for each form:
// [0, 3] for .b sources and [0, 1] for .h sources.
cdot z0.s, z1.b, z7.b[-1], #0
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: vector lane must be an integer in range [0, 3].
// CHECK-NEXT: cdot z0.s, z1.b, z7.b[-1], #0
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
cdot z0.s, z1.b, z7.b[4], #0
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: vector lane must be an integer in range [0, 3].
// CHECK-NEXT: cdot z0.s, z1.b, z7.b[4], #0
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
cdot z0.d, z1.h, z15.h[-1], #0
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: vector lane must be an integer in range [0, 1].
// CHECK-NEXT: cdot z0.d, z1.h, z15.h[-1], #0
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
cdot z0.d, z1.h, z15.h[2], #0
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: vector lane must be an integer in range [0, 1].
// CHECK-NEXT: cdot z0.d, z1.h, z15.h[2], #0
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
// --------------------------------------------------------------------------//
// Invalid rotation
// Only #0, #90, #180 and #270 are accepted; #360 and #450 must be rejected.
cdot z0.s, z1.b, z2.b[0], #360
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: complex rotation must be 0, 90, 180 or 270.
// CHECK-NEXT: cdot z0.s, z1.b, z2.b[0], #360
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
cdot z0.d, z1.h, z2.h[0], #450
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: complex rotation must be 0, 90, 180 or 270.
// CHECK-NEXT: cdot z0.d, z1.h, z2.h[0], #450
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
// --------------------------------------------------------------------------//
// Negative tests: cdot (vector and indexed forms) must be rejected when it
// directly follows a predicated movprfx.
movprfx z0.d, p0/z, z7.d
cdot z0.d, z1.h, z31.h, #0
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a predicated movprfx, suggest using unpredicated movprfx
// CHECK-NEXT: cdot z0.d, z1.h, z31.h, #0
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
movprfx z0.d, p0/z, z7.d
cdot z0.d, z1.h, z15.h[1], #0
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a predicated movprfx, suggest using unpredicated movprfx
// CHECK-NEXT: cdot z0.d, z1.h, z15.h[1], #0
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:

View File

@ -0,0 +1,96 @@
// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2 < %s \
// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \
// RUN: | FileCheck %s --check-prefix=CHECK-ERROR
// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2 < %s \
// RUN: | llvm-objdump -d -mattr=+sve2 - | FileCheck %s --check-prefix=CHECK-INST
// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2 < %s \
// RUN: | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN
// Positive tests for the SVE2 cdot instruction. The four invocations above
// verify, in order: the printed instruction and its encoding bytes with sve2
// enabled, the rejection of the instruction when sve2 is not enabled, the
// disassembly of the assembled object with sve2 enabled, and that the same
// bytes disassemble as <unknown> when sve2 is not enabled.
// --------------------------------------------------------------------------//
// Vector form, covering both element widths and all four rotations.
cdot z0.s, z1.b, z31.b, #0
// CHECK-INST: cdot z0.s, z1.b, z31.b, #0
// CHECK-ENCODING: [0x20,0x10,0x9f,0x44]
// CHECK-ERROR: instruction requires: sve2
// CHECK-UNKNOWN: 20 10 9f 44 <unknown>
cdot z0.d, z1.h, z31.h, #0
// CHECK-INST: cdot z0.d, z1.h, z31.h, #0
// CHECK-ENCODING: [0x20,0x10,0xdf,0x44]
// CHECK-ERROR: instruction requires: sve2
// CHECK-UNKNOWN: 20 10 df 44 <unknown>
cdot z0.d, z1.h, z31.h, #90
// CHECK-INST: cdot z0.d, z1.h, z31.h, #90
// CHECK-ENCODING: [0x20,0x14,0xdf,0x44]
// CHECK-ERROR: instruction requires: sve2
// CHECK-UNKNOWN: 20 14 df 44 <unknown>
cdot z0.d, z1.h, z31.h, #180
// CHECK-INST: cdot z0.d, z1.h, z31.h, #180
// CHECK-ENCODING: [0x20,0x18,0xdf,0x44]
// CHECK-ERROR: instruction requires: sve2
// CHECK-UNKNOWN: 20 18 df 44 <unknown>
cdot z0.d, z1.h, z31.h, #270
// CHECK-INST: cdot z0.d, z1.h, z31.h, #270
// CHECK-ENCODING: [0x20,0x1c,0xdf,0x44]
// CHECK-ERROR: instruction requires: sve2
// CHECK-UNKNOWN: 20 1c df 44 <unknown>
// --------------------------------------------------------------------------//
// Indexed form, covering the highest accepted register and index for each
// element width plus a mix of registers, indices and rotations.
cdot z0.s, z1.b, z7.b[3], #0
// CHECK-INST: cdot z0.s, z1.b, z7.b[3], #0
// CHECK-ENCODING: [0x20,0x40,0xbf,0x44]
// CHECK-ERROR: instruction requires: sve2
// CHECK-UNKNOWN: 20 40 bf 44 <unknown>
cdot z0.d, z1.h, z15.h[1], #0
// CHECK-INST: cdot z0.d, z1.h, z15.h[1], #0
// CHECK-ENCODING: [0x20,0x40,0xff,0x44]
// CHECK-ERROR: instruction requires: sve2
// CHECK-UNKNOWN: 20 40 ff 44 <unknown>
cdot z5.d, z6.h, z3.h[0], #90
// CHECK-INST: cdot z5.d, z6.h, z3.h[0], #90
// CHECK-ENCODING: [0xc5,0x44,0xe3,0x44]
// CHECK-ERROR: instruction requires: sve2
// CHECK-UNKNOWN: c5 44 e3 44 <unknown>
cdot z29.d, z30.h, z0.h[0], #180
// CHECK-INST: cdot z29.d, z30.h, z0.h[0], #180
// CHECK-ENCODING: [0xdd,0x4b,0xe0,0x44]
// CHECK-ERROR: instruction requires: sve2
// CHECK-UNKNOWN: dd 4b e0 44 <unknown>
cdot z31.d, z30.h, z7.h[1], #270
// CHECK-INST: cdot z31.d, z30.h, z7.h[1], #270
// CHECK-ENCODING: [0xdf,0x4f,0xf7,0x44]
// CHECK-ERROR: instruction requires: sve2
// CHECK-UNKNOWN: df 4f f7 44 <unknown>
// --------------------------------------------------------------------------//
// Test compatibility with MOVPRFX instruction.
// Both cdot forms are accepted directly after an unpredicated movprfx that
// writes the same destination register.
movprfx z0, z7
// CHECK-INST: movprfx z0, z7
// CHECK-ENCODING: [0xe0,0xbc,0x20,0x04]
// CHECK-ERROR: instruction requires: sve
// CHECK-UNKNOWN: e0 bc 20 04 <unknown>
cdot z0.d, z1.h, z31.h, #0
// CHECK-INST: cdot z0.d, z1.h, z31.h, #0
// CHECK-ENCODING: [0x20,0x10,0xdf,0x44]
// CHECK-ERROR: instruction requires: sve2
// CHECK-UNKNOWN: 20 10 df 44 <unknown>
movprfx z0, z7
// CHECK-INST: movprfx z0, z7
// CHECK-ENCODING: [0xe0,0xbc,0x20,0x04]
// CHECK-ERROR: instruction requires: sve
// CHECK-UNKNOWN: e0 bc 20 04 <unknown>
cdot z0.d, z1.h, z15.h[1], #0
// CHECK-INST: cdot z0.d, z1.h, z15.h[1], #0
// CHECK-ENCODING: [0x20,0x40,0xff,0x44]
// CHECK-ERROR: instruction requires: sve2
// CHECK-UNKNOWN: 20 40 ff 44 <unknown>