forked from OSchip/llvm-project
[AArch64][SVE2] Asm: implement CDOT instruction
Summary: The complex DOT instructions perform a dot-product on quad-tuplets from two source vectors and the resulting wide real or wide imaginary is accumulated into the destination register. The instructions come in two forms: Vector form, e.g. cdot z0.s, z1.b, z2.b, #90 - complex dot product on four 8-bit quad-tuplets, accumulating results in 32-bit elements. The complex numbers in the second source vector are rotated by 90 degrees. cdot z0.d, z1.h, z2.h, #180 - complex dot product on four 16-bit quad-tuplets, accumulating results in 64-bit elements. The complex numbers in the second source vector are rotated by 180 degrees. Indexed form, e.g. cdot z0.s, z1.b, z2.b[3], #0 - complex dot product on four 8-bit quad-tuplets, with specified quad-tuplet from second source vector, accumulating results in 32-bit elements. cdot z0.d, z1.h, z2.h[1], #0 - complex dot product on four 16-bit quad-tuplets, with specified quad-tuplet from second source vector, accumulating results in 64-bit elements. The specification can be found here: https://developer.arm.com/docs/ddi0602/latest Reviewed By: SjoerdMeijer, rovka Differential Revision: https://reviews.llvm.org/D61903 llvm-svn: 360870
This commit is contained in:
parent
3cbf3c8412
commit
07eba98dd7
|
@ -1051,4 +1051,10 @@ let Predicates = [HasSVE2] in {
|
|||
defm SMULH_ZZZ : sve2_int_mul<0b010, "smulh">;
|
||||
defm UMULH_ZZZ : sve2_int_mul<0b011, "umulh">;
|
||||
def PMUL_ZZZ_B : sve2_int_mul<0b00, 0b001, "pmul", ZPR8>;
|
||||
|
||||
// SVE2 complex integer dot product (indexed)
|
||||
defm CDOT_ZZZI : sve2_cintx_dot_by_indexed_elem<"cdot">;
|
||||
|
||||
// SVE2 complex integer dot product
|
||||
defm CDOT_ZZZ : sve2_cintx_dot<"cdot">;
|
||||
}
|
||||
|
|
|
@ -1837,6 +1837,79 @@ multiclass sve_intx_dot_by_indexed_elem<bit opc, string asm> {
|
|||
}
|
||||
}
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// SVE2 Complex Integer Dot Product Group
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
// Instruction class for the vector (non-indexed) form of the SVE2 complex
// integer dot product encoding.
//   sz     - value placed in Inst{23-22}.
//   opc    - value placed in Inst{15-12}.
//   asm    - mnemonic; operands are printed as "$Zda, $Zn, $Zm, $rot".
//   zprty1 - register operand type of the accumulator/destination ($Zda).
//   zprty2 - register operand type shared by both sources ($Zn and $Zm).
class sve2_complex_int_arith<bits<2> sz, bits<4> opc, string asm,
|
||||
ZPRRegOp zprty1, ZPRRegOp zprty2>
|
||||
: I<(outs zprty1:$Zda), (ins zprty1:$_Zda, zprty2:$Zn, zprty2:$Zm,
|
||||
complexrotateop:$rot),
|
||||
asm, "\t$Zda, $Zn, $Zm, $rot", "", []>, Sched<[]> {
|
||||
// Operand fields: three 5-bit register numbers and a 2-bit rotation.
bits<5> Zda;
|
||||
bits<5> Zn;
|
||||
bits<5> Zm;
|
||||
bits<2> rot;
|
||||
// Fixed top byte of the encoding.
let Inst{31-24} = 0b01000100;
|
||||
let Inst{23-22} = sz;
|
||||
// Bit 21 is 0 in this class; sve2_complex_int_arith_indexed sets it to 1.
let Inst{21} = 0b0;
|
||||
let Inst{20-16} = Zm;
|
||||
let Inst{15-12} = opc;
|
||||
let Inst{11-10} = rot;
|
||||
let Inst{9-5} = Zn;
|
||||
let Inst{4-0} = Zda;
|
||||

||||
// The accumulator input $_Zda is tied to the output $Zda, so the
// destination register is both read and written.
let Constraints = "$Zda = $_Zda";
|
||||
let DestructiveInstType = Destructive;
|
||||
// NOTE(review): ElementSizeNone presumably ties in with the movprfx tests
// (unpredicated prefix accepted, predicated prefix rejected) - confirm.
let ElementSize = ElementSizeNone;
|
||||
}
|
||||
|
||||
// Defines the two vector-form variants of a complex integer dot product with
// mnemonic `asm`. Both use opc = 0b0001 and differ in sz and operand types.
multiclass sve2_cintx_dot<string asm> {
|
||||
// sz = 0b10: ZPR32 accumulator with ZPR8 sources (e.g. "z0.s, z1.b, z2.b").
def _S : sve2_complex_int_arith<0b10, 0b0001, asm, ZPR32, ZPR8>;
|
||||
// sz = 0b11: ZPR64 accumulator with ZPR16 sources (e.g. "z0.d, z1.h, z2.h").
def _D : sve2_complex_int_arith<0b11, 0b0001, asm, ZPR64, ZPR16>;
|
||||
}
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// SVE2 Complex Integer Dot Product - Indexed Group
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
// Instruction class for the indexed form of the SVE2 complex integer dot
// product encoding.
//   sz     - value placed in Inst{23-22}.
//   opc    - value placed in Inst{15-12}.
//   asm    - mnemonic; operands are printed as "$Zda, $Zn, $Zm$iop, $rot".
//   zprty1 - register operand type of the accumulator/destination ($Zda).
//   zprty2 - register operand type of the first source ($Zn).
//   zprty3 - register operand type of the indexed second source ($Zm).
//   itype  - operand type of the element index ($iop).
// This class does not declare the Zm or iop fields and leaves Inst{20-16}
// unassigned; the defs in sve2_cintx_dot_by_indexed_elem fill them in, since
// their widths differ per variant.
class sve2_complex_int_arith_indexed<bits<2> sz, bits<4> opc, string asm,
|
||||
ZPRRegOp zprty1, ZPRRegOp zprty2,
|
||||
ZPRRegOp zprty3, Operand itype>
|
||||
: I<(outs zprty1:$Zda), (ins zprty1:$_Zda, zprty2:$Zn, zprty3:$Zm, itype:$iop,
|
||||
complexrotateop:$rot),
|
||||
asm, "\t$Zda, $Zn, $Zm$iop, $rot", "", []>, Sched<[]> {
|
||||
bits<5> Zda;
|
||||
bits<5> Zn;
|
||||
bits<2> rot;
|
||||
// Fixed top byte of the encoding.
let Inst{31-24} = 0b01000100;
|
||||
let Inst{23-22} = sz;
|
||||
// Bit 21 is 1 in this class; sve2_complex_int_arith (vector form) sets it to 0.
let Inst{21} = 0b1;
|
||||
let Inst{15-12} = opc;
|
||||
let Inst{11-10} = rot;
|
||||
let Inst{9-5} = Zn;
|
||||
let Inst{4-0} = Zda;
|
||||

||||
// The accumulator input $_Zda is tied to the output $Zda, so the
// destination register is both read and written.
let Constraints = "$Zda = $_Zda";
|
||||
let DestructiveInstType = Destructive;
|
||||
// NOTE(review): ElementSizeNone presumably ties in with the movprfx tests
// (unpredicated prefix accepted, predicated prefix rejected) - confirm.
let ElementSize = ElementSizeNone;
|
||||
}
|
||||
|
||||
// Defines the two indexed-form variants of a complex integer dot product with
// mnemonic `asm`. Both use opc = 0b0100. Each def supplies the Zm and iop
// fields, which together occupy Inst{20-16} with a variant-specific split.
multiclass sve2_cintx_dot_by_indexed_elem<string asm> {
|
||||
// sz = 0b10: 2-bit index in Inst{20-19}, 3-bit Zm in Inst{18-16}.
def _S : sve2_complex_int_arith_indexed<0b10, 0b0100, asm, ZPR32, ZPR8, ZPR3b8, VectorIndexS> {
|
||||
bits<2> iop;
|
||||
bits<3> Zm;
|
||||
let Inst{20-19} = iop;
|
||||
let Inst{18-16} = Zm;
|
||||
}
|
||||
// sz = 0b11: 1-bit index in Inst{20}, 4-bit Zm in Inst{19-16}.
def _D : sve2_complex_int_arith_indexed<0b11, 0b0100, asm, ZPR64, ZPR16, ZPR4b16, VectorIndexD> {
|
||||
bit iop;
|
||||
bits<4> Zm;
|
||||
let Inst{20} = iop;
|
||||
let Inst{19-16} = Zm;
|
||||
}
|
||||
}
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// SVE2 Integer Multiply - Unpredicated Group
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
|
|
@ -0,0 +1,103 @@
|
|||
// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2 2>&1 < %s| FileCheck %s
|
||||
|
||||
|
||||
// ------------------------------------------------------------------------- //
|
||||
// Invalid element size
|
||||
|
||||
cdot z0.s, z1.h, z31.h, #0
|
||||
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
|
||||
// CHECK-NEXT: cdot z0.s, z1.h, z31.h, #0
|
||||
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
|
||||
|
||||
cdot z0.s, z1.s, z31.s, #0
|
||||
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
|
||||
// CHECK-NEXT: cdot z0.s, z1.s, z31.s, #0
|
||||
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
|
||||
|
||||
cdot z0.s, z1.d, z31.d, #0
|
||||
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
|
||||
// CHECK-NEXT: cdot z0.s, z1.d, z31.d, #0
|
||||
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
|
||||
|
||||
cdot z0.d, z1.b, z31.b, #0
|
||||
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
|
||||
// CHECK-NEXT: cdot z0.d, z1.b, z31.b, #0
|
||||
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
|
||||
|
||||
cdot z0.d, z1.s, z31.s, #0
|
||||
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
|
||||
// CHECK-NEXT: cdot z0.d, z1.s, z31.s, #0
|
||||
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
|
||||
|
||||
cdot z0.d, z1.d, z31.d, #0
|
||||
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
|
||||
// CHECK-NEXT: cdot z0.d, z1.d, z31.d, #0
|
||||
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
|
||||
|
||||
|
||||
// ------------------------------------------------------------------------- //
|
||||
// Invalid restricted register for indexed vector.
|
||||
|
||||
cdot z0.s, z1.b, z8.b[3], #0
|
||||
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
|
||||
// CHECK-NEXT: cdot z0.s, z1.b, z8.b[3], #0
|
||||
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
|
||||
|
||||
cdot z0.d, z1.h, z16.h[1], #0
|
||||
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
|
||||
// CHECK-NEXT: cdot z0.d, z1.h, z16.h[1], #0
|
||||
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
|
||||
|
||||
|
||||
// ------------------------------------------------------------------------- //
|
||||
// Invalid element index
|
||||
|
||||
cdot z0.s, z1.b, z7.b[-1], #0
|
||||
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: vector lane must be an integer in range [0, 3].
|
||||
// CHECK-NEXT: cdot z0.s, z1.b, z7.b[-1], #0
|
||||
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
|
||||
|
||||
cdot z0.s, z1.b, z7.b[4], #0
|
||||
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: vector lane must be an integer in range [0, 3].
|
||||
// CHECK-NEXT: cdot z0.s, z1.b, z7.b[4], #0
|
||||
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
|
||||
|
||||
cdot z0.d, z1.h, z15.h[-1], #0
|
||||
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: vector lane must be an integer in range [0, 1].
|
||||
// CHECK-NEXT: cdot z0.d, z1.h, z15.h[-1], #0
|
||||
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
|
||||
|
||||
cdot z0.d, z1.h, z15.h[2], #0
|
||||
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: vector lane must be an integer in range [0, 1].
|
||||
// CHECK-NEXT: cdot z0.d, z1.h, z15.h[2], #0
|
||||
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
|
||||
|
||||
|
||||
// --------------------------------------------------------------------------//
|
||||
// Invalid rotation
|
||||
|
||||
cdot z0.s, z1.b, z2.b[0], #360
|
||||
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: complex rotation must be 0, 90, 180 or 270.
|
||||
// CHECK-NEXT: cdot z0.s, z1.b, z2.b[0], #360
|
||||
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
|
||||
|
||||
cdot z0.d, z1.h, z2.h[0], #450
|
||||
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: complex rotation must be 0, 90, 180 or 270.
|
||||
// CHECK-NEXT: cdot z0.d, z1.h, z2.h[0], #450
|
||||
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
|
||||
|
||||
|
||||
// --------------------------------------------------------------------------//
|
||||
// Negative tests for instructions that are incompatible with movprfx
|
||||
|
||||
movprfx z0.d, p0/z, z7.d
|
||||
cdot z0.d, z1.h, z31.h, #0
|
||||
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a predicated movprfx, suggest using unpredicated movprfx
|
||||
// CHECK-NEXT: cdot z0.d, z1.h, z31.h, #0
|
||||
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
|
||||
|
||||
movprfx z0.d, p0/z, z7.d
|
||||
cdot z0.d, z1.h, z15.h[1], #0
|
||||
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a predicated movprfx, suggest using unpredicated movprfx
|
||||
// CHECK-NEXT: cdot z0.d, z1.h, z15.h[1], #0
|
||||
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
|
|
@ -0,0 +1,96 @@
|
|||
// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2 < %s \
|
||||
// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
|
||||
// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \
|
||||
// RUN: | FileCheck %s --check-prefix=CHECK-ERROR
|
||||
// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2 < %s \
|
||||
// RUN: | llvm-objdump -d -mattr=+sve2 - | FileCheck %s --check-prefix=CHECK-INST
|
||||
// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2 < %s \
|
||||
// RUN: | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN
|
||||
|
||||
cdot z0.s, z1.b, z31.b, #0
|
||||
// CHECK-INST: cdot z0.s, z1.b, z31.b, #0
|
||||
// CHECK-ENCODING: [0x20,0x10,0x9f,0x44]
|
||||
// CHECK-ERROR: instruction requires: sve2
|
||||
// CHECK-UNKNOWN: 20 10 9f 44 <unknown>
|
||||
|
||||
cdot z0.d, z1.h, z31.h, #0
|
||||
// CHECK-INST: cdot z0.d, z1.h, z31.h, #0
|
||||
// CHECK-ENCODING: [0x20,0x10,0xdf,0x44]
|
||||
// CHECK-ERROR: instruction requires: sve2
|
||||
// CHECK-UNKNOWN: 20 10 df 44 <unknown>
|
||||
|
||||
cdot z0.d, z1.h, z31.h, #90
|
||||
// CHECK-INST: cdot z0.d, z1.h, z31.h, #90
|
||||
// CHECK-ENCODING: [0x20,0x14,0xdf,0x44]
|
||||
// CHECK-ERROR: instruction requires: sve2
|
||||
// CHECK-UNKNOWN: 20 14 df 44 <unknown>
|
||||
|
||||
cdot z0.d, z1.h, z31.h, #180
|
||||
// CHECK-INST: cdot z0.d, z1.h, z31.h, #180
|
||||
// CHECK-ENCODING: [0x20,0x18,0xdf,0x44]
|
||||
// CHECK-ERROR: instruction requires: sve2
|
||||
// CHECK-UNKNOWN: 20 18 df 44 <unknown>
|
||||
|
||||
cdot z0.d, z1.h, z31.h, #270
|
||||
// CHECK-INST: cdot z0.d, z1.h, z31.h, #270
|
||||
// CHECK-ENCODING: [0x20,0x1c,0xdf,0x44]
|
||||
// CHECK-ERROR: instruction requires: sve2
|
||||
// CHECK-UNKNOWN: 20 1c df 44 <unknown>
|
||||
|
||||
cdot z0.s, z1.b, z7.b[3], #0
|
||||
// CHECK-INST: cdot z0.s, z1.b, z7.b[3], #0
|
||||
// CHECK-ENCODING: [0x20,0x40,0xbf,0x44]
|
||||
// CHECK-ERROR: instruction requires: sve2
|
||||
// CHECK-UNKNOWN: 20 40 bf 44 <unknown>
|
||||
|
||||
cdot z0.d, z1.h, z15.h[1], #0
|
||||
// CHECK-INST: cdot z0.d, z1.h, z15.h[1], #0
|
||||
// CHECK-ENCODING: [0x20,0x40,0xff,0x44]
|
||||
// CHECK-ERROR: instruction requires: sve2
|
||||
// CHECK-UNKNOWN: 20 40 ff 44 <unknown>
|
||||
|
||||
cdot z5.d, z6.h, z3.h[0], #90
|
||||
// CHECK-INST: cdot z5.d, z6.h, z3.h[0], #90
|
||||
// CHECK-ENCODING: [0xc5,0x44,0xe3,0x44]
|
||||
// CHECK-ERROR: instruction requires: sve2
|
||||
// CHECK-UNKNOWN: c5 44 e3 44 <unknown>
|
||||
|
||||
cdot z29.d, z30.h, z0.h[0], #180
|
||||
// CHECK-INST: cdot z29.d, z30.h, z0.h[0], #180
|
||||
// CHECK-ENCODING: [0xdd,0x4b,0xe0,0x44]
|
||||
// CHECK-ERROR: instruction requires: sve2
|
||||
// CHECK-UNKNOWN: dd 4b e0 44 <unknown>
|
||||
|
||||
cdot z31.d, z30.h, z7.h[1], #270
|
||||
// CHECK-INST: cdot z31.d, z30.h, z7.h[1], #270
|
||||
// CHECK-ENCODING: [0xdf,0x4f,0xf7,0x44]
|
||||
// CHECK-ERROR: instruction requires: sve2
|
||||
// CHECK-UNKNOWN: df 4f f7 44 <unknown>
|
||||
|
||||
|
||||
// --------------------------------------------------------------------------//
|
||||
// Test compatibility with MOVPRFX instruction.
|
||||
|
||||
movprfx z0, z7
|
||||
// CHECK-INST: movprfx z0, z7
|
||||
// CHECK-ENCODING: [0xe0,0xbc,0x20,0x04]
|
||||
// CHECK-ERROR: instruction requires: sve
|
||||
// CHECK-UNKNOWN: e0 bc 20 04 <unknown>
|
||||
|
||||
cdot z0.d, z1.h, z31.h, #0
|
||||
// CHECK-INST: cdot z0.d, z1.h, z31.h, #0
|
||||
// CHECK-ENCODING: [0x20,0x10,0xdf,0x44]
|
||||
// CHECK-ERROR: instruction requires: sve2
|
||||
// CHECK-UNKNOWN: 20 10 df 44 <unknown>
|
||||
|
||||
movprfx z0, z7
|
||||
// CHECK-INST: movprfx z0, z7
|
||||
// CHECK-ENCODING: [0xe0,0xbc,0x20,0x04]
|
||||
// CHECK-ERROR: instruction requires: sve
|
||||
// CHECK-UNKNOWN: e0 bc 20 04 <unknown>
|
||||
|
||||
cdot z0.d, z1.h, z15.h[1], #0
|
||||
// CHECK-INST: cdot z0.d, z1.h, z15.h[1], #0
|
||||
// CHECK-ENCODING: [0x20,0x40,0xff,0x44]
|
||||
// CHECK-ERROR: instruction requires: sve2
|
||||
// CHECK-UNKNOWN: 20 40 ff 44 <unknown>
|
Loading…
Reference in New Issue