[ARM] Assembler support for the ARMv8.2a dot product instructions

Commit r310480 added the AArch64 ARMv8.2a dot product instructions;
this adds the AArch32 instructions.

Differential Revision: https://reviews.llvm.org/D36575

llvm-svn: 310701
This commit is contained in:
Sjoerd Meijer 2017-08-11 09:52:30 +00:00
parent 9b6b5d5a88
commit 7426c97bc6
13 changed files with 236 additions and 2 deletions

View File

@ -114,6 +114,9 @@ def FeatureCrypto : SubtargetFeature<"crypto", "HasCrypto", "true",
def FeatureCRC : SubtargetFeature<"crc", "HasCRC", "true",
"Enable support for CRC instructions">;
// Subtarget feature "dotprod": sets the HasDotProd subtarget field to true.
// FeatureNEON is listed as an implied feature, so enabling dotprod also
// enables NEON.
def FeatureDotProd : SubtargetFeature<"dotprod", "HasDotProd", "true",
"Enable support for dot product instructions",
[FeatureNEON]>;
// Not to be confused with FeatureHasRetAddrStack (return address stack)
def FeatureRAS : SubtargetFeature<"ras", "HasRAS", "true",

View File

@ -259,6 +259,8 @@ def HasNEON : Predicate<"Subtarget->hasNEON()">,
AssemblerPredicate<"FeatureNEON", "NEON">;
def HasCrypto : Predicate<"Subtarget->hasCrypto()">,
AssemblerPredicate<"FeatureCrypto", "crypto">;
// Predicate for instructions that need the dot product extension. Code
// generation queries Subtarget->hasDotProd(); the assembler checks
// FeatureDotProd and names the missing feature "dotprod" in diagnostics.
def HasDotProd : Predicate<"Subtarget->hasDotProd()">,
AssemblerPredicate<"FeatureDotProd", "dotprod">;
def HasCRC : Predicate<"Subtarget->hasCRC()">,
AssemblerPredicate<"FeatureCRC", "crc">;
def HasRAS : Predicate<"Subtarget->hasRAS()">,
@ -5037,6 +5039,8 @@ def CDP : ABI<0b1110, (outs), (ins p_imm:$cop, imm0_15:$opc1,
let Inst{15-12} = CRd;
let Inst{19-16} = CRn;
let Inst{23-20} = opc1;
let DecoderNamespace = "CoProc";
}
def CDP2 : ABXI<0b1110, (outs), (ins p_imm:$cop, imm0_15:$opc1,
@ -5060,6 +5064,8 @@ def CDP2 : ABXI<0b1110, (outs), (ins p_imm:$cop, imm0_15:$opc1,
let Inst{15-12} = CRd;
let Inst{19-16} = CRn;
let Inst{23-20} = opc1;
let DecoderNamespace = "CoProc";
}
class ACI<dag oops, dag iops, string opc, string asm,
@ -5075,6 +5081,8 @@ class ACInoP<dag oops, dag iops, string opc, string asm,
let Inst{31-28} = 0b1111;
let Inst{27-25} = 0b110;
}
let DecoderNamespace = "CoProc" in {
multiclass LdStCop<bit load, bit Dbit, string asm, list<dag> pattern> {
def _OFFSET : ACI<(outs), (ins p_imm:$cop, c_imm:$CRd, addrmode5:$addr),
asm, "\t$cop, $CRd, $addr", pattern> {
@ -5228,6 +5236,8 @@ defm STCL : LdStCop <0, 1, "stcl", [(int_arm_stcl imm:$cop, imm:$CRd, addrmode5
defm STC2 : LdSt2Cop<0, 0, "stc2", [(int_arm_stc2 imm:$cop, imm:$CRd, addrmode5:$addr)]>, Requires<[IsARM,PreV8]>;
defm STC2L : LdSt2Cop<0, 1, "stc2l", [(int_arm_stc2l imm:$cop, imm:$CRd, addrmode5:$addr)]>, Requires<[IsARM,PreV8]>;
} // DecoderNamespace = "CoProc"
//===----------------------------------------------------------------------===//
// Move between coprocessor and ARM core register.
//
@ -5252,6 +5262,8 @@ class MovRCopro<string opc, bit direction, dag oops, dag iops,
let Inst{7-5} = opc2;
let Inst{3-0} = CRm;
let Inst{19-16} = CRn;
let DecoderNamespace = "CoProc";
}
def MCR : MovRCopro<"mcr", 0 /* from ARM core register to coprocessor */,
@ -5296,6 +5308,8 @@ class MovRCopro2<string opc, bit direction, dag oops, dag iops,
let Inst{7-5} = opc2;
let Inst{3-0} = CRm;
let Inst{19-16} = CRn;
let DecoderNamespace = "CoProc";
}
def MCR2 : MovRCopro2<"mcr2", 0 /* from ARM core register to coprocessor */,

View File

@ -4672,6 +4672,42 @@ def : Pat<(v4f32 (fma (fneg QPR:$Vn), QPR:$Vm, QPR:$src1)),
(VFMSfq QPR:$src1, QPR:$Vn, QPR:$Vm)>,
Requires<[HasVFP4]>;
// ARMv8.2a dot product instructions.
// We put them in the VFPV8 decoder namespace because the ARM and Thumb
// encodings are the same and thus no further bit twiddling is necessary
// in the disassembler.
let Predicates = [HasDotProd], DecoderNamespace = "VFPV8" in {

// Vector forms: all three operands are full D (64-bit) or Q (128-bit)
// registers. The last two template bits select Q (D vs. Q form) and
// U (unsigned vs. signed).
def VUDOTD : N3Vnp<0b11000, 0b10, 0b1101, 0b0, 0b1,
                   (outs DPR:$Vd), (ins DPR:$Vn, DPR:$Vm),
                   N3RegFrm, IIC_VDOTPROD, "vudot", "u8", []>;
def VSDOTD : N3Vnp<0b11000, 0b10, 0b1101, 0b0, 0b0,
                   (outs DPR:$Vd), (ins DPR:$Vn, DPR:$Vm),
                   N3RegFrm, IIC_VDOTPROD, "vsdot", "s8", []>;
def VUDOTQ : N3Vnp<0b11000, 0b10, 0b1101, 0b1, 0b1,
                   (outs QPR:$Vd), (ins QPR:$Vn, QPR:$Vm),
                   N3RegFrm, IIC_VDOTPROD, "vudot", "u8", []>;
def VSDOTQ : N3Vnp<0b11000, 0b10, 0b1101, 0b1, 0b0,
                   (outs QPR:$Vd), (ins QPR:$Vn, QPR:$Vm),
                   N3RegFrm, IIC_VDOTPROD, "vsdot", "s8", []>;

// Indexed dot product instructions:
//   opc/dt - mnemonic and data-type suffix used to build the asm string.
//   Q, U   - encoding bits: Q selects the Q-register form, U the unsigned one.
//   Ty     - register class of $Vd and $Vn (DPR or QPR).
// $Vm is always a D register followed by a one-bit lane index. Inst{5} is
// overridden with that lane index below, which leaves only four encoding
// bits for the $Vm register number. $Vm is therefore restricted to DPR_VFP2
// (d0-d15); with plain DPR the assembler would accept d16-d31 and silently
// drop the top bit of the register number.
class DOTI<string opc, string dt, bit Q, bit U, RegisterClass Ty> :
  N3Vnp<0b11100, 0b10, 0b1101, Q, U,
        (outs Ty:$Vd), (ins Ty:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
        N3RegFrm, IIC_VDOTPROD, opc, dt, []> {
  bit lane;
  let Inst{5} = lane;
  // Custom asm string so that the lane operand prints directly after $Vm,
  // e.g. "vudot.u8 d0, d1, d2[0]".
  let AsmString = !strconcat(opc, ".", dt, "\t$Vd, $Vn, $Vm$lane");
}

def VUDOTDI : DOTI<"vudot", "u8", 0b0, 0b1, DPR>;
def VSDOTDI : DOTI<"vsdot", "s8", 0b0, 0b0, DPR>;
def VUDOTQI : DOTI<"vudot", "u8", 0b1, 0b1, QPR>;
def VSDOTQI : DOTI<"vsdot", "s8", 0b1, 0b0, QPR>;

} // HasDotProd
// Vector Subtract Operations.
// VSUB : Vector Subtract (integer and floating-point)

View File

@ -3964,6 +3964,7 @@ multiclass t2LdStCop<bits<4> op31_28, bit load, bit Dbit, string asm, list<dag>
}
}
let DecoderNamespace = "Thumb2CoProc" in {
defm t2LDC : t2LdStCop<0b1110, 1, 0, "ldc", [(int_arm_ldc imm:$cop, imm:$CRd, addrmode5:$addr)]>;
defm t2LDCL : t2LdStCop<0b1110, 1, 1, "ldcl", [(int_arm_ldcl imm:$cop, imm:$CRd, addrmode5:$addr)]>;
defm t2LDC2 : t2LdStCop<0b1111, 1, 0, "ldc2", [(int_arm_ldc2 imm:$cop, imm:$CRd, addrmode5:$addr)]>, Requires<[PreV8,IsThumb2]>;
@ -3973,6 +3974,7 @@ defm t2STC : t2LdStCop<0b1110, 0, 0, "stc", [(int_arm_stc imm:$cop, imm:$CRd,
defm t2STCL : t2LdStCop<0b1110, 0, 1, "stcl", [(int_arm_stcl imm:$cop, imm:$CRd, addrmode5:$addr)]>;
defm t2STC2 : t2LdStCop<0b1111, 0, 0, "stc2", [(int_arm_stc2 imm:$cop, imm:$CRd, addrmode5:$addr)]>, Requires<[PreV8,IsThumb2]>;
defm t2STC2L : t2LdStCop<0b1111, 0, 1, "stc2l", [(int_arm_stc2l imm:$cop, imm:$CRd, addrmode5:$addr)]>, Requires<[PreV8,IsThumb2]>;
}
//===----------------------------------------------------------------------===//
@ -4125,6 +4127,8 @@ class t2MovRCopro<bits<4> Op, string opc, bit direction, dag oops, dag iops,
let Inst{7-5} = opc2;
let Inst{3-0} = CRm;
let Inst{19-16} = CRn;
let DecoderNamespace = "Thumb2CoProc";
}
class t2MovRRCopro<bits<4> Op, string opc, bit direction, dag oops, dag iops,
@ -4145,6 +4149,8 @@ class t2MovRRCopro<bits<4> Op, string opc, bit direction, dag oops, dag iops,
let Inst{11-8} = cop;
let Inst{7-4} = opc1;
let Inst{3-0} = CRm;
let DecoderNamespace = "Thumb2CoProc";
}
/* from ARM core register to coprocessor */
@ -4243,6 +4249,7 @@ def t2CDP : T2Cop<0b1110, (outs), (ins p_imm:$cop, imm0_15:$opc1,
let Inst{23-20} = opc1;
let Predicates = [IsThumb2, PreV8];
let DecoderNamespace = "Thumb2CoProc";
}
def t2CDP2 : T2Cop<0b1111, (outs), (ins p_imm:$cop, imm0_15:$opc1,
@ -4268,6 +4275,7 @@ def t2CDP2 : T2Cop<0b1111, (outs), (ins p_imm:$cop, imm0_15:$opc1,
let Inst{23-20} = opc1;
let Predicates = [IsThumb2, PreV8];
let DecoderNamespace = "Thumb2CoProc";
}

View File

@ -414,6 +414,7 @@ def IIC_VTBX1 : InstrItinClass;
def IIC_VTBX2 : InstrItinClass;
def IIC_VTBX3 : InstrItinClass;
def IIC_VTBX4 : InstrItinClass;
// Itinerary class used by the ARMv8.2a dot product instructions (VUDOT/VSDOT).
def IIC_VDOTPROD : InstrItinClass;
//===----------------------------------------------------------------------===//
// Processor instruction itineraries.

View File

@ -156,6 +156,9 @@ protected:
bool HasFPARMv8 = false;
bool HasNEON = false;
/// HasDotProd - True if the ARMv8.2A dot product instructions are supported.
bool HasDotProd = false;
/// UseNEONForSinglePrecisionFP - if the NEONFP attribute has been
/// specified. Use the method useNEONForSinglePrecisionFP() to
/// determine if NEON should actually be used.
@ -521,6 +524,7 @@ public:
bool hasFPARMv8() const { return HasFPARMv8; }
bool hasNEON() const { return HasNEON; }
bool hasCrypto() const { return HasCrypto; }
/// Returns true if the ARMv8.2A dot product instructions are supported.
bool hasDotProd() const { return HasDotProd; }
bool hasCRC() const { return HasCRC; }
bool hasRAS() const { return HasRAS; }
bool hasVirtualization() const { return HasVirtualization; }

View File

@ -5348,7 +5348,8 @@ StringRef ARMAsmParser::splitMnemonic(StringRef Mnemonic,
Mnemonic == "vcvtm" || Mnemonic == "vrinta" || Mnemonic == "vrintn" ||
Mnemonic == "vrintp" || Mnemonic == "vrintm" || Mnemonic == "hvc" ||
Mnemonic.startswith("vsel") || Mnemonic == "vins" || Mnemonic == "vmovx" ||
Mnemonic == "bxns" || Mnemonic == "blxns")
Mnemonic == "bxns" || Mnemonic == "blxns" ||
Mnemonic == "vudot" || Mnemonic == "vsdot")
return Mnemonic;
// First, split out any predication code. Ignore mnemonics we know aren't
@ -5454,7 +5455,8 @@ void ARMAsmParser::getMnemonicAcceptInfo(StringRef Mnemonic, StringRef FullInst,
Mnemonic.startswith("aes") || Mnemonic == "hvc" || Mnemonic == "setpan" ||
Mnemonic.startswith("sha1") || Mnemonic.startswith("sha256") ||
(FullInst.startswith("vmull") && FullInst.endswith(".p64")) ||
Mnemonic == "vmovx" || Mnemonic == "vins") {
Mnemonic == "vmovx" || Mnemonic == "vins" ||
Mnemonic == "vudot" || Mnemonic == "vsdot") {
// These mnemonics are never predicable
CanAcceptPredicationCode = false;
} else if (!isThumb()) {

View File

@ -486,6 +486,13 @@ DecodeStatus ARMDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
}
}
// Last attempt before giving up: the generic coprocessor decoder table.
// It is tried after the tables above, so an encoding is only reported as a
// coprocessor instruction when none of the earlier tables accepted it.
Result =
decodeInstruction(DecoderTableCoProc32, MI, Insn, Address, this, STI);
if (Result != MCDisassembler::Fail) {
Size = 4;
return checkDecodedInstruction(MI, Size, Address, OS, CS, Insn, Result);
}
Size = 4;
return MCDisassembler::Fail;
}
@ -821,6 +828,14 @@ DecodeStatus ThumbDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
}
}
// Last attempt before giving up: the Thumb2 coprocessor decoder table.
// It is tried after the tables above, so an encoding is only reported as a
// coprocessor instruction when none of the earlier tables accepted it.
Result =
decodeInstruction(DecoderTableThumb2CoProc32, MI, Insn32, Address, this, STI);
if (Result != MCDisassembler::Fail) {
Size = 4;
// Combine the outcome of AddThumbPredicate into Result before returning.
Check(Result, AddThumbPredicate(MI));
return Result;
}
Size = 0;
return MCDisassembler::Fail;
}

View File

@ -0,0 +1,37 @@
// Assembler test for the ARMv8.2a dot product instructions in ARM mode.
// The first invocation checks the encodings with +dotprod. The others expect
// every instruction to be rejected when dotprod is disabled or not requested;
// +v8.1a and +v8.2a on their own are expected not to enable it.
// RUN: llvm-mc -triple arm -mattr=+dotprod -show-encoding < %s | FileCheck %s --check-prefix=CHECK
// RUN: not llvm-mc -triple arm -mattr=-dotprod -show-encoding < %s 2> %t
// RUN: FileCheck --check-prefix=CHECK-NO-DOTPROD < %t %s
// RUN: not llvm-mc -triple arm -show-encoding < %s 2> %t
// RUN: FileCheck --check-prefix=CHECK-NO-DOTPROD < %t %s
// RUN: not llvm-mc -triple arm -mattr=+v8.1a -show-encoding < %s 2> %t
// RUN: FileCheck --check-prefix=CHECK-NO-DOTPROD < %t %s
// RUN: not llvm-mc -triple arm -mattr=+v8.2a -show-encoding < %s 2> %t
// RUN: FileCheck --check-prefix=CHECK-NO-DOTPROD < %t %s
// Vector forms (D and Q registers), then indexed forms with lane 0 and 1.
vudot.u8 d0, d1, d2
vsdot.s8 d0, d1, d2
vudot.u8 q0, q1, q4
vsdot.s8 q0, q1, q4
vudot.u8 d0, d1, d2[0]
vsdot.s8 d0, d1, d2[1]
vudot.u8 q0, q1, d4[0]
vsdot.s8 q0, q1, d4[1]
// CHECK: vudot.u8 d0, d1, d2 @ encoding: [0x12,0x0d,0x21,0xfc]
// CHECK: vsdot.s8 d0, d1, d2 @ encoding: [0x02,0x0d,0x21,0xfc]
// CHECK: vudot.u8 q0, q1, q4 @ encoding: [0x58,0x0d,0x22,0xfc]
// CHECK: vsdot.s8 q0, q1, q4 @ encoding: [0x48,0x0d,0x22,0xfc]
// CHECK: vudot.u8 d0, d1, d2[0] @ encoding: [0x12,0x0d,0x21,0xfe]
// CHECK: vsdot.s8 d0, d1, d2[1] @ encoding: [0x22,0x0d,0x21,0xfe]
// CHECK: vudot.u8 q0, q1, d4[0] @ encoding: [0x54,0x0d,0x22,0xfe]
// CHECK: vsdot.s8 q0, q1, d4[1] @ encoding: [0x64,0x0d,0x22,0xfe]
// One diagnostic is expected per instruction above (eight in total).
// CHECK-NO-DOTPROD: error: instruction requires: dotprod
// CHECK-NO-DOTPROD: error: instruction requires: dotprod
// CHECK-NO-DOTPROD: error: instruction requires: dotprod
// CHECK-NO-DOTPROD: error: instruction requires: dotprod
// CHECK-NO-DOTPROD: error: instruction requires: dotprod
// CHECK-NO-DOTPROD: error: instruction requires: dotprod
// CHECK-NO-DOTPROD: error: instruction requires: dotprod
// CHECK-NO-DOTPROD: error: instruction requires: dotprod

View File

@ -0,0 +1,14 @@
// Negative assembler test for the indexed dot product instructions.
// The lane index is encoded in a single bit, so only [0] and [1] are valid;
// index [2] must be rejected in both ARM and Thumb mode.
// RUN: not llvm-mc -triple arm -mattr=+dotprod -show-encoding < %s 2> %t
// RUN: FileCheck --check-prefix=CHECK-ERROR < %t %s
// RUN: not llvm-mc -triple thumb -mattr=+dotprod -show-encoding < %s 2> %t
// RUN: FileCheck --check-prefix=CHECK-ERROR < %t %s
vudot.u8 d0, d1, d2[2]
vsdot.s8 d0, d1, d2[2]
vudot.u8 q0, q1, d4[2]
vsdot.s8 q0, q1, d4[2]
// One diagnostic is expected per instruction above (four in total).
// CHECK-ERROR: error: invalid operand for instruction
// CHECK-ERROR: error: invalid operand for instruction
// CHECK-ERROR: error: invalid operand for instruction
// CHECK-ERROR: error: invalid operand for instruction

View File

@ -0,0 +1,38 @@
// Assembler test for the ARMv8.2a dot product instructions in Thumb mode.
// The first invocation checks the encodings with +dotprod. The others expect
// every instruction to be rejected when dotprod is disabled or not requested;
// +v8.1a and +v8.2a on their own are expected not to enable it.
// RUN: llvm-mc -triple thumb -mattr=+dotprod -show-encoding < %s | FileCheck %s --check-prefix=CHECK
// RUN: not llvm-mc -triple thumb -mattr=-dotprod -show-encoding < %s 2> %t
// RUN: FileCheck --check-prefix=CHECK-ERROR < %t %s
// RUN: not llvm-mc -triple thumb -show-encoding < %s 2> %t
// RUN: FileCheck --check-prefix=CHECK-ERROR < %t %s
// RUN: not llvm-mc -triple thumb -mattr=+v8.1a -show-encoding < %s 2> %t
// RUN: FileCheck --check-prefix=CHECK-ERROR < %t %s
// RUN: not llvm-mc -triple thumb -mattr=+v8.2a -show-encoding < %s 2> %t
// RUN: FileCheck --check-prefix=CHECK-ERROR < %t %s
// Vector forms (D and Q registers), then indexed forms with lane 0 and 1.
vudot.u8 d0, d1, d2
vsdot.s8 d0, d1, d2
vudot.u8 q0, q1, q4
vsdot.s8 q0, q1, q4
vudot.u8 d0, d1, d2[0]
vsdot.s8 d0, d1, d2[1]
vudot.u8 q0, q1, d4[0]
vsdot.s8 q0, q1, d4[1]
// The expected bytes are the same as in the ARM-mode test, with the two
// halfwords of each instruction swapped.
//CHECK: vudot.u8 d0, d1, d2 @ encoding: [0x21,0xfc,0x12,0x0d]
//CHECK: vsdot.s8 d0, d1, d2 @ encoding: [0x21,0xfc,0x02,0x0d]
//CHECK: vudot.u8 q0, q1, q4 @ encoding: [0x22,0xfc,0x58,0x0d]
//CHECK: vsdot.s8 q0, q1, q4 @ encoding: [0x22,0xfc,0x48,0x0d]
//CHECK: vudot.u8 d0, d1, d2[0] @ encoding: [0x21,0xfe,0x12,0x0d]
//CHECK: vsdot.s8 d0, d1, d2[1] @ encoding: [0x21,0xfe,0x22,0x0d]
//CHECK: vudot.u8 q0, q1, d4[0] @ encoding: [0x22,0xfe,0x54,0x0d]
//CHECK: vsdot.s8 q0, q1, d4[1] @ encoding: [0x22,0xfe,0x64,0x0d]
// One diagnostic is expected per instruction above (eight in total).
//CHECK-ERROR: error: instruction requires: dotprod
//CHECK-ERROR: error: instruction requires: dotprod
//CHECK-ERROR: error: instruction requires: dotprod
//CHECK-ERROR: error: instruction requires: dotprod
//CHECK-ERROR: error: instruction requires: dotprod
//CHECK-ERROR: error: instruction requires: dotprod
//CHECK-ERROR: error: instruction requires: dotprod
//CHECK-ERROR: error: instruction requires: dotprod

View File

@ -0,0 +1,33 @@
# Disassembler test for the ARMv8.2a dot product instructions in ARM mode.
# The same byte sequences are decoded once with +dotprod and once with
# -dotprod.
# RUN: llvm-mc -triple arm-none-linux-gnu -mattr=+dotprod --disassemble < %s | FileCheck %s
# RUN: llvm-mc -triple arm-none-linux-gnu -mattr=-dotprod --disassemble < %s 2>&1 | FileCheck %s --check-prefix=CHECK-ERROR
0x12,0x0d,0x21,0xfc
0x02,0x0d,0x21,0xfc
0x58,0x0d,0x22,0xfc
0x48,0x0d,0x22,0xfc
0x12,0x0d,0x21,0xfe
0x22,0x0d,0x21,0xfe
0x54,0x0d,0x22,0xfe
0x64,0x0d,0x22,0xfe
#CHECK: vudot.u8 d0, d1, d2
#CHECK: vsdot.s8 d0, d1, d2
#CHECK: vudot.u8 q0, q1, q4
#CHECK: vsdot.s8 q0, q1, q4
#CHECK: vudot.u8 d0, d1, d2[0]
#CHECK: vsdot.s8 d0, d1, d2[1]
#CHECK: vudot.u8 q0, q1, d4[0]
#CHECK: vsdot.s8 q0, q1, d4[1]
# without dot product enabled, the instructions get disassembled to these
# coprocessor instructions:
#CHECK-ERROR: stc2 p13, c0, [r1], #-72
#CHECK-ERROR: stc2 p13, c0, [r1], #-8
#CHECK-ERROR: stc2 p13, c0, [r2], #-352
#CHECK-ERROR: stc2 p13, c0, [r2], #-288
#CHECK-ERROR: mcr2 p13, #1, r0, c1, c2, #0
#CHECK-ERROR: cdp2 p13, #2, c0, c1, c2, #1
#CHECK-ERROR: mcr2 p13, #1, r0, c2, c4, #2
#CHECK-ERROR: cdp2 p13, #2, c0, c2, c4, #3

View File

@ -0,0 +1,29 @@
# Disassembler test for the ARMv8.2a dot product instructions in Thumb mode.
# The same byte sequences are decoded once with +dotprod and once with
# -dotprod.
# RUN: llvm-mc -triple thumbv7a -mattr=+dotprod --disassemble < %s | FileCheck %s
# RUN: llvm-mc -triple thumbv7a -mattr=-dotprod --disassemble < %s 2>&1 | FileCheck %s --check-prefix=CHECK-ERROR
[0x21,0xfc,0x12,0x0d]
[0x21,0xfc,0x02,0x0d]
[0x22,0xfc,0x58,0x0d]
[0x22,0xfc,0x48,0x0d]
[0x21,0xfe,0x12,0x0d]
[0x21,0xfe,0x22,0x0d]
[0x22,0xfe,0x54,0x0d]
[0x22,0xfe,0x64,0x0d]
#CHECK: vudot.u8 d0, d1, d2
#CHECK: vsdot.s8 d0, d1, d2
#CHECK: vudot.u8 q0, q1, q4
#CHECK: vsdot.s8 q0, q1, q4
#CHECK: vudot.u8 d0, d1, d2[0]
#CHECK: vsdot.s8 d0, d1, d2[1]
#CHECK: vudot.u8 q0, q1, d4[0]
#CHECK: vsdot.s8 q0, q1, d4[1]
# Without dot product enabled, the same bytes are expected to disassemble to
# these coprocessor instructions instead:
#CHECK-ERROR: stc2 p13, c0, [r1], #-72
#CHECK-ERROR: stc2 p13, c0, [r1], #-8
#CHECK-ERROR: stc2 p13, c0, [r2], #-352
#CHECK-ERROR: stc2 p13, c0, [r2], #-288
#CHECK-ERROR: mcr2 p13, #1, r0, c1, c2, #0
#CHECK-ERROR: cdp2 p13, #2, c0, c1, c2, #1
#CHECK-ERROR: mcr2 p13, #1, r0, c2, c4, #2
#CHECK-ERROR: cdp2 p13, #2, c0, c2, c4, #3