[AArch64] Implemented AdvSIMD scalar x indexed element format and AdvSIMD scalar

copy in MC layer. Added the MC layer tests.  Fixed triple setting in test cases.

Patch by Ana Pazos <apazos@codeaurora.org>.

llvm-svn: 194501
This commit is contained in:
Chad Rosier 2013-11-12 19:13:08 +00:00
parent b6517850fb
commit 1eb0ecf8ce
14 changed files with 1001 additions and 37 deletions

View File

@ -1359,5 +1359,33 @@ class NeonI_Crypto_3VSHA<bits<2> size, bits<3> opcode,
// Inherit Rd in 4-0
}
// Format AdvSIMD scalar x indexed element
// Fixes the bits common to every instruction of this format: Inst{31-30},
// Inst{28-24} and Inst{10} are constants, while Inst{29}, Inst{23-22} and
// Inst{15-12} come from the u, szhi/szlo and opcode template arguments.
// The lane-index bits h (Inst{11}), l (Inst{21}) and m (Inst{20}) are NOT set
// here; each instruction def deriving from this class assigns them itself.
class NeonI_ScalarXIndexedElem<bit u, bit szhi, bit szlo,
bits<4> opcode, dag outs, dag ins,
string asmstr, list<dag> patterns,
InstrItinClass itin>
: A64InstRdnm<outs, ins, asmstr, patterns, itin>
{
let Inst{31} = 0b0;
let Inst{30} = 0b1;
let Inst{29} = u;
let Inst{28-24} = 0b11111;
let Inst{23} = szhi;
let Inst{22} = szlo;
// l in Inst{21}
// m in Inst{20}
// Inherit Rm in 19-16
let Inst{15-12} = opcode;
// h in Inst{11}
let Inst{10} = 0b0;
// Inherit Rn in 9-5
// Inherit Rd in 4-0
}
// Format AdvSIMD scalar copy - insert from element to scalar
// Built on top of NeonI_copy with its first three template arguments pinned
// to (0b1, 0b0, 0b0000); the only additional encoding change made here is
// forcing Inst{28} to 1.
class NeonI_ScalarCopy<dag outs, dag ins, string asmstr,
list<dag> patterns, InstrItinClass itin>
: NeonI_copy<0b1, 0b0, 0b0000, outs, ins, asmstr, patterns, itin> {
let Inst{28} = 0b1;
}
}

View File

@ -1278,7 +1278,7 @@ def : Pat<(i64 (sext_inreg (anyext i32:$Rn), i1)),
// UBFX makes sense as an implementation of a 64-bit zero-extension too. Could
// use either 64-bit or 32-bit variant, but 32-bit might be more efficient.
def : Pat<(zext i32:$Rn), (SUBREG_TO_REG (i64 0), (UBFXwwii $Rn, 0, 31),
def : Pat<(i64 (zext i32:$Rn)), (SUBREG_TO_REG (i64 0), (UBFXwwii $Rn, 0, 31),
sub_32)>;
//===-------------------------------

View File

@ -4671,6 +4671,294 @@ defm : Neon_ScalarPair_SD_size_patterns<int_aarch64_neon_vpfmaxnm,
defm : Neon_ScalarPair_SD_size_patterns<int_aarch64_neon_vpfminnm,
int_aarch64_neon_vpfminnmq, FMINNMPvv_S_2S, FMINNMPvv_D_2D>;
// "Bare" unsigned immediate operands used as lane indices. All of them are
// i64 operands printed through printUImmBareOperand.
// Only neon_uimm0_bare restricts the value in its ImmLeaf predicate
// (Imm == 0); the predicates of the other four accept every immediate, so any
// range checking presumably comes from the ParserMatchClass - TODO confirm.
def neon_uimm0_bare : Operand<i64>,
ImmLeaf<i64, [{return Imm == 0;}]> {
let ParserMatchClass = neon_uimm0_asmoperand;
let PrintMethod = "printUImmBareOperand";
}
def neon_uimm1_bare : Operand<i64>,
ImmLeaf<i64, [{(void)Imm; return true;}]> {
let ParserMatchClass = neon_uimm1_asmoperand;
let PrintMethod = "printUImmBareOperand";
}
def neon_uimm2_bare : Operand<i64>,
ImmLeaf<i64, [{(void)Imm; return true;}]> {
let ParserMatchClass = neon_uimm2_asmoperand;
let PrintMethod = "printUImmBareOperand";
}
// NOTE: the 3- and 4-bit variants use uimm3_asmoperand / uimm4_asmoperand
// (no "neon_" prefix), unlike the 0/1/2-bit variants above.
def neon_uimm3_bare : Operand<i64>,
ImmLeaf<i64, [{(void)Imm; return true;}]> {
let ParserMatchClass = uimm3_asmoperand;
let PrintMethod = "printUImmBareOperand";
}
def neon_uimm4_bare : Operand<i64>,
ImmLeaf<i64, [{(void)Imm; return true;}]> {
let ParserMatchClass = uimm4_asmoperand;
let PrintMethod = "printUImmBareOperand";
}
// Scalar by element Arithmetic
// Non-accumulating form: $Rd = op($Rn, $MRm[$Imm]).
//   asmop   - mnemonic
//   rmlane  - element-size suffix appended to $MRm in the asm string
//   ResFPR  - scalar register class of the result
//   OpFPR   - scalar register class of the first source
//   OpVPR   - vector register operand that is indexed
//   OpImm   - operand type of the lane index
// No selection patterns are attached (MC layer only). The class declares the
// Imm and MRm encoding fields but does not place them in Inst; every def
// deriving from it assigns the h/l/m and register bits explicitly.
class NeonI_ScalarXIndexedElemArith<string asmop, bits<4> opcode,
string rmlane, bit u, bit szhi, bit szlo,
RegisterClass ResFPR, RegisterClass OpFPR,
RegisterOperand OpVPR, Operand OpImm>
: NeonI_ScalarXIndexedElem<u, szhi, szlo, opcode,
(outs ResFPR:$Rd),
(ins OpFPR:$Rn, OpVPR:$MRm, OpImm:$Imm),
asmop # "\t$Rd, $Rn, $MRm" # rmlane # "[$Imm]",
[],
NoItinerary> {
bits<3> Imm;
bits<5> MRm;
}
// Accumulating variant of the scalar by-element arithmetic class.
// Takes the same template arguments and produces the same asm string as
// NeonI_ScalarXIndexedElemArith, but adds an extra ResFPR:$src input that is
// tied to the result through the "$src = $Rd" constraint, so the destination
// register is also an input. $src does not appear in the asm string.
class NeonI_ScalarXIndexedElemArith_Constraint_Impl<string asmop, bits<4> opcode,
string rmlane,
bit u, bit szhi, bit szlo,
RegisterClass ResFPR,
RegisterClass OpFPR,
RegisterOperand OpVPR,
Operand OpImm>
: NeonI_ScalarXIndexedElem<u, szhi, szlo, opcode,
(outs ResFPR:$Rd),
(ins ResFPR:$src, OpFPR:$Rn, OpVPR:$MRm, OpImm:$Imm),
asmop # "\t$Rd, $Rn, $MRm" # rmlane # "[$Imm]",
[],
NoItinerary> {
let Constraints = "$src = $Rd";
bits<3> Imm;
bits<5> MRm;
}
// Scalar Floating Point multiply (scalar, by element)
// opcode 0b1001 with u = 0.
// .s form: the 2-bit lane index is split as h:l = Imm{1}:Imm{0}.
// .d form: the 1-bit lane index lives in h and l is fixed to 0.
// Both forms encode the full 5-bit vector register in Inst{20-16}.
def FMULssv_4S : NeonI_ScalarXIndexedElemArith<"fmul",
0b1001, ".s", 0b0, 0b1, 0b0, FPR32, FPR32, VPR128, neon_uimm2_bare> {
let Inst{11} = Imm{1}; // h
let Inst{21} = Imm{0}; // l
let Inst{20-16} = MRm;
}
def FMULddv_2D : NeonI_ScalarXIndexedElemArith<"fmul",
0b1001, ".d", 0b0, 0b1, 0b1, FPR64, FPR64, VPR128, neon_uimm1_bare> {
let Inst{11} = Imm{0}; // h
let Inst{21} = 0b0; // l
let Inst{20-16} = MRm;
}
// Scalar Floating Point multiply extended (scalar, by element)
// Same opcode (0b1001) and lane-index layout as the fmul defs; the only
// encoding difference is u = 1.
// .s form: lane index h:l = Imm{1}:Imm{0}; .d form: index in h, l fixed to 0.
def FMULXssv_4S : NeonI_ScalarXIndexedElemArith<"fmulx",
0b1001, ".s", 0b1, 0b1, 0b0, FPR32, FPR32, VPR128, neon_uimm2_bare> {
let Inst{11} = Imm{1}; // h
let Inst{21} = Imm{0}; // l
let Inst{20-16} = MRm;
}
def FMULXddv_2D : NeonI_ScalarXIndexedElemArith<"fmulx",
0b1001, ".d", 0b1, 0b1, 0b1, FPR64, FPR64, VPR128, neon_uimm1_bare> {
let Inst{11} = Imm{0}; // h
let Inst{21} = 0b0; // l
let Inst{20-16} = MRm;
}
// Scalar Floating Point fused multiply-add (scalar, by element)
// opcode 0b0001 with u = 0. Uses the _Constraint_Impl class, so $Rd is tied
// to the extra $src input.
// .s form: lane index h:l = Imm{1}:Imm{0}; .d form: index in h, l fixed to 0.
def FMLAssv_4S : NeonI_ScalarXIndexedElemArith_Constraint_Impl<"fmla",
0b0001, ".s", 0b0, 0b1, 0b0, FPR32, FPR32, VPR128, neon_uimm2_bare> {
let Inst{11} = Imm{1}; // h
let Inst{21} = Imm{0}; // l
let Inst{20-16} = MRm;
}
def FMLAddv_2D : NeonI_ScalarXIndexedElemArith_Constraint_Impl<"fmla",
0b0001, ".d", 0b0, 0b1, 0b1, FPR64, FPR64, VPR128, neon_uimm1_bare> {
let Inst{11} = Imm{0}; // h
let Inst{21} = 0b0; // l
let Inst{20-16} = MRm;
}
// Scalar Floating Point fused multiply-subtract (scalar, by element)
// opcode 0b0101 with u = 0. Uses the _Constraint_Impl class, so $Rd is tied
// to the extra $src input.
// .s form: lane index h:l = Imm{1}:Imm{0}; .d form: index in h, l fixed to 0.
def FMLSssv_4S : NeonI_ScalarXIndexedElemArith_Constraint_Impl<"fmls",
0b0101, ".s", 0b0, 0b1, 0b0, FPR32, FPR32, VPR128, neon_uimm2_bare> {
let Inst{11} = Imm{1}; // h
let Inst{21} = Imm{0}; // l
let Inst{20-16} = MRm;
}
def FMLSddv_2D : NeonI_ScalarXIndexedElemArith_Constraint_Impl<"fmls",
0b0101, ".d", 0b0, 0b1, 0b1, FPR64, FPR64, VPR128, neon_uimm1_bare> {
let Inst{11} = Imm{0}; // h
let Inst{21} = 0b0; // l
let Inst{20-16} = MRm;
}
// Scalar Signed saturating doubling multiply-add long (scalar, by element)
// opcode 0b0011 with u = 0; accumulating, so $Rd is tied to $src. The result
// register class is one size wider than the scalar source (FPR16 -> FPR32,
// FPR32 -> FPR64).
// .h forms: lane index is h:l:m; Inst{20} carries m, so only MRm{3-0} is
//           encoded and the vector operand uses a "Lo" register class.
//           The 64-bit-vector form (_4H) has a 2-bit index and fixes h to 0.
// .s forms: lane index is h:l with the full 5-bit register in Inst{20-16}.
//           The 64-bit-vector form (_2S) has a 1-bit index and fixes h to 0.
def SQDMLALshv_4H : NeonI_ScalarXIndexedElemArith_Constraint_Impl<"sqdmlal",
0b0011, ".h", 0b0, 0b0, 0b1, FPR32, FPR16, VPR64Lo, neon_uimm2_bare> {
let Inst{11} = 0b0; // h
let Inst{21} = Imm{1}; // l
let Inst{20} = Imm{0}; // m
let Inst{19-16} = MRm{3-0};
}
def SQDMLALshv_8H : NeonI_ScalarXIndexedElemArith_Constraint_Impl<"sqdmlal",
0b0011, ".h", 0b0, 0b0, 0b1, FPR32, FPR16, VPR128Lo, neon_uimm3_bare> {
let Inst{11} = Imm{2}; // h
let Inst{21} = Imm{1}; // l
let Inst{20} = Imm{0}; // m
let Inst{19-16} = MRm{3-0};
}
def SQDMLALdsv_2S : NeonI_ScalarXIndexedElemArith_Constraint_Impl<"sqdmlal",
0b0011, ".s", 0b0, 0b1, 0b0, FPR64, FPR32, VPR64, neon_uimm1_bare> {
let Inst{11} = 0b0; // h
let Inst{21} = Imm{0}; // l
let Inst{20-16} = MRm;
}
def SQDMLALdsv_4S : NeonI_ScalarXIndexedElemArith_Constraint_Impl<"sqdmlal",
0b0011, ".s", 0b0, 0b1, 0b0, FPR64, FPR32, VPR128, neon_uimm2_bare> {
let Inst{11} = Imm{1}; // h
let Inst{21} = Imm{0}; // l
let Inst{20-16} = MRm;
}
// Scalar Signed saturating doubling
// multiply-subtract long (scalar, by element)
// opcode 0b0111 with u = 0; accumulating, so $Rd is tied to $src. Operand
// classes and lane-index layout mirror the sqdmlal defs:
// .h forms: lane index h:l:m, register limited to MRm{3-0} ("Lo" classes);
//           the _4H form fixes h to 0.
// .s forms: lane index h:l, full 5-bit register; the _2S form fixes h to 0.
def SQDMLSLshv_4H : NeonI_ScalarXIndexedElemArith_Constraint_Impl<"sqdmlsl",
0b0111, ".h", 0b0, 0b0, 0b1, FPR32, FPR16, VPR64Lo, neon_uimm2_bare> {
let Inst{11} = 0b0; // h
let Inst{21} = Imm{1}; // l
let Inst{20} = Imm{0}; // m
let Inst{19-16} = MRm{3-0};
}
def SQDMLSLshv_8H : NeonI_ScalarXIndexedElemArith_Constraint_Impl<"sqdmlsl",
0b0111, ".h", 0b0, 0b0, 0b1, FPR32, FPR16, VPR128Lo, neon_uimm3_bare> {
let Inst{11} = Imm{2}; // h
let Inst{21} = Imm{1}; // l
let Inst{20} = Imm{0}; // m
let Inst{19-16} = MRm{3-0};
}
def SQDMLSLdsv_2S : NeonI_ScalarXIndexedElemArith_Constraint_Impl<"sqdmlsl",
0b0111, ".s", 0b0, 0b1, 0b0, FPR64, FPR32, VPR64, neon_uimm1_bare> {
let Inst{11} = 0b0; // h
let Inst{21} = Imm{0}; // l
let Inst{20-16} = MRm;
}
def SQDMLSLdsv_4S : NeonI_ScalarXIndexedElemArith_Constraint_Impl<"sqdmlsl",
0b0111, ".s", 0b0, 0b1, 0b0, FPR64, FPR32, VPR128, neon_uimm2_bare> {
let Inst{11} = Imm{1}; // h
let Inst{21} = Imm{0}; // l
let Inst{20-16} = MRm;
}
// Scalar Signed saturating doubling multiply long (scalar, by element)
// opcode 0b1011 with u = 0; non-accumulating (no tied $src). The result
// register class is one size wider than the scalar source.
// .h forms: lane index h:l:m, register limited to MRm{3-0} ("Lo" classes);
//           the _4H form fixes h to 0.
// .s forms: lane index h:l, full 5-bit register; the _2S form fixes h to 0.
def SQDMULLshv_4H : NeonI_ScalarXIndexedElemArith<"sqdmull",
0b1011, ".h", 0b0, 0b0, 0b1, FPR32, FPR16, VPR64Lo, neon_uimm2_bare> {
let Inst{11} = 0b0; // h
let Inst{21} = Imm{1}; // l
let Inst{20} = Imm{0}; // m
let Inst{19-16} = MRm{3-0};
}
def SQDMULLshv_8H : NeonI_ScalarXIndexedElemArith<"sqdmull",
0b1011, ".h", 0b0, 0b0, 0b1, FPR32, FPR16, VPR128Lo, neon_uimm3_bare> {
let Inst{11} = Imm{2}; // h
let Inst{21} = Imm{1}; // l
let Inst{20} = Imm{0}; // m
let Inst{19-16} = MRm{3-0};
}
def SQDMULLdsv_2S : NeonI_ScalarXIndexedElemArith<"sqdmull",
0b1011, ".s", 0b0, 0b1, 0b0, FPR64, FPR32, VPR64, neon_uimm1_bare> {
let Inst{11} = 0b0; // h
let Inst{21} = Imm{0}; // l
let Inst{20-16} = MRm;
}
def SQDMULLdsv_4S : NeonI_ScalarXIndexedElemArith<"sqdmull",
0b1011, ".s", 0b0, 0b1, 0b0, FPR64, FPR32, VPR128, neon_uimm2_bare> {
let Inst{11} = Imm{1}; // h
let Inst{21} = Imm{0}; // l
let Inst{20-16} = MRm;
}
// Scalar Signed saturating doubling multiply returning
// high half (scalar, by element)
// opcode 0b1100 with u = 0; non-accumulating. Result and scalar source use
// the same register class (FPR16 or FPR32).
// .h forms: lane index h:l:m, register limited to MRm{3-0} ("Lo" classes);
//           the _4H form fixes h to 0.
// .s forms: lane index h:l, full 5-bit register; the _2S form fixes h to 0.
def SQDMULHhhv_4H : NeonI_ScalarXIndexedElemArith<"sqdmulh",
0b1100, ".h", 0b0, 0b0, 0b1, FPR16, FPR16, VPR64Lo, neon_uimm2_bare> {
let Inst{11} = 0b0; // h
let Inst{21} = Imm{1}; // l
let Inst{20} = Imm{0}; // m
let Inst{19-16} = MRm{3-0};
}
def SQDMULHhhv_8H : NeonI_ScalarXIndexedElemArith<"sqdmulh",
0b1100, ".h", 0b0, 0b0, 0b1, FPR16, FPR16, VPR128Lo, neon_uimm3_bare> {
let Inst{11} = Imm{2}; // h
let Inst{21} = Imm{1}; // l
let Inst{20} = Imm{0}; // m
let Inst{19-16} = MRm{3-0};
}
def SQDMULHssv_2S : NeonI_ScalarXIndexedElemArith<"sqdmulh",
0b1100, ".s", 0b0, 0b1, 0b0, FPR32, FPR32, VPR64, neon_uimm1_bare> {
let Inst{11} = 0b0; // h
let Inst{21} = Imm{0}; // l
let Inst{20-16} = MRm;
}
def SQDMULHssv_4S : NeonI_ScalarXIndexedElemArith<"sqdmulh",
0b1100, ".s", 0b0, 0b1, 0b0, FPR32, FPR32, VPR128, neon_uimm2_bare> {
let Inst{11} = Imm{1}; // h
let Inst{21} = Imm{0}; // l
let Inst{20-16} = MRm;
}
// Scalar Signed saturating rounding doubling multiply
// returning high half (scalar, by element)
// opcode 0b1101 with u = 0; non-accumulating. Operand classes and lane-index
// layout are identical to the sqdmulh defs; only the opcode differs.
// .h forms: lane index h:l:m, register limited to MRm{3-0} ("Lo" classes);
//           the _4H form fixes h to 0.
// .s forms: lane index h:l, full 5-bit register; the _2S form fixes h to 0.
def SQRDMULHhhv_4H : NeonI_ScalarXIndexedElemArith<"sqrdmulh",
0b1101, ".h", 0b0, 0b0, 0b1, FPR16, FPR16, VPR64Lo, neon_uimm2_bare> {
let Inst{11} = 0b0; // h
let Inst{21} = Imm{1}; // l
let Inst{20} = Imm{0}; // m
let Inst{19-16} = MRm{3-0};
}
def SQRDMULHhhv_8H : NeonI_ScalarXIndexedElemArith<"sqrdmulh",
0b1101, ".h", 0b0, 0b0, 0b1, FPR16, FPR16, VPR128Lo, neon_uimm3_bare> {
let Inst{11} = Imm{2}; // h
let Inst{21} = Imm{1}; // l
let Inst{20} = Imm{0}; // m
let Inst{19-16} = MRm{3-0};
}
def SQRDMULHssv_2S : NeonI_ScalarXIndexedElemArith<"sqrdmulh",
0b1101, ".s", 0b0, 0b1, 0b0, FPR32, FPR32, VPR64, neon_uimm1_bare> {
let Inst{11} = 0b0; // h
let Inst{21} = Imm{0}; // l
let Inst{20-16} = MRm;
}
def SQRDMULHssv_4S : NeonI_ScalarXIndexedElemArith<"sqrdmulh",
0b1101, ".s", 0b0, 0b1, 0b0, FPR32, FPR32, VPR128, neon_uimm2_bare> {
let Inst{11} = Imm{1}; // h
let Inst{21} = Imm{0}; // l
let Inst{20-16} = MRm;
}
// Scalar Copy - DUP element to scalar
// Common shape of the scalar DUP forms: one scalar result, one indexed vector
// source and a bare lane index, with no selection patterns (MC layer only).
//   asmop   - mnemonic
//   asmlane - element-size letter appended to $Rn in the asm string
//   ResRC   - scalar register class of the result
//   VPRC    - vector register operand that is indexed
//   OpImm   - operand type of the lane index
// Imm is declared 4 bits wide, enough for the widest index used below (the
// byte form); each def places only the index bits it needs in Inst{20-16}.
class NeonI_Scalar_DUP<string asmop, string asmlane,
RegisterClass ResRC, RegisterOperand VPRC,
Operand OpImm>
: NeonI_ScalarCopy<(outs ResRC:$Rd), (ins VPRC:$Rn, OpImm:$Imm),
asmop # "\t$Rd, $Rn." # asmlane # "[$Imm]",
[],
NoItinerary> {
bits<4> Imm;
}
// Inst{20-16} holds the lane index in its upper bits, followed by a marker
// whose lowest set bit identifies the element size:
//   b: iiii1    h: iii10    s: ii100    d: i1000
def DUPbv_B : NeonI_Scalar_DUP<"dup", "b", FPR8, VPR128, neon_uimm4_bare> {
let Inst{20-16} = {Imm{3}, Imm{2}, Imm{1}, Imm{0}, 0b1};
}
def DUPhv_H : NeonI_Scalar_DUP<"dup", "h", FPR16, VPR128, neon_uimm3_bare> {
let Inst{20-16} = {Imm{2}, Imm{1}, Imm{0}, 0b1, 0b0};
}
def DUPsv_S : NeonI_Scalar_DUP<"dup", "s", FPR32, VPR128, neon_uimm2_bare> {
let Inst{20-16} = {Imm{1}, Imm{0}, 0b1, 0b0, 0b0};
}
def DUPdv_D : NeonI_Scalar_DUP<"dup", "d", FPR64, VPR128, neon_uimm1_bare> {
// Select the single index bit explicitly, like the sibling defs do. Imm is
// bits<4>, so a bare `Imm` here would not spell out which bit lands in
// Inst{20} and would not form a 5-bit value when expanded to its full width.
let Inst{20-16} = {Imm{0}, 0b1, 0b0, 0b0, 0b0};
}
//===----------------------------------------------------------------------===//
@ -4792,36 +5080,6 @@ def : Pat<(v2i64 (bitconvert (f128 FPR128:$src))), (v2i64 FPR128:$src)>;
def : Pat<(v4f32 (bitconvert (f128 FPR128:$src))), (v4f32 FPR128:$src)>;
def : Pat<(v2f64 (bitconvert (f128 FPR128:$src))), (v2f64 FPR128:$src)>;
def neon_uimm0_bare : Operand<i64>,
ImmLeaf<i64, [{return Imm == 0;}]> {
let ParserMatchClass = neon_uimm0_asmoperand;
let PrintMethod = "printUImmBareOperand";
}
def neon_uimm1_bare : Operand<i64>,
ImmLeaf<i64, [{(void)Imm; return true;}]> {
let ParserMatchClass = neon_uimm1_asmoperand;
let PrintMethod = "printUImmBareOperand";
}
def neon_uimm2_bare : Operand<i64>,
ImmLeaf<i64, [{(void)Imm; return true;}]> {
let ParserMatchClass = neon_uimm2_asmoperand;
let PrintMethod = "printUImmBareOperand";
}
def neon_uimm3_bare : Operand<i64>,
ImmLeaf<i64, [{(void)Imm; return true;}]> {
let ParserMatchClass = uimm3_asmoperand;
let PrintMethod = "printUImmBareOperand";
}
def neon_uimm4_bare : Operand<i64>,
ImmLeaf<i64, [{(void)Imm; return true;}]> {
let ParserMatchClass = uimm4_asmoperand;
let PrintMethod = "printUImmBareOperand";
}
def neon_uimm3 : Operand<i64>,
ImmLeaf<i64, [{(void)Imm; return true;}]> {
let ParserMatchClass = uimm3_asmoperand;

View File

@ -82,6 +82,8 @@ static DecodeStatus DecodeFPR32RegisterClass(llvm::MCInst &Inst, unsigned RegNo,
uint64_t Address, const void *Decoder);
static DecodeStatus DecodeFPR64RegisterClass(llvm::MCInst &Inst, unsigned RegNo,
uint64_t Address, const void *Decoder);
static DecodeStatus DecodeFPR64LoRegisterClass(llvm::MCInst &Inst, unsigned RegNo,
uint64_t Address, const void *Decoder);
static DecodeStatus DecodeFPR128RegisterClass(llvm::MCInst &Inst,
unsigned RegNo, uint64_t Address,
const void *Decoder);
@ -379,6 +381,14 @@ DecodeFPR64RegisterClass(llvm::MCInst &Inst, unsigned RegNo,
return MCDisassembler::Success;
}
// Decode an FPR64 register operand that is restricted to register numbers
// 0-15. In-range numbers are forwarded to the generic FPR64 decoder; anything
// above 15 is reported as a decode failure.
static DecodeStatus
DecodeFPR64LoRegisterClass(llvm::MCInst &Inst, unsigned RegNo,
                           uint64_t Address, const void *Decoder) {
  const bool InLowRange = RegNo <= 15;
  if (InLowRange)
    return DecodeFPR64RegisterClass(Inst, RegNo, Address, Decoder);

  return MCDisassembler::Fail;
}
static DecodeStatus
DecodeFPR128RegisterClass(llvm::MCInst &Inst, unsigned RegNo,

View File

@ -1,4 +1,4 @@
// RUN: llvm-mc -triple=aarch64 -mattr=+neon -show-encoding < %s | FileCheck %s
// RUN: llvm-mc -triple=aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s
// Check that the assembler can handle the documented syntax for AArch64

View File

@ -4667,7 +4667,7 @@
// CHECK-ERROR: error: invalid operand for instruction
// CHECK-ERROR: sqdmlal s17, h27, s12
// CHECK-ERROR: ^
// CHECK-ERROR: error: invalid operand for instruction
// CHECK-ERROR: error: too few operands for instruction
// CHECK-ERROR: sqdmlal d19, s24, d12
// CHECK-ERROR: ^
@ -4681,7 +4681,7 @@
// CHECK-ERROR: error: invalid operand for instruction
// CHECK-ERROR: sqdmlsl s14, h12, s25
// CHECK-ERROR: ^
// CHECK-ERROR: error: invalid operand for instruction
// CHECK-ERROR: error: too few operands for instruction
// CHECK-ERROR: sqdmlsl d12, s23, d13
// CHECK-ERROR: ^
@ -4695,7 +4695,7 @@
// CHECK-ERROR: error: invalid operand for instruction
// CHECK-ERROR: sqdmull s12, h22, s12
// CHECK-ERROR: ^
// CHECK-ERROR: error: invalid operand for instruction
// CHECK-ERROR: error: too few operands for instruction
// CHECK-ERROR: sqdmull d15, s22, d12
// CHECK-ERROR: ^
@ -5687,3 +5687,244 @@
// CHECK-ERROR <stdin>:4341:17: error: invalid operand for instruction
// CHECK-ERROR trn2 v0.1d, v1.1d, v2.1d
// CHECK-ERROR ^
//----------------------------------------------------------------------
// Floating Point multiply (scalar, by element)
//----------------------------------------------------------------------
// mismatched and invalid vector types
fmul s0, s1, v1.h[0]
fmul h0, h1, v1.s[0]
// invalid lane
fmul s2, s29, v10.s[4]
// CHECK-ERROR: error: invalid operand for instruction
// CHECK-ERROR: fmul s0, s1, v1.h[0]
// CHECK-ERROR: ^
// CHECK-ERROR: error: invalid operand for instruction
// CHECK-ERROR: fmul h0, h1, v1.s[0]
// CHECK-ERROR: ^
// CHECK-ERROR: error: lane number incompatible with layout
// CHECK-ERROR: fmul s2, s29, v10.s[4]
// CHECK-ERROR: ^
//----------------------------------------------------------------------
// Floating Point multiply extended (scalar, by element)
//----------------------------------------------------------------------
// mismatched and invalid vector types
fmulx d0, d1, v1.b[0]
fmulx h0, h1, v1.d[0]
// invalid lane
fmulx d2, d29, v10.d[3]
// CHECK-ERROR: error: invalid operand for instruction
// CHECK-ERROR: fmulx d0, d1, v1.b[0]
// CHECK-ERROR: ^
// CHECK-ERROR: error: invalid operand for instruction
// CHECK-ERROR: fmulx h0, h1, v1.d[0]
// CHECK-ERROR: ^
// CHECK-ERROR: error: lane number incompatible with layout
// CHECK-ERROR: fmulx d2, d29, v10.d[3]
// CHECK-ERROR: ^
//----------------------------------------------------------------------
// Floating Point fused multiply-add (scalar, by element)
//----------------------------------------------------------------------
// mismatched and invalid vector types
fmla b0, b1, v1.b[0]
fmla d30, s11, v1.d[1]
// invalid lane
fmla s16, s22, v16.s[5]
// CHECK-ERROR: error: invalid operand for instruction
// CHECK-ERROR: fmla b0, b1, v1.b[0]
// CHECK-ERROR: ^
// CHECK-ERROR: error: invalid operand for instruction
// CHECK-ERROR: fmla d30, s11, v1.d[1]
// CHECK-ERROR: ^
// CHECK-ERROR: error: lane number incompatible with layout
// CHECK-ERROR: fmla s16, s22, v16.s[5]
// CHECK-ERROR: ^
//----------------------------------------------------------------------
// Floating Point fused multiply-subtract (scalar, by element)
//----------------------------------------------------------------------
// mismatched and invalid vector types
fmls s29, h10, v28.s[1]
fmls h7, h17, v26.s[2]
// invalid lane
fmls d16, d22, v16.d[-1]
// CHECK-ERROR: error: invalid operand for instruction
// CHECK-ERROR: fmls s29, h10, v28.s[1]
// CHECK-ERROR: ^
// CHECK-ERROR: error: invalid operand for instruction
// CHECK-ERROR: fmls h7, h17, v26.s[2]
// CHECK-ERROR: ^
// CHECK-ERROR: error: expected lane number
// CHECK-ERROR: fmls d16, d22, v16.d[-1]
// CHECK-ERROR: ^
//----------------------------------------------------------------------
// Scalar Signed saturating doubling multiply-add long
// (scalar, by element)
//----------------------------------------------------------------------
// mismatched and invalid vector types
sqdmlal s0, h0, v0.s[0]
sqdmlal s8, s9, v14.s[1]
// invalid lane
sqdmlal s4, s5, v1.s[5]
// invalid vector index
sqdmlal s0, h0, v17.h[0]
// CHECK-ERROR: error: invalid operand for instruction
// CHECK-ERROR: sqdmlal s0, h0, v0.s[0]
// CHECK-ERROR: ^
// CHECK-ERROR: error: invalid operand for instruction
// CHECK-ERROR: sqdmlal s8, s9, v14.s[1]
// CHECK-ERROR: ^
// CHECK-ERROR: error: lane number incompatible with layout
// CHECK-ERROR: sqdmlal s4, s5, v1.s[5]
// CHECK-ERROR: ^
// CHECK-ERROR: error: invalid operand for instruction
// CHECK-ERROR: sqdmlal s0, h0, v17.h[0]
// CHECK-ERROR: ^
//----------------------------------------------------------------------
// Scalar Signed saturating doubling multiply-subtract long
// (scalar, by element)
//----------------------------------------------------------------------
// mismatched and invalid vector types
sqdmlsl s1, h1, v1.d[0]
sqdmlsl d1, h1, v13.s[0]
// invalid lane
sqdmlsl d1, s1, v13.s[4]
// invalid vector index
sqdmlsl s1, h1, v20.h[7]
// CHECK-ERROR: error: invalid operand for instruction
// CHECK-ERROR: sqdmlsl s1, h1, v1.d[0]
// CHECK-ERROR: ^
// CHECK-ERROR: error: invalid operand for instruction
// CHECK-ERROR: sqdmlsl d1, h1, v13.s[0]
// CHECK-ERROR: ^
// CHECK-ERROR: error: lane number incompatible with layout
// CHECK-ERROR: sqdmlsl d1, s1, v13.s[4]
// CHECK-ERROR: ^
// CHECK-ERROR: error: invalid operand for instruction
// CHECK-ERROR: sqdmlsl s1, h1, v20.h[7]
// CHECK-ERROR: ^
//----------------------------------------------------------------------
// Scalar Signed saturating doubling multiply long (scalar, by element)
//----------------------------------------------------------------------
// mismatched and invalid vector types
// invalid lane
// invalid vector index
// mismatched and invalid vector types
sqdmull s1, h1, v1.s[1]
sqdmull s1, s1, v4.s[0]
// invalid lane
sqdmull s12, h17, v9.h[9]
// invalid vector index
sqdmull s1, h1, v16.h[5]
// CHECK-ERROR: error: invalid operand for instruction
// CHECK-ERROR: sqdmull s1, h1, v1.s[1]
// CHECK-ERROR: ^
// CHECK-ERROR: error: invalid operand for instruction
// CHECK-ERROR: sqdmull s1, s1, v4.s[0]
// CHECK-ERROR: ^
// CHECK-ERROR: error: lane number incompatible with layout
// CHECK-ERROR: sqdmull s12, h17, v9.h[9]
// CHECK-ERROR: ^
// CHECK-ERROR: error: invalid operand for instruction
// CHECK-ERROR: sqdmull s1, h1, v16.h[5]
// CHECK-ERROR: ^
//----------------------------------------------------------------------
// Scalar Signed saturating doubling multiply returning
// high half (scalar, by element)
//----------------------------------------------------------------------
// mismatched and invalid vector types
sqdmulh h0, s1, v0.h[0]
sqdmulh s25, s26, v27.h[3]
// invalid lane
sqdmulh s25, s26, v27.s[4]
// invalid vector index
sqdmulh s0, h1, v30.h[0]
// CHECK-ERROR: error: invalid operand for instruction
// CHECK-ERROR: sqdmulh h0, s1, v0.h[0]
// CHECK-ERROR: ^
// CHECK-ERROR: error: invalid operand for instruction
// CHECK-ERROR: sqdmulh s25, s26, v27.h[3]
// CHECK-ERROR: ^
// CHECK-ERROR: error: lane number incompatible with layout
// CHECK-ERROR: sqdmulh s25, s26, v27.s[4]
// CHECK-ERROR: ^
// CHECK-ERROR: error: invalid operand for instruction
// CHECK-ERROR: sqdmulh s0, h1, v30.h[0]
// CHECK-ERROR: ^
//----------------------------------------------------------------------
// Scalar Signed saturating rounding doubling multiply
// returning high half (scalar, by element)
//----------------------------------------------------------------------
// mismatched and invalid vector types
sqrdmulh h31, h30, v14.s[2]
sqrdmulh s5, h6, v7.s[2]
// invalid lane
sqrdmulh h31, h30, v14.h[9]
// invalid vector index
sqrdmulh h31, h30, v20.h[4]
// CHECK-ERROR: error: invalid operand for instruction
// CHECK-ERROR: sqrdmulh h31, h30, v14.s[2]
// CHECK-ERROR: ^
// CHECK-ERROR: error: invalid operand for instruction
// CHECK-ERROR: sqrdmulh s5, h6, v7.s[2]
// CHECK-ERROR: ^
// CHECK-ERROR: error: lane number incompatible with layout
// CHECK-ERROR: sqrdmulh h31, h30, v14.h[9]
// CHECK-ERROR: ^
// CHECK-ERROR: error: invalid operand for instruction
// CHECK-ERROR: sqrdmulh h31, h30, v20.h[4]
// CHECK-ERROR: ^
//----------------------------------------------------------------------
// Scalar Duplicate element (scalar)
//----------------------------------------------------------------------
// mismatched and invalid vector types
dup b0, v1.d[0]
dup h0, v31.b[8]
dup s0, v2.h[4]
dup d0, v17.s[3]
// invalid lane
dup d0, v17.d[4]
dup s0, v1.s[7]
dup h0, v31.h[16]
dup b1, v3.b[16]
// CHECK-ERROR: error: invalid operand for instruction
// CHECK-ERROR: dup b0, v1.d[0]
// CHECK-ERROR: ^
// CHECK-ERROR: error: invalid operand for instruction
// CHECK-ERROR: dup h0, v31.b[8]
// CHECK-ERROR: ^
// CHECK-ERROR: error: invalid operand for instruction
// CHECK-ERROR: dup s0, v2.h[4]
// CHECK-ERROR: ^
// CHECK-ERROR: error: invalid operand for instruction
// CHECK-ERROR: dup d0, v17.s[3]
// CHECK-ERROR: ^
// CHECK-ERROR: error: lane number incompatible with layout
// CHECK-ERROR: dup d0, v17.d[4]
// CHECK-ERROR: ^
// CHECK-ERROR: error: lane number incompatible with layout
// CHECK-ERROR: dup s0, v1.s[7]
// CHECK-ERROR: ^
// CHECK-ERROR: error: lane number incompatible with layout
// CHECK-ERROR: dup h0, v31.h[16]
// CHECK-ERROR: ^
// CHECK-ERROR: error: lane number incompatible with layout
// CHECK-ERROR: dup b1, v3.b[16]
// CHECK-ERROR: ^

View File

@ -0,0 +1,44 @@
// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s
//------------------------------------------------------------------------------
// Floating Point fused multiply-add (scalar, by element)
//------------------------------------------------------------------------------
fmla s0, s1, v1.s[0]
fmla s30, s11, v1.s[1]
fmla s4, s5, v7.s[2]
fmla s16, s22, v16.s[3]
fmla d0, d1, v1.d[0]
fmla d30, d11, v1.d[1]
// CHECK: fmla s0, s1, v1.s[0] // encoding: [0x20,0x10,0x81,0x5f]
// CHECK: fmla s30, s11, v1.s[1] // encoding: [0x7e,0x11,0xa1,0x5f]
// CHECK: fmla s4, s5, v7.s[2] // encoding: [0xa4,0x18,0x87,0x5f]
// CHECK: fmla s16, s22, v16.s[3] // encoding: [0xd0,0x1a,0xb0,0x5f]
// CHECK: fmla d0, d1, v1.d[0] // encoding: [0x20,0x10,0xc1,0x5f]
// CHECK: fmla d30, d11, v1.d[1] // encoding: [0x7e,0x19,0xc1,0x5f]
//------------------------------------------------------------------------------
// Floating Point fused multiply-subtract (scalar, by element)
//------------------------------------------------------------------------------
fmls s2, s3, v4.s[0]
fmls s29, s10, v28.s[1]
fmls s5, s12, v23.s[2]
fmls s7, s17, v26.s[3]
fmls d0, d1, v1.d[0]
fmls d30, d11, v1.d[1]
// CHECK: fmls s2, s3, v4.s[0] // encoding: [0x62,0x50,0x84,0x5f]
// CHECK: fmls s29, s10, v28.s[1] // encoding: [0x5d,0x51,0xbc,0x5f]
// CHECK: fmls s5, s12, v23.s[2] // encoding: [0x85,0x59,0x97,0x5f]
// CHECK: fmls s7, s17, v26.s[3] // encoding: [0x27,0x5a,0xba,0x5f]
// CHECK: fmls d0, d1, v1.d[0] // encoding: [0x20,0x50,0xc1,0x5f]
// CHECK: fmls d30, d11, v1.d[1] // encoding: [0x7e,0x59,0xc1,0x5f]

View File

@ -0,0 +1,37 @@
// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s
//------------------------------------------------------------------------------
// Floating Point multiply (scalar, by element)
//------------------------------------------------------------------------------
fmul s0, s1, v1.s[0]
fmul s30, s11, v1.s[1]
fmul s4, s5, v7.s[2]
fmul s16, s22, v16.s[3]
fmul d0, d1, v1.d[0]
fmul d30, d11, v1.d[1]
// CHECK: fmul s0, s1, v1.s[0] // encoding: [0x20,0x90,0x81,0x5f]
// CHECK: fmul s30, s11, v1.s[1] // encoding: [0x7e,0x91,0xa1,0x5f]
// CHECK: fmul s4, s5, v7.s[2] // encoding: [0xa4,0x98,0x87,0x5f]
// CHECK: fmul s16, s22, v16.s[3] // encoding: [0xd0,0x9a,0xb0,0x5f]
// CHECK: fmul d0, d1, v1.d[0] // encoding: [0x20,0x90,0xc1,0x5f]
// CHECK: fmul d30, d11, v1.d[1] // encoding: [0x7e,0x99,0xc1,0x5f]
//------------------------------------------------------------------------------
// Floating Point multiply extended (scalar, by element)
//------------------------------------------------------------------------------
fmulx s6, s2, v8.s[0]
fmulx s7, s3, v13.s[1]
fmulx s9, s7, v9.s[2]
fmulx s13, s21, v10.s[3]
fmulx d15, d9, v7.d[0]
fmulx d13, d12, v11.d[1]
// CHECK: fmulx s6, s2, v8.s[0] // encoding: [0x46,0x90,0x88,0x7f]
// CHECK: fmulx s7, s3, v13.s[1] // encoding: [0x67,0x90,0xad,0x7f]
// CHECK: fmulx s9, s7, v9.s[2] // encoding: [0xe9,0x98,0x89,0x7f]
// CHECK: fmulx s13, s21, v10.s[3] // encoding: [0xad,0x9a,0xaa,0x7f]
// CHECK: fmulx d15, d9, v7.d[0] // encoding: [0x2f,0x91,0xc7,0x7f]
// CHECK: fmulx d13, d12, v11.d[1] // encoding: [0x8d,0x99,0xcb,0x7f]

View File

@ -0,0 +1,46 @@
// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s
//-----------------------------------------------------------------------------
// Signed saturating doubling multiply-add long (scalar, by element)
//-----------------------------------------------------------------------------
sqdmlal s0, h0, v0.h[0]
sqdmlal s7, h1, v4.h[3]
sqdmlal s11, h16, v8.h[4]
sqdmlal s30, h30, v15.h[7]
sqdmlal d0, s0, v3.s[0]
sqdmlal d30, s30, v30.s[3]
sqdmlal d8, s9, v14.s[1]
// CHECK: sqdmlal s0, h0, v0.h[0] // encoding: [0x00,0x30,0x40,0x5f]
// CHECK: sqdmlal s7, h1, v4.h[3] // encoding: [0x27,0x30,0x74,0x5f]
// CHECK: sqdmlal s11, h16, v8.h[4] // encoding: [0x0b,0x3a,0x48,0x5f]
// CHECK: sqdmlal s30, h30, v15.h[7] // encoding: [0xde,0x3b,0x7f,0x5f]
// CHECK: sqdmlal d0, s0, v3.s[0] // encoding: [0x00,0x30,0x83,0x5f]
// CHECK: sqdmlal d30, s30, v30.s[3] // encoding: [0xde,0x3b,0xbe,0x5f]
// CHECK: sqdmlal d8, s9, v14.s[1] // encoding: [0x28,0x31,0xae,0x5f]
//-----------------------------------------------------------------------------
// Signed saturating doubling multiply-subtract long (scalar, by element)
//-----------------------------------------------------------------------------
sqdmlsl s1, h1, v1.h[0]
sqdmlsl s8, h2, v5.h[1]
sqdmlsl s12, h13, v14.h[2]
sqdmlsl s29, h28, v11.h[7]
sqdmlsl d1, s1, v13.s[0]
sqdmlsl d31, s31, v31.s[2]
sqdmlsl d16, s18, v28.s[3]
// CHECK: sqdmlsl s1, h1, v1.h[0] // encoding: [0x21,0x70,0x41,0x5f]
// CHECK: sqdmlsl s8, h2, v5.h[1] // encoding: [0x48,0x70,0x55,0x5f]
// CHECK: sqdmlsl s12, h13, v14.h[2] // encoding: [0xac,0x71,0x6e,0x5f]
// CHECK: sqdmlsl s29, h28, v11.h[7] // encoding: [0x9d,0x7b,0x7b,0x5f]
// CHECK: sqdmlsl d1, s1, v13.s[0] // encoding: [0x21,0x70,0x8d,0x5f]
// CHECK: sqdmlsl d31, s31, v31.s[2] // encoding: [0xff,0x7b,0x9f,0x5f]
// CHECK: sqdmlsl d16, s18, v28.s[3] // encoding: [0x50,0x7a,0xbc,0x5f]

View File

@ -0,0 +1,58 @@
// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s
//-----------------------------------------------------------------------------
// Signed saturating doubling multiply long (scalar, by element)
//-----------------------------------------------------------------------------
sqdmull s1, h1, v1.h[1]
sqdmull s8, h2, v5.h[2]
sqdmull s12, h17, v9.h[3]
sqdmull s31, h31, v15.h[7]
sqdmull d1, s1, v4.s[0]
sqdmull d31, s31, v31.s[3]
sqdmull d9, s10, v15.s[0]
// CHECK: sqdmull s1, h1, v1.h[1] // encoding: [0x21,0xb0,0x51,0x5f]
// CHECK: sqdmull s8, h2, v5.h[2] // encoding: [0x48,0xb0,0x65,0x5f]
// CHECK: sqdmull s12, h17, v9.h[3] // encoding: [0x2c,0xb2,0x79,0x5f]
// CHECK: sqdmull s31, h31, v15.h[7] // encoding: [0xff,0xbb,0x7f,0x5f]
// CHECK: sqdmull d1, s1, v4.s[0] // encoding: [0x21,0xb0,0x84,0x5f]
// CHECK: sqdmull d31, s31, v31.s[3] // encoding: [0xff,0xbb,0xbf,0x5f]
// CHECK: sqdmull d9, s10, v15.s[0] // encoding: [0x49,0xb1,0x8f,0x5f]
//-----------------------------------------------------------------------------
// Scalar Signed saturating doubling multiply returning
// high half (scalar, by element)
//-----------------------------------------------------------------------------
sqdmulh h0, h1, v0.h[0]
sqdmulh h10, h11, v10.h[4]
sqdmulh h20, h21, v15.h[7]
sqdmulh s25, s26, v27.s[3]
sqdmulh s2, s6, v7.s[0]
// CHECK: sqdmulh h0, h1, v0.h[0] // encoding: [0x20,0xc0,0x40,0x5f]
// CHECK: sqdmulh h10, h11, v10.h[4] // encoding: [0x6a,0xc9,0x4a,0x5f]
// CHECK: sqdmulh h20, h21, v15.h[7] // encoding: [0xb4,0xca,0x7f,0x5f]
// CHECK: sqdmulh s25, s26, v27.s[3] // encoding: [0x59,0xcb,0xbb,0x5f]
// CHECK: sqdmulh s2, s6, v7.s[0] // encoding: [0xc2,0xc0,0x87,0x5f]
//-----------------------------------------------------------------------------
// Signed saturating rounding doubling multiply returning
// high half (scalar, by element)
//-----------------------------------------------------------------------------
sqrdmulh h31, h30, v14.h[2]
sqrdmulh h1, h1, v1.h[4]
sqrdmulh h21, h22, v15.h[7]
sqrdmulh s5, s6, v7.s[2]
sqrdmulh s20, s26, v27.s[1]
// CHECK: sqrdmulh h31, h30, v14.h[2] // encoding: [0xdf,0xd3,0x6e,0x5f]
// CHECK: sqrdmulh h1, h1, v1.h[4] // encoding: [0x21,0xd8,0x41,0x5f]
// CHECK: sqrdmulh h21, h22, v15.h[7] // encoding: [0xd5,0xda,0x7f,0x5f]
// CHECK: sqrdmulh s5, s6, v7.s[2] // encoding: [0xc5,0xd8,0x87,0x5f]
// CHECK: sqrdmulh s20, s26, v27.s[1] // encoding: [0x54,0xd3,0xbb,0x5f]

View File

@ -0,0 +1,29 @@
// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s
//------------------------------------------------------------------------------
// Duplicate element (scalar)
//------------------------------------------------------------------------------
dup b0, v0.b[15]
dup b1, v0.b[7]
dup b17, v0.b[0]
dup h5, v31.h[7]
dup h9, v1.h[4]
dup h11, v17.h[0]
dup s2, v2.s[3]
dup s4, v21.s[0]
dup s31, v21.s[2]
dup d3, v5.d[0]
dup d6, v5.d[1]
// CHECK: dup b0, v0.b[15] // encoding: [0x00,0x04,0x1f,0x5e]
// CHECK: dup b1, v0.b[7] // encoding: [0x01,0x04,0x0f,0x5e]
// CHECK: dup b17, v0.b[0] // encoding: [0x11,0x04,0x01,0x5e]
// CHECK: dup h5, v31.h[7] // encoding: [0xe5,0x07,0x1e,0x5e]
// CHECK: dup h9, v1.h[4] // encoding: [0x29,0x04,0x12,0x5e]
// CHECK: dup h11, v17.h[0] // encoding: [0x2b,0x06,0x02,0x5e]
// CHECK: dup s2, v2.s[3] // encoding: [0x42,0x04,0x1c,0x5e]
// CHECK: dup s4, v21.s[0] // encoding: [0xa4,0x06,0x04,0x5e]
// CHECK: dup s31, v21.s[2] // encoding: [0xbf,0x06,0x14,0x5e]
// CHECK: dup d3, v5.d[0] // encoding: [0xa3,0x04,0x08,0x5e]
// CHECK: dup d6, v5.d[1] // encoding: [0xa6,0x04,0x18,0x5e]

View File

@ -1,4 +1,4 @@
// RUN: llvm-mc -triple=aarch64 -mattr=+neon -show-encoding < %s | FileCheck %s
// RUN: llvm-mc -triple=aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s
// Check that the assembler can handle the documented syntax for AArch64

View File

@ -1,4 +1,4 @@
// RUN: llvm-mc -triple=aarch64 -mattr=+neon -show-encoding < %s | FileCheck %s
// RUN: llvm-mc -triple=aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s
// Check that the assembler can handle the documented syntax for AArch64

View File

@ -2174,3 +2174,216 @@ G# RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon -disassemble < %s |
0x28,0x78,0x82,0x0e
0x19,0x78,0x82,0x4e
0x0a,0x78,0xc2,0x4e
#----------------------------------------------------------------------
# Scalar Floating Point multiply (scalar, by element)
#----------------------------------------------------------------------
# CHECK: fmul s0, s1, v1.s[0]
# CHECK: fmul s0, s1, v1.s[3]
# CHECK: fmul d0, d1, v1.d[0]
# CHECK: fmul d0, d1, v1.d[1]
# CHECK: fmul d15, d15, v15.d[1]
0x20 0x90 0x81 0x5f
0x20 0x98 0xa1 0x5f
0x20 0x90 0xc1 0x5f
0x20 0x98 0xc1 0x5f
0xef 0x99 0xcf 0x5f
#----------------------------------------------------------------------
# Scalar Floating Point multiply extended (scalar, by element)
#----------------------------------------------------------------------
# CHECK: fmulx s3, s5, v7.s[0]
# CHECK: fmulx s3, s5, v7.s[3]
# CHECK: fmulx s3, s5, v15.s[3]
# CHECK: fmulx d0, d4, v8.d[0]
# CHECK: fmulx d0, d4, v8.d[1]
0xa3 0x90 0x87 0x7f
0xa3 0x98 0xa7 0x7f
0xa3 0x98 0xaf 0x7f
0x80 0x90 0xc8 0x7f
0x80 0x98 0xc8 0x7f
#----------------------------------------------------------------------
# Scalar Floating Point fused multiply-add (scalar, by element)
#----------------------------------------------------------------------
# CHECK: fmla s0, s1, v1.s[0]
# CHECK: fmla s0, s1, v1.s[3]
# CHECK: fmla d0, d1, v1.d[0]
# CHECK: fmla d0, d1, v1.d[1]
# CHECK: fmla d15, d15, v15.d[1]
0x20 0x10 0x81 0x5f
0x20 0x18 0xa1 0x5f
0x20 0x10 0xc1 0x5f
0x20 0x18 0xc1 0x5f
0xef 0x19 0xcf 0x5f
#----------------------------------------------------------------------
# Scalar Floating Point fused multiply-sub (scalar, by element)
#----------------------------------------------------------------------
# CHECK: fmls s3, s5, v7.s[0]
# CHECK: fmls s3, s5, v7.s[3]
# CHECK: fmls s3, s5, v15.s[3]
# CHECK: fmls d0, d4, v8.d[0]
# CHECK: fmls d0, d4, v8.d[1]
0xa3 0x50 0x87 0x5f
0xa3 0x58 0xa7 0x5f
0xa3 0x58 0xaf 0x5f
0x80 0x50 0xc8 0x5f
0x80 0x58 0xc8 0x5f
#----------------------------------------------------------------------
# Scalar Signed saturating doubling
# multiply-add long (scalar, by element)
#----------------------------------------------------------------------
# CHECK: sqdmlal s0, h0, v0.h[0]
# CHECK: sqdmlal s0, h0, v0.h[1]
# CHECK: sqdmlal s0, h0, v0.h[2]
# CHECK: sqdmlal s0, h0, v0.h[3]
# CHECK: sqdmlal s0, h0, v0.h[4]
# CHECK: sqdmlal s0, h0, v0.h[5]
# CHECK: sqdmlal s0, h0, v0.h[6]
# CHECK: sqdmlal s0, h0, v0.h[7]
# CHECK: sqdmlal d8, s9, v15.s[0]
# CHECK: sqdmlal d8, s9, v15.s[1]
# CHECK: sqdmlal d8, s9, v15.s[2]
# CHECK: sqdmlal d8, s9, v15.s[3]
0x00 0x30 0x40 0x5f
0x00 0x30 0x50 0x5f
0x00 0x30 0x60 0x5f
0x00 0x30 0x70 0x5f
0x00 0x38 0x40 0x5f
0x00 0x38 0x50 0x5f
0x00 0x38 0x60 0x5f
0x00 0x38 0x70 0x5f
0x28 0x31 0x8f 0x5f
0x28 0x31 0xaf 0x5f
0x28 0x39 0x8f 0x5f
0x28 0x39 0xaf 0x5f
#----------------------------------------------------------------------
# Scalar Signed saturating doubling
# multiply-sub long (scalar, by element)
#----------------------------------------------------------------------
# CHECK: sqdmlsl s0, h0, v0.h[0]
# CHECK: sqdmlsl s0, h0, v0.h[1]
# CHECK: sqdmlsl s0, h0, v0.h[2]
# CHECK: sqdmlsl s0, h0, v0.h[3]
# CHECK: sqdmlsl s0, h0, v0.h[4]
# CHECK: sqdmlsl s0, h0, v0.h[5]
# CHECK: sqdmlsl s0, h0, v0.h[6]
# CHECK: sqdmlsl s0, h0, v0.h[7]
# CHECK: sqdmlsl d8, s9, v15.s[0]
# CHECK: sqdmlsl d8, s9, v15.s[1]
# CHECK: sqdmlsl d8, s9, v15.s[2]
# CHECK: sqdmlsl d8, s9, v15.s[3]
0x00 0x70 0x40 0x5f
0x00 0x70 0x50 0x5f
0x00 0x70 0x60 0x5f
0x00 0x70 0x70 0x5f
0x00 0x78 0x40 0x5f
0x00 0x78 0x50 0x5f
0x00 0x78 0x60 0x5f
0x00 0x78 0x70 0x5f
0x28 0x71 0x8f 0x5f
0x28 0x71 0xaf 0x5f
0x28 0x79 0x8f 0x5f
0x28 0x79 0xaf 0x5f
#----------------------------------------------------------------------
# Scalar Signed saturating doubling multiply long (scalar, by element)
#----------------------------------------------------------------------
# CHECK: sqdmull s1, h1, v1.h[0]
# CHECK: sqdmull s1, h1, v1.h[1]
# CHECK: sqdmull s1, h1, v1.h[2]
# CHECK: sqdmull s1, h1, v1.h[3]
# CHECK: sqdmull s1, h1, v1.h[4]
# CHECK: sqdmull s1, h1, v1.h[5]
# CHECK: sqdmull s1, h1, v1.h[6]
# CHECK: sqdmull s1, h1, v1.h[7]
# CHECK: sqdmull d1, s1, v4.s[0]
# CHECK: sqdmull d1, s1, v4.s[1]
# CHECK: sqdmull d1, s1, v4.s[2]
# CHECK: sqdmull d1, s1, v4.s[3]
0x21 0xb0 0x41 0x5f
0x21 0xb0 0x51 0x5f
0x21 0xb0 0x61 0x5f
0x21 0xb0 0x71 0x5f
0x21 0xb8 0x41 0x5f
0x21 0xb8 0x51 0x5f
0x21 0xb8 0x61 0x5f
0x21 0xb8 0x71 0x5f
0x21 0xb0 0x84 0x5f
0x21 0xb0 0xa4 0x5f
0x21 0xb8 0x84 0x5f
0x21 0xb8 0xa4 0x5f
#----------------------------------------------------------------------
# Scalar Signed saturating doubling multiply returning
# high half (scalar, by element)
#----------------------------------------------------------------------
# CHECK: sqdmulh h7, h1, v14.h[0]
# CHECK: sqdmulh h7, h15, v8.h[1]
# CHECK: sqdmulh h7, h15, v8.h[2]
# CHECK: sqdmulh h7, h15, v8.h[3]
# CHECK: sqdmulh h7, h15, v8.h[4]
# CHECK: sqdmulh h7, h15, v8.h[5]
# CHECK: sqdmulh h7, h15, v8.h[6]
# CHECK: sqdmulh h7, h15, v8.h[7]
# CHECK: sqdmulh s15, s3, v4.s[0]
# CHECK: sqdmulh s15, s14, v16.s[1]
# CHECK: sqdmulh s15, s15, v16.s[2]
# CHECK: sqdmulh s15, s16, v17.s[3]
0x27 0xc0 0x4e 0x5f
0xe7 0xc1 0x58 0x5f
0xe7 0xc1 0x68 0x5f
0xe7 0xc1 0x78 0x5f
0xe7 0xc9 0x48 0x5f
0xe7 0xc9 0x58 0x5f
0xe7 0xc9 0x68 0x5f
0xe7 0xc9 0x78 0x5f
0x6f 0xc0 0x84 0x5f
0xcf 0xc1 0xb0 0x5f
0xef 0xc9 0x90 0x5f
0x0f 0xca 0xb1 0x5f
#----------------------------------------------------------------------
# Scalar Signed saturating rounding doubling multiply
# returning high half (scalar, by element)
#----------------------------------------------------------------------
# CHECK: sqrdmulh h7, h1, v14.h[0]
# CHECK: sqrdmulh h7, h15, v8.h[1]
# CHECK: sqrdmulh h7, h15, v8.h[2]
# CHECK: sqrdmulh h7, h15, v8.h[3]
# CHECK: sqrdmulh h7, h15, v8.h[4]
# CHECK: sqrdmulh h7, h15, v8.h[5]
# CHECK: sqrdmulh h7, h15, v8.h[6]
# CHECK: sqrdmulh h7, h15, v8.h[7]
# CHECK: sqrdmulh s15, s3, v4.s[0]
# CHECK: sqrdmulh s15, s14, v16.s[1]
# CHECK: sqrdmulh s15, s15, v16.s[2]
# CHECK: sqrdmulh s15, s16, v17.s[3]
0x27 0xd0 0x4e 0x5f
0xe7 0xd1 0x58 0x5f
0xe7 0xd1 0x68 0x5f
0xe7 0xd1 0x78 0x5f
0xe7 0xd9 0x48 0x5f
0xe7 0xd9 0x58 0x5f
0xe7 0xd9 0x68 0x5f
0xe7 0xd9 0x78 0x5f
0x6f 0xd0 0x84 0x5f
0xcf 0xd1 0xb0 0x5f
0xef 0xd9 0x90 0x5f
0x0f 0xda 0xb1 0x5f
#----------------------------------------------------------------------
# Duplicate element (scalar)
#----------------------------------------------------------------------
# CHECK: dup b0, v0.b[15]
# CHECK: dup h2, v31.h[5]
# CHECK: dup s17, v2.s[2]
# CHECK: dup d6, v12.d[1]
0x00 0x04 0x1f 0x5e
0xe2 0x07 0x16 0x5e
0x51 0x04 0x14 0x5e
0x86 0x05 0x18 0x5e