[AArch64] Model Cortex-A55 Q register NEON instructions

Cortex-A55 has two 64-bit NEON vector units, meaning a 128-bit instruction
requires taking both units (and can only be issued as the first
instruction in a dual-issue pair). This patch models that by splitting
the WriteV SchedWrite into two: WriteVd, for instructions that read/write
only 64-bit operands, and WriteVq, for instructions that read/write 128-bit
registers. The A55 schedule then uses this distinction to model WriteVq as
taking both resource units and starting a Schedule Group, and WriteVd as
taking one unit as before.

I believe this is more correct, even if it does not lead to much better
performance.

Differential Revision: https://reviews.llvm.org/D108766
This commit is contained in:
David Green 2021-09-29 16:55:31 +01:00
parent 9b10e2b1cf
commit e9adcbde31
18 changed files with 1053 additions and 1024 deletions

View File

@ -5310,7 +5310,7 @@ class BaseSIMDThreeSameVector<bit Q, bit U, bits<3> size, bits<5> opcode,
: I<(outs regtype:$Rd), (ins regtype:$Rn, regtype:$Rm), asm, : I<(outs regtype:$Rd), (ins regtype:$Rn, regtype:$Rm), asm,
"{\t$Rd" # kind # ", $Rn" # kind # ", $Rm" # kind # "{\t$Rd" # kind # ", $Rn" # kind # ", $Rm" # kind #
"|" # kind # "\t$Rd, $Rn, $Rm|}", "", pattern>, "|" # kind # "\t$Rd, $Rn, $Rm|}", "", pattern>,
Sched<[WriteV]> { Sched<[!if(Q, WriteVq, WriteVd)]> {
bits<5> Rd; bits<5> Rd;
bits<5> Rn; bits<5> Rn;
bits<5> Rm; bits<5> Rm;
@ -5333,7 +5333,7 @@ class BaseSIMDThreeSameVectorTied<bit Q, bit U, bits<3> size, bits<5> opcode,
: I<(outs regtype:$dst), (ins regtype:$Rd, regtype:$Rn, regtype:$Rm), asm, : I<(outs regtype:$dst), (ins regtype:$Rd, regtype:$Rn, regtype:$Rm), asm,
"{\t$Rd" # kind # ", $Rn" # kind # ", $Rm" # kind # "{\t$Rd" # kind # ", $Rn" # kind # ", $Rm" # kind #
"|" # kind # "\t$Rd, $Rn, $Rm}", "$Rd = $dst", pattern>, "|" # kind # "\t$Rd, $Rn, $Rm}", "$Rd = $dst", pattern>,
Sched<[WriteV]> { Sched<[!if(Q, WriteVq, WriteVd)]> {
bits<5> Rd; bits<5> Rd;
bits<5> Rn; bits<5> Rn;
bits<5> Rm; bits<5> Rm;
@ -5352,7 +5352,7 @@ class BaseSIMDThreeSameVectorTied<bit Q, bit U, bits<3> size, bits<5> opcode,
let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in
class BaseSIMDThreeSameVectorPseudo<RegisterOperand regtype, list<dag> pattern> class BaseSIMDThreeSameVectorPseudo<RegisterOperand regtype, list<dag> pattern>
: Pseudo<(outs regtype:$dst), (ins regtype:$Rd, regtype:$Rn, regtype:$Rm), pattern>, : Pseudo<(outs regtype:$dst), (ins regtype:$Rd, regtype:$Rn, regtype:$Rm), pattern>,
Sched<[WriteV]>; Sched<[!if(!eq(regtype, V128), WriteVq, WriteVd)]>;
multiclass SIMDLogicalThreeVectorPseudo<SDPatternOperator OpNode> { multiclass SIMDLogicalThreeVectorPseudo<SDPatternOperator OpNode> {
def v8i8 : BaseSIMDThreeSameVectorPseudo<V64, def v8i8 : BaseSIMDThreeSameVectorPseudo<V64,
@ -5705,7 +5705,7 @@ class BaseSIMDTwoSameVector<bit Q, bit U, bits<2> size, bits<5> opcode,
: I<(outs regtype:$Rd), (ins regtype:$Rn), asm, : I<(outs regtype:$Rd), (ins regtype:$Rn), asm,
"{\t$Rd" # dstkind # ", $Rn" # srckind # "{\t$Rd" # dstkind # ", $Rn" # srckind #
"|" # dstkind # "\t$Rd, $Rn}", "", pattern>, "|" # dstkind # "\t$Rd, $Rn}", "", pattern>,
Sched<[WriteV]> { Sched<[!if(Q, WriteVq, WriteVd)]> {
bits<5> Rd; bits<5> Rd;
bits<5> Rn; bits<5> Rn;
let Inst{31} = 0; let Inst{31} = 0;
@ -5730,7 +5730,7 @@ class BaseSIMDTwoSameVectorTied<bit Q, bit U, bits<2> size, bits<5> opcode,
: I<(outs regtype:$dst), (ins regtype:$Rd, regtype:$Rn), asm, : I<(outs regtype:$dst), (ins regtype:$Rd, regtype:$Rn), asm,
"{\t$Rd" # dstkind # ", $Rn" # srckind # "{\t$Rd" # dstkind # ", $Rn" # srckind #
"|" # dstkind # "\t$Rd, $Rn}", "$Rd = $dst", pattern>, "|" # dstkind # "\t$Rd, $Rn}", "$Rd = $dst", pattern>,
Sched<[WriteV]> { Sched<[!if(Q, WriteVq, WriteVd)]> {
bits<5> Rd; bits<5> Rd;
bits<5> Rn; bits<5> Rn;
let Inst{31} = 0; let Inst{31} = 0;
@ -5776,7 +5776,7 @@ class BaseSIMDVectorLShiftLongBySize<bit Q, bits<2> size,
: I<(outs V128:$Rd), (ins regtype:$Rn), asm, : I<(outs V128:$Rd), (ins regtype:$Rn), asm,
"{\t$Rd" # dstkind # ", $Rn" # srckind # ", #" # amount # "{\t$Rd" # dstkind # ", $Rn" # srckind # ", #" # amount #
"|" # dstkind # "\t$Rd, $Rn, #" # amount # "}", "", []>, "|" # dstkind # "\t$Rd, $Rn, #" # amount # "}", "", []>,
Sched<[WriteV]> { Sched<[WriteVq]> {
bits<5> Rd; bits<5> Rd;
bits<5> Rn; bits<5> Rn;
let Inst{31} = 0; let Inst{31} = 0;
@ -6041,7 +6041,7 @@ class BaseSIMDMixedTwoVector<bit Q, bit U, bits<2> size, bits<5> opcode,
: I<(outs outreg:$Rd), (ins inreg:$Rn), asm, : I<(outs outreg:$Rd), (ins inreg:$Rn), asm,
"{\t$Rd" # outkind # ", $Rn" # inkind # "{\t$Rd" # outkind # ", $Rn" # inkind #
"|" # outkind # "\t$Rd, $Rn}", "", pattern>, "|" # outkind # "\t$Rd, $Rn}", "", pattern>,
Sched<[WriteV]> { Sched<[WriteVq]> {
bits<5> Rd; bits<5> Rd;
bits<5> Rn; bits<5> Rn;
let Inst{31} = 0; let Inst{31} = 0;
@ -6064,7 +6064,7 @@ class BaseSIMDMixedTwoVectorTied<bit Q, bit U, bits<2> size, bits<5> opcode,
: I<(outs outreg:$dst), (ins outreg:$Rd, inreg:$Rn), asm, : I<(outs outreg:$dst), (ins outreg:$Rd, inreg:$Rn), asm,
"{\t$Rd" # outkind # ", $Rn" # inkind # "{\t$Rd" # outkind # ", $Rn" # inkind #
"|" # outkind # "\t$Rd, $Rn}", "$Rd = $dst", pattern>, "|" # outkind # "\t$Rd, $Rn}", "$Rd = $dst", pattern>,
Sched<[WriteV]> { Sched<[WriteVq]> {
bits<5> Rd; bits<5> Rd;
bits<5> Rn; bits<5> Rn;
let Inst{31} = 0; let Inst{31} = 0;
@ -6116,7 +6116,7 @@ class BaseSIMDCmpTwoVector<bit Q, bit U, bits<2> size, bits<2> size2,
"{\t$Rd" # kind # ", $Rn" # kind # ", #" # zero # "{\t$Rd" # kind # ", $Rn" # kind # ", #" # zero #
"|" # kind # "\t$Rd, $Rn, #" # zero # "}", "", "|" # kind # "\t$Rd, $Rn, #" # zero # "}", "",
[(set (dty regtype:$Rd), (OpNode (sty regtype:$Rn)))]>, [(set (dty regtype:$Rd), (OpNode (sty regtype:$Rn)))]>,
Sched<[WriteV]> { Sched<[!if(Q, WriteVq, WriteVd)]> {
bits<5> Rd; bits<5> Rd;
bits<5> Rn; bits<5> Rn;
let Inst{31} = 0; let Inst{31} = 0;
@ -6214,7 +6214,7 @@ class BaseSIMDFPCvtTwoVector<bit Q, bit U, bits<2> size, bits<5> opcode,
list<dag> pattern> list<dag> pattern>
: I<(outs outtype:$Rd), (ins intype:$Rn), asm, : I<(outs outtype:$Rd), (ins intype:$Rn), asm,
!strconcat("\t$Rd", VdTy, ", $Rn", VnTy), "", pattern>, !strconcat("\t$Rd", VdTy, ", $Rn", VnTy), "", pattern>,
Sched<[WriteV]> { Sched<[WriteVq]> {
bits<5> Rd; bits<5> Rd;
bits<5> Rn; bits<5> Rn;
let Inst{31} = 0; let Inst{31} = 0;
@ -6236,7 +6236,7 @@ class BaseSIMDFPCvtTwoVectorTied<bit Q, bit U, bits<2> size, bits<5> opcode,
list<dag> pattern> list<dag> pattern>
: I<(outs outtype:$dst), (ins outtype:$Rd, intype:$Rn), asm, : I<(outs outtype:$dst), (ins outtype:$Rd, intype:$Rn), asm,
!strconcat("\t$Rd", VdTy, ", $Rn", VnTy), "$Rd = $dst", pattern>, !strconcat("\t$Rd", VdTy, ", $Rn", VnTy), "$Rd = $dst", pattern>,
Sched<[WriteV]> { Sched<[WriteVq]> {
bits<5> Rd; bits<5> Rd;
bits<5> Rn; bits<5> Rn;
let Inst{31} = 0; let Inst{31} = 0;
@ -6299,7 +6299,7 @@ class BaseSIMDDifferentThreeVector<bit U, bits<3> size, bits<4> opcode,
: I<(outs outtype:$Rd), (ins intype1:$Rn, intype2:$Rm), asm, : I<(outs outtype:$Rd), (ins intype1:$Rn, intype2:$Rm), asm,
"{\t$Rd" # outkind # ", $Rn" # inkind1 # ", $Rm" # inkind2 # "{\t$Rd" # outkind # ", $Rn" # inkind1 # ", $Rm" # inkind2 #
"|" # outkind # "\t$Rd, $Rn, $Rm}", "", pattern>, "|" # outkind # "\t$Rd, $Rn, $Rm}", "", pattern>,
Sched<[WriteV]> { Sched<[WriteVq]> {
bits<5> Rd; bits<5> Rd;
bits<5> Rn; bits<5> Rn;
bits<5> Rm; bits<5> Rm;
@ -6325,7 +6325,7 @@ class BaseSIMDDifferentThreeVectorTied<bit U, bits<3> size, bits<4> opcode,
: I<(outs outtype:$dst), (ins outtype:$Rd, intype1:$Rn, intype2:$Rm), asm, : I<(outs outtype:$dst), (ins outtype:$Rd, intype1:$Rn, intype2:$Rm), asm,
"{\t$Rd" # outkind # ", $Rn" # inkind1 # ", $Rm" # inkind2 # "{\t$Rd" # outkind # ", $Rn" # inkind1 # ", $Rm" # inkind2 #
"|" # outkind # "\t$Rd, $Rn, $Rm}", "$Rd = $dst", pattern>, "|" # outkind # "\t$Rd, $Rn, $Rm}", "$Rd = $dst", pattern>,
Sched<[WriteV]> { Sched<[WriteVq]> {
bits<5> Rd; bits<5> Rd;
bits<5> Rn; bits<5> Rn;
bits<5> Rm; bits<5> Rm;
@ -6665,7 +6665,7 @@ class BaseSIMDBitwiseExtract<bit size, RegisterOperand regtype, ValueType vty,
"|" # kind # "\t$Rd, $Rn, $Rm, $imm}", "", "|" # kind # "\t$Rd, $Rn, $Rm, $imm}", "",
[(set (vty regtype:$Rd), [(set (vty regtype:$Rd),
(AArch64ext regtype:$Rn, regtype:$Rm, (i32 imm:$imm)))]>, (AArch64ext regtype:$Rn, regtype:$Rm, (i32 imm:$imm)))]>,
Sched<[WriteV]> { Sched<[!if(size, WriteVq, WriteVd)]> {
bits<5> Rd; bits<5> Rd;
bits<5> Rn; bits<5> Rn;
bits<5> Rm; bits<5> Rm;
@ -6699,7 +6699,7 @@ class BaseSIMDZipVector<bits<3> size, bits<3> opc, RegisterOperand regtype,
"{\t$Rd" # kind # ", $Rn" # kind # ", $Rm" # kind # "{\t$Rd" # kind # ", $Rn" # kind # ", $Rm" # kind #
"|" # kind # "\t$Rd, $Rn, $Rm}", "", "|" # kind # "\t$Rd, $Rn, $Rm}", "",
[(set (valty regtype:$Rd), (OpNode regtype:$Rn, regtype:$Rm))]>, [(set (valty regtype:$Rd), (OpNode regtype:$Rn, regtype:$Rm))]>,
Sched<[WriteV]> { Sched<[!if(!eq(regtype, V128), WriteVq, WriteVd)]> {
bits<5> Rd; bits<5> Rd;
bits<5> Rn; bits<5> Rn;
bits<5> Rm; bits<5> Rm;
@ -6755,7 +6755,7 @@ class BaseSIMDThreeScalar<bit U, bits<3> size, bits<5> opcode,
list<dag> pattern> list<dag> pattern>
: I<(outs regtype:$Rd), (ins regtype:$Rn, regtype:$Rm), asm, : I<(outs regtype:$Rd), (ins regtype:$Rn, regtype:$Rm), asm,
"\t$Rd, $Rn, $Rm", "", pattern>, "\t$Rd, $Rn, $Rm", "", pattern>,
Sched<[WriteV]> { Sched<[WriteVd]> {
bits<5> Rd; bits<5> Rd;
bits<5> Rn; bits<5> Rn;
bits<5> Rm; bits<5> Rm;
@ -6775,7 +6775,7 @@ class BaseSIMDThreeScalarTied<bit U, bits<2> size, bit R, bits<5> opcode,
dag oops, dag iops, string asm, dag oops, dag iops, string asm,
list<dag> pattern> list<dag> pattern>
: I<oops, iops, asm, "\t$Rd, $Rn, $Rm", "$Rd = $dst", pattern>, : I<oops, iops, asm, "\t$Rd, $Rn, $Rm", "$Rd = $dst", pattern>,
Sched<[WriteV]> { Sched<[WriteVd]> {
bits<5> Rd; bits<5> Rd;
bits<5> Rn; bits<5> Rn;
bits<5> Rm; bits<5> Rm;
@ -6868,7 +6868,7 @@ class BaseSIMDThreeScalarMixed<bit U, bits<2> size, bits<5> opcode,
dag oops, dag iops, string asm, string cstr, list<dag> pat> dag oops, dag iops, string asm, string cstr, list<dag> pat>
: I<oops, iops, asm, : I<oops, iops, asm,
"\t$Rd, $Rn, $Rm", cstr, pat>, "\t$Rd, $Rn, $Rm", cstr, pat>,
Sched<[WriteV]> { Sched<[WriteVd]> {
bits<5> Rd; bits<5> Rd;
bits<5> Rn; bits<5> Rn;
bits<5> Rm; bits<5> Rm;
@ -6921,7 +6921,7 @@ class BaseSIMDTwoScalar<bit U, bits<2> size, bits<2> size2, bits<5> opcode,
string asm, list<dag> pat> string asm, list<dag> pat>
: I<(outs regtype:$Rd), (ins regtype2:$Rn), asm, : I<(outs regtype:$Rd), (ins regtype2:$Rn), asm,
"\t$Rd, $Rn", "", pat>, "\t$Rd, $Rn", "", pat>,
Sched<[WriteV]> { Sched<[WriteVd]> {
bits<5> Rd; bits<5> Rd;
bits<5> Rn; bits<5> Rn;
let Inst{31-30} = 0b01; let Inst{31-30} = 0b01;
@ -6943,7 +6943,7 @@ class BaseSIMDTwoScalarTied<bit U, bits<2> size, bits<5> opcode,
string asm, list<dag> pat> string asm, list<dag> pat>
: I<(outs regtype:$dst), (ins regtype:$Rd, regtype2:$Rn), asm, : I<(outs regtype:$dst), (ins regtype:$Rd, regtype2:$Rn), asm,
"\t$Rd, $Rn", "$Rd = $dst", pat>, "\t$Rd, $Rn", "$Rd = $dst", pat>,
Sched<[WriteV]> { Sched<[WriteVd]> {
bits<5> Rd; bits<5> Rd;
bits<5> Rn; bits<5> Rn;
let Inst{31-30} = 0b01; let Inst{31-30} = 0b01;
@ -6963,7 +6963,7 @@ class BaseSIMDCmpTwoScalar<bit U, bits<2> size, bits<2> size2, bits<5> opcode,
RegisterClass regtype, string asm, string zero> RegisterClass regtype, string asm, string zero>
: I<(outs regtype:$Rd), (ins regtype:$Rn), asm, : I<(outs regtype:$Rd), (ins regtype:$Rn), asm,
"\t$Rd, $Rn, #" # zero, "", []>, "\t$Rd, $Rn, #" # zero, "", []>,
Sched<[WriteV]> { Sched<[WriteVd]> {
bits<5> Rd; bits<5> Rd;
bits<5> Rn; bits<5> Rn;
let Inst{31-30} = 0b01; let Inst{31-30} = 0b01;
@ -6982,7 +6982,7 @@ class BaseSIMDCmpTwoScalar<bit U, bits<2> size, bits<2> size2, bits<5> opcode,
class SIMDInexactCvtTwoScalar<bits<5> opcode, string asm> class SIMDInexactCvtTwoScalar<bits<5> opcode, string asm>
: I<(outs FPR32:$Rd), (ins FPR64:$Rn), asm, "\t$Rd, $Rn", "", : I<(outs FPR32:$Rd), (ins FPR64:$Rn), asm, "\t$Rd, $Rn", "",
[(set (f32 FPR32:$Rd), (int_aarch64_sisd_fcvtxn (f64 FPR64:$Rn)))]>, [(set (f32 FPR32:$Rd), (int_aarch64_sisd_fcvtxn (f64 FPR64:$Rn)))]>,
Sched<[WriteV]> { Sched<[WriteVd]> {
bits<5> Rd; bits<5> Rd;
bits<5> Rn; bits<5> Rn;
let Inst{31-17} = 0b011111100110000; let Inst{31-17} = 0b011111100110000;
@ -7104,7 +7104,7 @@ class BaseSIMDPairwiseScalar<bit U, bits<2> size, bits<5> opcode,
string asm, string kind> string asm, string kind>
: I<(outs regtype:$Rd), (ins vectype:$Rn), asm, : I<(outs regtype:$Rd), (ins vectype:$Rn), asm,
"{\t$Rd, $Rn" # kind # "|" # kind # "\t$Rd, $Rn}", "", []>, "{\t$Rd, $Rn" # kind # "|" # kind # "\t$Rd, $Rn}", "", []>,
Sched<[WriteV]> { Sched<[WriteVd]> {
bits<5> Rd; bits<5> Rd;
bits<5> Rn; bits<5> Rn;
let Inst{31-30} = 0b01; let Inst{31-30} = 0b01;
@ -7144,7 +7144,7 @@ class BaseSIMDAcrossLanes<bit Q, bit U, bits<2> size, bits<5> opcode,
string asm, string kind, list<dag> pattern> string asm, string kind, list<dag> pattern>
: I<(outs regtype:$Rd), (ins vectype:$Rn), asm, : I<(outs regtype:$Rd), (ins vectype:$Rn), asm,
"{\t$Rd, $Rn" # kind # "|" # kind # "\t$Rd, $Rn}", "", pattern>, "{\t$Rd, $Rn" # kind # "|" # kind # "\t$Rd, $Rn}", "", pattern>,
Sched<[WriteV]> { Sched<[!if(Q, WriteVq, WriteVd)]> {
bits<5> Rd; bits<5> Rd;
bits<5> Rn; bits<5> Rn;
let Inst{31} = 0; let Inst{31} = 0;
@ -7210,7 +7210,7 @@ multiclass SIMDFPAcrossLanes<bits<5> opcode, bit sz1, string asm,
class BaseSIMDInsDup<bit Q, bit op, dag outs, dag ins, string asm, class BaseSIMDInsDup<bit Q, bit op, dag outs, dag ins, string asm,
string operands, string constraints, list<dag> pattern> string operands, string constraints, list<dag> pattern>
: I<outs, ins, asm, operands, constraints, pattern>, : I<outs, ins, asm, operands, constraints, pattern>,
Sched<[WriteV]> { Sched<[!if(Q, WriteVq, WriteVd)]> {
bits<5> Rd; bits<5> Rd;
bits<5> Rn; bits<5> Rn;
let Inst{31} = 0; let Inst{31} = 0;
@ -7522,7 +7522,7 @@ class BaseSIMDTableLookup<bit Q, bits<2> len, bit op, RegisterOperand vectype,
RegisterOperand listtype, string asm, string kind> RegisterOperand listtype, string asm, string kind>
: I<(outs vectype:$Vd), (ins listtype:$Vn, vectype:$Vm), asm, : I<(outs vectype:$Vd), (ins listtype:$Vn, vectype:$Vm), asm,
"\t$Vd" # kind # ", $Vn, $Vm" # kind, "", []>, "\t$Vd" # kind # ", $Vn, $Vm" # kind, "", []>,
Sched<[WriteV]> { Sched<[!if(Q, WriteVq, WriteVd)]> {
bits<5> Vd; bits<5> Vd;
bits<5> Vn; bits<5> Vn;
bits<5> Vm; bits<5> Vm;
@ -7543,7 +7543,7 @@ class BaseSIMDTableLookupTied<bit Q, bits<2> len, bit op, RegisterOperand vectyp
RegisterOperand listtype, string asm, string kind> RegisterOperand listtype, string asm, string kind>
: I<(outs vectype:$dst), (ins vectype:$Vd, listtype:$Vn, vectype:$Vm), asm, : I<(outs vectype:$dst), (ins vectype:$Vd, listtype:$Vn, vectype:$Vm), asm,
"\t$Vd" # kind # ", $Vn, $Vm" # kind, "$Vd = $dst", []>, "\t$Vd" # kind # ", $Vn, $Vm" # kind, "$Vd = $dst", []>,
Sched<[WriteV]> { Sched<[!if(Q, WriteVq, WriteVd)]> {
bits<5> Vd; bits<5> Vd;
bits<5> Vn; bits<5> Vn;
bits<5> Vm; bits<5> Vm;
@ -7662,7 +7662,7 @@ class BaseSIMDScalarCPY<RegisterClass regtype, RegisterOperand vectype,
: I<(outs regtype:$dst), (ins vectype:$src, idxtype:$idx), asm, : I<(outs regtype:$dst), (ins vectype:$src, idxtype:$idx), asm,
"{\t$dst, $src" # kind # "$idx" # "{\t$dst, $src" # kind # "$idx" #
"|\t$dst, $src$idx}", "", []>, "|\t$dst, $src$idx}", "", []>,
Sched<[WriteV]> { Sched<[WriteVd]> {
bits<5> dst; bits<5> dst;
bits<5> src; bits<5> src;
let Inst{31-21} = 0b01011110000; let Inst{31-21} = 0b01011110000;
@ -7727,7 +7727,7 @@ class BaseSIMDModifiedImm<bit Q, bit op, bit op2, dag oops, dag iops,
string asm, string op_string, string asm, string op_string,
string cstr, list<dag> pattern> string cstr, list<dag> pattern>
: I<oops, iops, asm, op_string, cstr, pattern>, : I<oops, iops, asm, op_string, cstr, pattern>,
Sched<[WriteV]> { Sched<[!if(Q, WriteVq, WriteVd)]> {
bits<5> Rd; bits<5> Rd;
bits<8> imm8; bits<8> imm8;
let Inst{31} = 0; let Inst{31} = 0;
@ -7897,7 +7897,7 @@ class BaseSIMDIndexed<bit Q, bit U, bit Scalar, bits<2> size, bits<4> opc,
asm, asm,
"{\t$Rd" # dst_kind # ", $Rn" # lhs_kind # ", $Rm" # rhs_kind # "$idx" # "{\t$Rd" # dst_kind # ", $Rn" # lhs_kind # ", $Rm" # rhs_kind # "$idx" #
"|" # apple_kind # "\t$Rd, $Rn, $Rm$idx}", "", pattern>, "|" # apple_kind # "\t$Rd, $Rn, $Rm$idx}", "", pattern>,
Sched<[WriteV]> { Sched<[WriteVd]> {
bits<5> Rd; bits<5> Rd;
bits<5> Rn; bits<5> Rn;
bits<5> Rm; bits<5> Rm;
@ -7927,7 +7927,7 @@ class BaseSIMDIndexedTied<bit Q, bit U, bit Scalar, bits<2> size, bits<4> opc,
(ins dst_reg:$Rd, lhs_reg:$Rn, rhs_reg:$Rm, vec_idx:$idx), asm, (ins dst_reg:$Rd, lhs_reg:$Rn, rhs_reg:$Rm, vec_idx:$idx), asm,
"{\t$Rd" # dst_kind # ", $Rn" # lhs_kind # ", $Rm" # rhs_kind # "$idx" # "{\t$Rd" # dst_kind # ", $Rn" # lhs_kind # ", $Rm" # rhs_kind # "$idx" #
"|" # apple_kind # "\t$Rd, $Rn, $Rm$idx}", "$Rd = $dst", pattern>, "|" # apple_kind # "\t$Rd, $Rn, $Rm$idx}", "$Rd = $dst", pattern>,
Sched<[WriteV]> { Sched<[WriteVd]> {
bits<5> Rd; bits<5> Rd;
bits<5> Rn; bits<5> Rn;
bits<5> Rm; bits<5> Rm;
@ -8020,7 +8020,7 @@ class SIMDBF16MLALIndex<bit Q, string asm, SDPatternOperator OpNode>
(v8bf16 (v8bf16
(AArch64duplane16 (v8bf16 V128_lo:$Rm), (AArch64duplane16 (v8bf16 V128_lo:$Rm),
VectorIndexH:$idx)))))]>, VectorIndexH:$idx)))))]>,
Sched<[WriteV]> { Sched<[WriteVq]> {
bits<5> Rd; bits<5> Rd;
bits<5> Rn; bits<5> Rn;
bits<4> Rm; bits<4> Rm;
@ -8941,7 +8941,7 @@ class BaseSIMDScalarShift<bit U, bits<5> opc, bits<7> fixed_imm,
Operand immtype, string asm, list<dag> pattern> Operand immtype, string asm, list<dag> pattern>
: I<(outs regtype1:$Rd), (ins regtype2:$Rn, immtype:$imm), : I<(outs regtype1:$Rd), (ins regtype2:$Rn, immtype:$imm),
asm, "\t$Rd, $Rn, $imm", "", pattern>, asm, "\t$Rd, $Rn, $imm", "", pattern>,
Sched<[WriteV]> { Sched<[WriteVd]> {
bits<5> Rd; bits<5> Rd;
bits<5> Rn; bits<5> Rn;
bits<7> imm; bits<7> imm;
@ -8961,7 +8961,7 @@ class BaseSIMDScalarShiftTied<bit U, bits<5> opc, bits<7> fixed_imm,
Operand immtype, string asm, list<dag> pattern> Operand immtype, string asm, list<dag> pattern>
: I<(outs regtype1:$dst), (ins regtype1:$Rd, regtype2:$Rn, immtype:$imm), : I<(outs regtype1:$dst), (ins regtype1:$Rd, regtype2:$Rn, immtype:$imm),
asm, "\t$Rd, $Rn, $imm", "$Rd = $dst", pattern>, asm, "\t$Rd, $Rn, $imm", "$Rd = $dst", pattern>,
Sched<[WriteV]> { Sched<[WriteVd]> {
bits<5> Rd; bits<5> Rd;
bits<5> Rn; bits<5> Rn;
bits<7> imm; bits<7> imm;
@ -9125,7 +9125,7 @@ class BaseSIMDVectorShift<bit Q, bit U, bits<5> opc, bits<7> fixed_imm,
: I<(outs dst_reg:$Rd), (ins src_reg:$Rn, immtype:$imm), : I<(outs dst_reg:$Rd), (ins src_reg:$Rn, immtype:$imm),
asm, "{\t$Rd" # dst_kind # ", $Rn" # src_kind # ", $imm" # asm, "{\t$Rd" # dst_kind # ", $Rn" # src_kind # ", $imm" #
"|" # dst_kind # "\t$Rd, $Rn, $imm}", "", pattern>, "|" # dst_kind # "\t$Rd, $Rn, $imm}", "", pattern>,
Sched<[WriteV]> { Sched<[!if(Q, WriteVq, WriteVd)]> {
bits<5> Rd; bits<5> Rd;
bits<5> Rn; bits<5> Rn;
let Inst{31} = 0; let Inst{31} = 0;
@ -9148,7 +9148,7 @@ class BaseSIMDVectorShiftTied<bit Q, bit U, bits<5> opc, bits<7> fixed_imm,
: I<(outs vectype1:$dst), (ins vectype1:$Rd, vectype2:$Rn, immtype:$imm), : I<(outs vectype1:$dst), (ins vectype1:$Rd, vectype2:$Rn, immtype:$imm),
asm, "{\t$Rd" # dst_kind # ", $Rn" # src_kind # ", $imm" # asm, "{\t$Rd" # dst_kind # ", $Rn" # src_kind # ", $imm" #
"|" # dst_kind # "\t$Rd, $Rn, $imm}", "$Rd = $dst", pattern>, "|" # dst_kind # "\t$Rd, $Rn, $imm}", "$Rd = $dst", pattern>,
Sched<[WriteV]> { Sched<[!if(Q, WriteVq, WriteVd)]> {
bits<5> Rd; bits<5> Rd;
bits<5> Rn; bits<5> Rn;
let Inst{31} = 0; let Inst{31} = 0;
@ -10695,7 +10695,7 @@ class BaseSIMDThreeSameVectorComplex<bit Q, bit U, bits<2> size, bits<3> opcode,
: I<(outs regtype:$Rd), (ins regtype:$Rn, regtype:$Rm, rottype:$rot), asm, : I<(outs regtype:$Rd), (ins regtype:$Rn, regtype:$Rm, rottype:$rot), asm,
"{\t$Rd" # kind # ", $Rn" # kind # ", $Rm" # kind # ", $rot" "{\t$Rd" # kind # ", $Rn" # kind # ", $Rm" # kind # ", $rot"
"|" # kind # "\t$Rd, $Rn, $Rm, $rot}", "", pattern>, "|" # kind # "\t$Rd, $Rn, $Rm, $rot}", "", pattern>,
Sched<[WriteV]> { Sched<[!if(Q, WriteVq, WriteVd)]> {
bits<5> Rd; bits<5> Rd;
bits<5> Rn; bits<5> Rn;
bits<5> Rm; bits<5> Rm;
@ -10769,7 +10769,7 @@ class BaseSIMDThreeSameVectorTiedComplex<bit Q, bit U, bits<2> size,
(ins regtype:$Rd, regtype:$Rn, regtype:$Rm, rottype:$rot), asm, (ins regtype:$Rd, regtype:$Rn, regtype:$Rm, rottype:$rot), asm,
"{\t$Rd" # kind # ", $Rn" # kind # ", $Rm" # kind # ", $rot" "{\t$Rd" # kind # ", $Rn" # kind # ", $Rm" # kind # ", $rot"
"|" # kind # "\t$Rd, $Rn, $Rm, $rot}", "$Rd = $dst", pattern>, "|" # kind # "\t$Rd, $Rn, $Rm, $rot}", "$Rd = $dst", pattern>,
Sched<[WriteV]> { Sched<[!if(Q, WriteVq, WriteVd)]> {
bits<5> Rd; bits<5> Rd;
bits<5> Rn; bits<5> Rn;
bits<5> Rm; bits<5> Rm;
@ -10845,7 +10845,7 @@ class BaseSIMDIndexedTiedComplex<bit Q, bit U, bit Scalar, bits<2> size,
"{\t$Rd" # dst_kind # ", $Rn" # lhs_kind # ", $Rm" # rhs_kind # "{\t$Rd" # dst_kind # ", $Rn" # lhs_kind # ", $Rm" # rhs_kind #
"$idx, $rot" # "|" # apple_kind # "$idx, $rot" # "|" # apple_kind #
"\t$Rd, $Rn, $Rm$idx, $rot}", "$Rd = $dst", pattern>, "\t$Rd, $Rn, $Rm$idx, $rot}", "$Rd = $dst", pattern>,
Sched<[WriteV]> { Sched<[!if(Q, WriteVq, WriteVd)]> {
bits<5> Rd; bits<5> Rd;
bits<5> Rn; bits<5> Rn;
bits<5> Rm; bits<5> Rm;
@ -10910,7 +10910,7 @@ let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in
class AESBase<bits<4> opc, string asm, dag outs, dag ins, string cstr, class AESBase<bits<4> opc, string asm, dag outs, dag ins, string cstr,
list<dag> pat> list<dag> pat>
: I<outs, ins, asm, "{\t$Rd.16b, $Rn.16b|.16b\t$Rd, $Rn}", cstr, pat>, : I<outs, ins, asm, "{\t$Rd.16b, $Rn.16b|.16b\t$Rd, $Rn}", cstr, pat>,
Sched<[WriteV]>{ Sched<[WriteVq]>{
bits<5> Rd; bits<5> Rd;
bits<5> Rn; bits<5> Rn;
let Inst{31-16} = 0b0100111000101000; let Inst{31-16} = 0b0100111000101000;
@ -10936,7 +10936,7 @@ class SHA3OpTiedInst<bits<3> opc, string asm, string dst_lhs_kind,
: I<oops, iops, asm, : I<oops, iops, asm,
"{\t$Rd" # dst_lhs_kind # ", $Rn" # dst_lhs_kind # ", $Rm.4s" # "{\t$Rd" # dst_lhs_kind # ", $Rn" # dst_lhs_kind # ", $Rm.4s" #
"|.4s\t$Rd, $Rn, $Rm}", "$Rd = $dst", pat>, "|.4s\t$Rd, $Rn, $Rm}", "$Rd = $dst", pat>,
Sched<[WriteV]>{ Sched<[WriteVq]>{
bits<5> Rd; bits<5> Rd;
bits<5> Rn; bits<5> Rn;
bits<5> Rm; bits<5> Rm;
@ -10976,7 +10976,7 @@ class SHA2OpInst<bits<4> opc, string asm, string kind,
list<dag> pat> list<dag> pat>
: I<oops, iops, asm, "{\t$Rd" # kind # ", $Rn" # kind # : I<oops, iops, asm, "{\t$Rd" # kind # ", $Rn" # kind #
"|" # kind # "\t$Rd, $Rn}", cstr, pat>, "|" # kind # "\t$Rd, $Rn}", cstr, pat>,
Sched<[WriteV]>{ Sched<[WriteVq]>{
bits<5> Rd; bits<5> Rd;
bits<5> Rn; bits<5> Rn;
let Inst{31-16} = 0b0101111000101000; let Inst{31-16} = 0b0101111000101000;
@ -10999,7 +10999,7 @@ class SHAInstSS<bits<4> opc, string asm, Intrinsic OpNode>
// Armv8.2-A Crypto extensions // Armv8.2-A Crypto extensions
class BaseCryptoV82<dag oops, dag iops, string asm, string asmops, string cst, class BaseCryptoV82<dag oops, dag iops, string asm, string asmops, string cst,
list<dag> pattern> list<dag> pattern>
: I <oops, iops, asm, asmops, cst, pattern>, Sched<[WriteV]> { : I <oops, iops, asm, asmops, cst, pattern>, Sched<[WriteVq]> {
bits<5> Vd; bits<5> Vd;
bits<5> Vn; bits<5> Vn;
let Inst{31-25} = 0b1100111; let Inst{31-25} = 0b1100111;

View File

@ -7006,9 +7006,9 @@ def AESIMCrr : AESInst< 0b0111, "aesimc", int_aarch64_crypto_aesimc>;
// for AES fusion on some CPUs. // for AES fusion on some CPUs.
let hasSideEffects = 0, mayStore = 0, mayLoad = 0 in { let hasSideEffects = 0, mayStore = 0, mayLoad = 0 in {
def AESMCrrTied: Pseudo<(outs V128:$Rd), (ins V128:$Rn), [], "$Rn = $Rd">, def AESMCrrTied: Pseudo<(outs V128:$Rd), (ins V128:$Rn), [], "$Rn = $Rd">,
Sched<[WriteV]>; Sched<[WriteVq]>;
def AESIMCrrTied: Pseudo<(outs V128:$Rd), (ins V128:$Rn), [], "$Rn = $Rd">, def AESIMCrrTied: Pseudo<(outs V128:$Rd), (ins V128:$Rn), [], "$Rn = $Rd">,
Sched<[WriteV]>; Sched<[WriteVq]>;
} }
// Only use constrained versions of AES(I)MC instructions if they are paired with // Only use constrained versions of AES(I)MC instructions if they are paired with

View File

@ -127,7 +127,8 @@ def : WriteRes<WriteFCmp, [A53UnitFPALU]> { let Latency = 6; }
def : WriteRes<WriteFCvt, [A53UnitFPALU]> { let Latency = 6; } def : WriteRes<WriteFCvt, [A53UnitFPALU]> { let Latency = 6; }
def : WriteRes<WriteFCopy, [A53UnitFPALU]> { let Latency = 6; } def : WriteRes<WriteFCopy, [A53UnitFPALU]> { let Latency = 6; }
def : WriteRes<WriteFImm, [A53UnitFPALU]> { let Latency = 6; } def : WriteRes<WriteFImm, [A53UnitFPALU]> { let Latency = 6; }
def : WriteRes<WriteV, [A53UnitFPALU]> { let Latency = 6; } def : WriteRes<WriteVd, [A53UnitFPALU]> { let Latency = 6; }
def : WriteRes<WriteVq, [A53UnitFPALU]> { let Latency = 6; }
// FP Mul, Div, Sqrt // FP Mul, Div, Sqrt
def : WriteRes<WriteFMul, [A53UnitFPMDS]> { let Latency = 6; } def : WriteRes<WriteFMul, [A53UnitFPMDS]> { let Latency = 6; }

View File

@ -149,9 +149,11 @@ def : WriteRes<WriteFCmp, [CortexA55UnitFPALU]> { let Latency = 3; }
def : WriteRes<WriteFCvt, [CortexA55UnitFPALU]> { let Latency = 4; } def : WriteRes<WriteFCvt, [CortexA55UnitFPALU]> { let Latency = 4; }
def : WriteRes<WriteFCopy, [CortexA55UnitFPALU]> { let Latency = 3; } def : WriteRes<WriteFCopy, [CortexA55UnitFPALU]> { let Latency = 3; }
def : WriteRes<WriteFImm, [CortexA55UnitFPALU]> { let Latency = 3; } def : WriteRes<WriteFImm, [CortexA55UnitFPALU]> { let Latency = 3; }
def : WriteRes<WriteV, [CortexA55UnitFPALU]> { let Latency = 4; } def : WriteRes<WriteVd, [CortexA55UnitFPALU]> { let Latency = 4; }
def : WriteRes<WriteVq, [CortexA55UnitFPALU,CortexA55UnitFPALU]> { let Latency = 4; let BeginGroup = 1; }
// FP ALU specific new schedwrite definitions // FP ALU specific new schedwrite definitions
def CortexA55WriteFPALU_F2 : SchedWriteRes<[CortexA55UnitFPALU]> { let Latency = 2;}
def CortexA55WriteFPALU_F3 : SchedWriteRes<[CortexA55UnitFPALU]> { let Latency = 3;} def CortexA55WriteFPALU_F3 : SchedWriteRes<[CortexA55UnitFPALU]> { let Latency = 3;}
def CortexA55WriteFPALU_F4 : SchedWriteRes<[CortexA55UnitFPALU]> { let Latency = 4;} def CortexA55WriteFPALU_F4 : SchedWriteRes<[CortexA55UnitFPALU]> { let Latency = 4;}
def CortexA55WriteFPALU_F5 : SchedWriteRes<[CortexA55UnitFPALU]> { let Latency = 5;} def CortexA55WriteFPALU_F5 : SchedWriteRes<[CortexA55UnitFPALU]> { let Latency = 5;}
@ -331,6 +333,8 @@ def : InstRW<[CortexA55WriteVST4, WriteAdr], (instregex "ST4Fourv(8b|4h|2s|1d|16
//--- //---
// Floating Point Conversions, MAC, DIV, SQRT // Floating Point Conversions, MAC, DIV, SQRT
//--- //---
def : InstRW<[CortexA55WriteFPALU_F2], (instregex "^DUP(v2i64|v4i32|v8i16|v16i8)")>;
def : InstRW<[CortexA55WriteFPALU_F2], (instregex "^XTN")>;
def : InstRW<[CortexA55WriteFPALU_F3], (instregex "^FCVT[ALMNPZ][SU](S|U)?(W|X)")>; def : InstRW<[CortexA55WriteFPALU_F3], (instregex "^FCVT[ALMNPZ][SU](S|U)?(W|X)")>;
def : InstRW<[CortexA55WriteFPALU_F4], (instregex "^FCVT(X)?[ALMNPXZ](S|U|N)?v")>; def : InstRW<[CortexA55WriteFPALU_F4], (instregex "^FCVT(X)?[ALMNPXZ](S|U|N)?v")>;

View File

@ -96,7 +96,8 @@ def : SchedAlias<WriteFCopy, A57Write_5cyc_1L>;
def : SchedAlias<WriteFImm, A57Write_3cyc_1V>; def : SchedAlias<WriteFImm, A57Write_3cyc_1V>;
def : WriteRes<WriteFMul, [A57UnitV]> { let Latency = 5;} def : WriteRes<WriteFMul, [A57UnitV]> { let Latency = 5;}
def : SchedAlias<WriteFDiv, A57Write_17cyc_1W>; def : SchedAlias<WriteFDiv, A57Write_17cyc_1W>;
def : SchedAlias<WriteV, A57Write_3cyc_1V>; def : SchedAlias<WriteVd, A57Write_3cyc_1V>;
def : SchedAlias<WriteVq, A57Write_3cyc_1V>;
def : SchedAlias<WriteVLD, A57Write_5cyc_1L>; def : SchedAlias<WriteVLD, A57Write_5cyc_1L>;
def : SchedAlias<WriteVST, A57Write_1cyc_1S>; def : SchedAlias<WriteVST, A57Write_1cyc_1S>;

View File

@ -1627,7 +1627,11 @@ def : InstRW<[A64FXWrite_FMOV_VG14], (instrs FMOVDXHighr)>;
// ASIMD shift by register, basic, Q-form // ASIMD shift by register, basic, Q-form
// ASIMD shift by register, complex, D-form // ASIMD shift by register, complex, D-form
// ASIMD shift by register, complex, Q-form // ASIMD shift by register, complex, Q-form
def : WriteRes<WriteV, [A64FXGI03]> { def : WriteRes<WriteVd, [A64FXGI03]> {
let Latency = 4;
let ResourceCycles = [1];
}
def : WriteRes<WriteVq, [A64FXGI03]> {
let Latency = 4; let Latency = 4;
let ResourceCycles = [1]; let ResourceCycles = [1];
} }

View File

@ -304,7 +304,8 @@ def : WriteRes<WriteSys, []> {let Latency = -1;}
// 7.9 Vector Unit Instructions // 7.9 Vector Unit Instructions
// Simple vector operations take 2 cycles. // Simple vector operations take 2 cycles.
def : WriteRes<WriteV, [CyUnitV]> {let Latency = 2;} def : WriteRes<WriteVd, [CyUnitV]> {let Latency = 2;}
def : WriteRes<WriteVq, [CyUnitV]> {let Latency = 2;}
// Define some longer latency vector op types for Cyclone. // Define some longer latency vector op types for Cyclone.
def CyWriteV3 : SchedWriteRes<[CyUnitV]> {let Latency = 3;} def CyWriteV3 : SchedWriteRes<[CyUnitV]> {let Latency = 3;}
@ -335,7 +336,7 @@ def : WriteRes<WriteFImm, [CyUnitV]> {let Latency = 2;}
// COPY is handled above in the WriteMov Variant. // COPY is handled above in the WriteMov Variant.
def WriteVMov : SchedWriteVariant<[ def WriteVMov : SchedWriteVariant<[
SchedVar<WriteVMovPred, [WriteX]>, SchedVar<WriteVMovPred, [WriteX]>,
SchedVar<NoSchedPred, [WriteV]>]>; SchedVar<NoSchedPred, [WriteVq]>]>;
def : InstRW<[WriteVMov], (instrs ORRv16i8)>; def : InstRW<[WriteVMov], (instrs ORRv16i8)>;
// FMOVSr,FMOVDr are WriteF. // FMOVSr,FMOVDr are WriteF.
@ -355,7 +356,7 @@ def : WriteRes<WriteFCopy, [CyUnitLS]> {
def : InstRW<[WriteLD], (instrs FMOVSWr,FMOVDXr,FMOVDXHighr)>; def : InstRW<[WriteLD], (instrs FMOVSWr,FMOVDXr,FMOVDXHighr)>;
// INS V[x],R // INS V[x],R
def CyWriteCopyToFPR : WriteSequence<[WriteVLD, WriteV]>; def CyWriteCopyToFPR : WriteSequence<[WriteVLD, WriteVq]>;
def : InstRW<[CyWriteCopyToFPR], (instregex "INSv")>; def : InstRW<[CyWriteCopyToFPR], (instregex "INSv")>;
// SMOV,UMOV R,V[x] // SMOV,UMOV R,V[x]
@ -571,7 +572,7 @@ def : InstRW<[WriteFRSQRTS], (instregex "FRSQRTSv")>;
//--- //---
// FCVT lengthen f16/s32 // FCVT lengthen f16/s32
def : InstRW<[WriteV], (instrs FCVTSHr,FCVTDHr,FCVTDSr)>; def : InstRW<[WriteVq], (instrs FCVTSHr,FCVTDHr,FCVTDSr)>;
// FCVT,FCVTN,FCVTXN // FCVT,FCVTN,FCVTXN
// SCVTF,UCVTF V,V // SCVTF,UCVTF V,V
@ -681,61 +682,61 @@ def : InstRW<[WriteVLDShuffle],
def : InstRW<[WriteVLDShuffle, WriteAdr], def : InstRW<[WriteVLDShuffle, WriteAdr],
(instregex "LD1Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; (instregex "LD1Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
def : InstRW<[WriteVLDShuffle, WriteV], def : InstRW<[WriteVLDShuffle, WriteVq],
(instregex "LD2Twov(8b|4h|2s)$")>; (instregex "LD2Twov(8b|4h|2s)$")>;
def : InstRW<[WriteVLDShuffle, WriteAdr, WriteV], def : InstRW<[WriteVLDShuffle, WriteAdr, WriteVq],
(instregex "LD2Twov(8b|4h|2s)_POST$")>; (instregex "LD2Twov(8b|4h|2s)_POST$")>;
def : InstRW<[WriteVLDShuffle, WriteVLDShuffle], def : InstRW<[WriteVLDShuffle, WriteVLDShuffle],
(instregex "LD2Twov(16b|8h|4s|2d)$")>; (instregex "LD2Twov(16b|8h|4s|2d)$")>;
def : InstRW<[WriteVLDShuffle, WriteAdr, WriteVLDShuffle], def : InstRW<[WriteVLDShuffle, WriteAdr, WriteVLDShuffle],
(instregex "LD2Twov(16b|8h|4s|2d)_POST")>; (instregex "LD2Twov(16b|8h|4s|2d)_POST")>;
def : InstRW<[WriteVLDShuffle, ReadVLD, WriteV], def : InstRW<[WriteVLDShuffle, ReadVLD, WriteVq],
(instregex "LD2i(8|16|32)$")>; (instregex "LD2i(8|16|32)$")>;
def : InstRW<[WriteVLDShuffle, ReadVLD, WriteAdr, WriteV], def : InstRW<[WriteVLDShuffle, ReadVLD, WriteAdr, WriteVq],
(instregex "LD2i(8|16|32)_POST")>; (instregex "LD2i(8|16|32)_POST")>;
def : InstRW<[WriteVLDShuffle, ReadVLD, WriteV], def : InstRW<[WriteVLDShuffle, ReadVLD, WriteVq],
(instregex "LD2i64$")>; (instregex "LD2i64$")>;
def : InstRW<[WriteVLDShuffle, ReadVLD, WriteAdr, WriteV], def : InstRW<[WriteVLDShuffle, ReadVLD, WriteAdr, WriteVq],
(instregex "LD2i64_POST")>; (instregex "LD2i64_POST")>;
def : InstRW<[WriteVLDShuffle, WriteV], def : InstRW<[WriteVLDShuffle, WriteVq],
(instregex "LD2Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>; (instregex "LD2Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
def : InstRW<[WriteVLDShuffle, WriteAdr, WriteV], def : InstRW<[WriteVLDShuffle, WriteAdr, WriteVq],
(instregex "LD2Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST")>; (instregex "LD2Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST")>;
def : InstRW<[WriteVLDShuffle, WriteVLDShuffle, WriteV], def : InstRW<[WriteVLDShuffle, WriteVLDShuffle, WriteVq],
(instregex "LD3Threev(8b|4h|2s)$")>; (instregex "LD3Threev(8b|4h|2s)$")>;
def : InstRW<[WriteVLDShuffle, WriteAdr, WriteVLDShuffle, WriteV], def : InstRW<[WriteVLDShuffle, WriteAdr, WriteVLDShuffle, WriteVq],
(instregex "LD3Threev(8b|4h|2s)_POST")>; (instregex "LD3Threev(8b|4h|2s)_POST")>;
def : InstRW<[WriteVLDShuffle, WriteVLDShuffle, WriteVLDShuffle], def : InstRW<[WriteVLDShuffle, WriteVLDShuffle, WriteVLDShuffle],
(instregex "LD3Threev(16b|8h|4s|2d)$")>; (instregex "LD3Threev(16b|8h|4s|2d)$")>;
def : InstRW<[WriteVLDShuffle, WriteAdr, WriteVLDShuffle, WriteVLDShuffle], def : InstRW<[WriteVLDShuffle, WriteAdr, WriteVLDShuffle, WriteVLDShuffle],
(instregex "LD3Threev(16b|8h|4s|2d)_POST")>; (instregex "LD3Threev(16b|8h|4s|2d)_POST")>;
def : InstRW<[WriteVLDShuffle, ReadVLD, WriteV, WriteV], def : InstRW<[WriteVLDShuffle, ReadVLD, WriteVq, WriteVq],
(instregex "LD3i(8|16|32)$")>; (instregex "LD3i(8|16|32)$")>;
def : InstRW<[WriteVLDShuffle, ReadVLD, WriteAdr, WriteV, WriteV], def : InstRW<[WriteVLDShuffle, ReadVLD, WriteAdr, WriteVq, WriteVq],
(instregex "LD3i(8|16|32)_POST")>; (instregex "LD3i(8|16|32)_POST")>;
def : InstRW<[WriteVLDShuffle, ReadVLD, WriteVLDShuffle, WriteV], def : InstRW<[WriteVLDShuffle, ReadVLD, WriteVLDShuffle, WriteVq],
(instregex "LD3i64$")>; (instregex "LD3i64$")>;
def : InstRW<[WriteVLDShuffle, ReadVLD, WriteAdr, WriteVLDShuffle, WriteV], def : InstRW<[WriteVLDShuffle, ReadVLD, WriteAdr, WriteVLDShuffle, WriteVq],
(instregex "LD3i64_POST")>; (instregex "LD3i64_POST")>;
def : InstRW<[WriteVLDShuffle, WriteV, WriteV], def : InstRW<[WriteVLDShuffle, WriteVq, WriteVq],
(instregex "LD3Rv(8b|4h|2s|16b|8h|4s)$")>; (instregex "LD3Rv(8b|4h|2s|16b|8h|4s)$")>;
def : InstRW<[WriteVLDShuffle, WriteAdr, WriteV, WriteV], def : InstRW<[WriteVLDShuffle, WriteAdr, WriteVq, WriteVq],
(instregex "LD3Rv(8b|4h|2s|16b|8h|4s)_POST")>; (instregex "LD3Rv(8b|4h|2s|16b|8h|4s)_POST")>;
def : InstRW<[WriteVLDShuffle, WriteVLDShuffle, WriteV], def : InstRW<[WriteVLDShuffle, WriteVLDShuffle, WriteVq],
(instrs LD3Rv1d,LD3Rv2d)>; (instrs LD3Rv1d,LD3Rv2d)>;
def : InstRW<[WriteVLDShuffle, WriteAdr, WriteVLDShuffle, WriteV], def : InstRW<[WriteVLDShuffle, WriteAdr, WriteVLDShuffle, WriteVq],
(instrs LD3Rv1d_POST,LD3Rv2d_POST)>; (instrs LD3Rv1d_POST,LD3Rv2d_POST)>;
def : InstRW<[WriteVLDShuffle, WriteVLDShuffle, WriteV, WriteV], def : InstRW<[WriteVLDShuffle, WriteVLDShuffle, WriteVq, WriteVq],
(instregex "LD4Fourv(8b|4h|2s)$")>; (instregex "LD4Fourv(8b|4h|2s)$")>;
def : InstRW<[WriteVLDShuffle, WriteAdr, WriteVLDShuffle, WriteV, WriteV], def : InstRW<[WriteVLDShuffle, WriteAdr, WriteVLDShuffle, WriteVq, WriteVq],
(instregex "LD4Fourv(8b|4h|2s)_POST")>; (instregex "LD4Fourv(8b|4h|2s)_POST")>;
def : InstRW<[WriteVLDPairShuffle, WriteVLDPairShuffle, def : InstRW<[WriteVLDPairShuffle, WriteVLDPairShuffle,
WriteVLDPairShuffle, WriteVLDPairShuffle], WriteVLDPairShuffle, WriteVLDPairShuffle],
@ -744,25 +745,25 @@ def : InstRW<[WriteVLDPairShuffle, WriteAdr, WriteVLDPairShuffle,
WriteVLDPairShuffle, WriteVLDPairShuffle], WriteVLDPairShuffle, WriteVLDPairShuffle],
(instregex "LD4Fourv(16b|8h|4s|2d)_POST")>; (instregex "LD4Fourv(16b|8h|4s|2d)_POST")>;
def : InstRW<[WriteVLDShuffle, ReadVLD, WriteV, WriteV, WriteV], def : InstRW<[WriteVLDShuffle, ReadVLD, WriteVq, WriteVq, WriteVq],
(instregex "LD4i(8|16|32)$")>; (instregex "LD4i(8|16|32)$")>;
def : InstRW<[WriteVLDShuffle, ReadVLD, WriteAdr, WriteV, WriteV, WriteV], def : InstRW<[WriteVLDShuffle, ReadVLD, WriteAdr, WriteVq, WriteVq, WriteVq],
(instregex "LD4i(8|16|32)_POST")>; (instregex "LD4i(8|16|32)_POST")>;
def : InstRW<[WriteVLDShuffle, ReadVLD, WriteVLDShuffle, WriteV, WriteV], def : InstRW<[WriteVLDShuffle, ReadVLD, WriteVLDShuffle, WriteVq, WriteVq],
(instrs LD4i64)>; (instrs LD4i64)>;
def : InstRW<[WriteVLDShuffle, ReadVLD, WriteAdr, WriteVLDShuffle, WriteV], def : InstRW<[WriteVLDShuffle, ReadVLD, WriteAdr, WriteVLDShuffle, WriteVq],
(instrs LD4i64_POST)>; (instrs LD4i64_POST)>;
def : InstRW<[WriteVLDShuffle, WriteV, WriteV, WriteV], def : InstRW<[WriteVLDShuffle, WriteVq, WriteVq, WriteVq],
(instregex "LD4Rv(8b|4h|2s|16b|8h|4s)$")>; (instregex "LD4Rv(8b|4h|2s|16b|8h|4s)$")>;
def : InstRW<[WriteVLDShuffle, WriteAdr, WriteV, WriteV, WriteV], def : InstRW<[WriteVLDShuffle, WriteAdr, WriteVq, WriteVq, WriteVq],
(instregex "LD4Rv(8b|4h|2s|16b|8h|4s)_POST")>; (instregex "LD4Rv(8b|4h|2s|16b|8h|4s)_POST")>;
def : InstRW<[WriteVLDShuffle, WriteVLDShuffle, WriteV, WriteV], def : InstRW<[WriteVLDShuffle, WriteVLDShuffle, WriteVq, WriteVq],
(instrs LD4Rv1d,LD4Rv2d)>; (instrs LD4Rv1d,LD4Rv2d)>;
def : InstRW<[WriteVLDShuffle, WriteAdr, WriteVLDShuffle, WriteV, WriteV], def : InstRW<[WriteVLDShuffle, WriteAdr, WriteVLDShuffle, WriteVq, WriteVq],
(instrs LD4Rv1d_POST,LD4Rv2d_POST)>; (instrs LD4Rv1d_POST,LD4Rv2d_POST)>;
//--- //---

View File

@ -254,7 +254,8 @@ def : WriteRes<WriteVST, [M3UnitS,
let NumMicroOps = 1; } let NumMicroOps = 1; }
// ASIMD FP instructions. // ASIMD FP instructions.
def : WriteRes<WriteV, [M3UnitNALU]> { let Latency = 3; } def : WriteRes<WriteVd, [M3UnitNALU]> { let Latency = 3; }
def : WriteRes<WriteVq, [M3UnitNALU]> { let Latency = 3; }
// Other miscellaneous instructions. // Other miscellaneous instructions.
def : WriteRes<WriteAtomic, []> { let Unsupported = 1; } def : WriteRes<WriteAtomic, []> { let Unsupported = 1; }

View File

@ -558,7 +558,8 @@ def : SchedAlias<WriteVLD, M4WriteL5>;
def : SchedAlias<WriteVST, M4WriteVST1>; def : SchedAlias<WriteVST, M4WriteVST1>;
// ASIMD FP instructions. // ASIMD FP instructions.
def : SchedAlias<WriteV, M4WriteNALU1>; def : SchedAlias<WriteVd, M4WriteNALU1>;
def : SchedAlias<WriteVq, M4WriteNALU1>;
// Other miscellaneous instructions. // Other miscellaneous instructions.
def : WriteRes<WriteAtomic, []> { let Unsupported = 1; } def : WriteRes<WriteAtomic, []> { let Unsupported = 1; }

View File

@ -594,7 +594,8 @@ def : SchedAlias<WriteVLD, M5WriteL6>;
def : SchedAlias<WriteVST, M5WriteVST1>; def : SchedAlias<WriteVST, M5WriteVST1>;
// ASIMD FP instructions. // ASIMD FP instructions.
def : SchedAlias<WriteV, M5WriteNALU1>; def : SchedAlias<WriteVd, M5WriteNALU1>;
def : SchedAlias<WriteVq, M5WriteNALU1>;
// Other miscellaneous instructions. // Other miscellaneous instructions.
def : WriteRes<WriteBarrier, []> { let Latency = 1; } def : WriteRes<WriteBarrier, []> { let Latency = 1; }

View File

@ -92,7 +92,8 @@ def : WriteRes<WriteFCopy, []> { let Unsupported = 1; }
def : WriteRes<WriteFImm, []> { let Unsupported = 1; } def : WriteRes<WriteFImm, []> { let Unsupported = 1; }
def : WriteRes<WriteFMul, []> { let Unsupported = 1; } def : WriteRes<WriteFMul, []> { let Unsupported = 1; }
def : WriteRes<WriteFDiv, []> { let Unsupported = 1; } def : WriteRes<WriteFDiv, []> { let Unsupported = 1; }
def : WriteRes<WriteV, []> { let Unsupported = 1; } def : WriteRes<WriteVd, []> { let Unsupported = 1; }
def : WriteRes<WriteVq, []> { let Unsupported = 1; }
def : WriteRes<WriteVLD, []> { let Unsupported = 1; } def : WriteRes<WriteVLD, []> { let Unsupported = 1; }
def : WriteRes<WriteVST, []> { let Unsupported = 1; } def : WriteRes<WriteVST, []> { let Unsupported = 1; }
def : WriteRes<WriteSys, []> { let Unsupported = 1; } def : WriteRes<WriteSys, []> { let Unsupported = 1; }

View File

@ -95,7 +95,8 @@ def : WriteRes<WriteFMul, [KryoUnitX, KryoUnitX]>
{ let Latency = 6; let NumMicroOps = 2; } { let Latency = 6; let NumMicroOps = 2; }
def : WriteRes<WriteFDiv, [KryoUnitXA, KryoUnitY]> def : WriteRes<WriteFDiv, [KryoUnitXA, KryoUnitY]>
{ let Latency = 12; let NumMicroOps = 2; } // Fragent -1 / NoRSV +1 { let Latency = 12; let NumMicroOps = 2; } // Fragent -1 / NoRSV +1
def : WriteRes<WriteV, [KryoUnitXY]> { let Latency = 6; } def : WriteRes<WriteVd, [KryoUnitXY]> { let Latency = 6; }
def : WriteRes<WriteVq, [KryoUnitXY]> { let Latency = 6; }
def : WriteRes<WriteVLD, [KryoUnitLS]> { let Latency = 4; } def : WriteRes<WriteVLD, [KryoUnitLS]> { let Latency = 4; }
def : WriteRes<WriteVST, [KryoUnitLS]> { let Latency = 4; } def : WriteRes<WriteVST, [KryoUnitLS]> { let Latency = 4; }

View File

@ -90,7 +90,8 @@ def : WriteRes<WriteFMul, [TSV110UnitF]> { let Latency = 5; }
// FP Div, Sqrt // FP Div, Sqrt
def : WriteRes<WriteFDiv, [TSV110UnitFSU1]> { let Latency = 18; } def : WriteRes<WriteFDiv, [TSV110UnitFSU1]> { let Latency = 18; }
def : WriteRes<WriteV, [TSV110UnitF]> { let Latency = 4; } def : WriteRes<WriteVd, [TSV110UnitF]> { let Latency = 4; }
def : WriteRes<WriteVq, [TSV110UnitF]> { let Latency = 4; }
def : WriteRes<WriteVLD, [TSV110UnitFLdSt]> { let Latency = 5; } def : WriteRes<WriteVLD, [TSV110UnitFLdSt]> { let Latency = 5; }
def : WriteRes<WriteVST, [TSV110UnitF]> { let Latency = 1; } def : WriteRes<WriteVST, [TSV110UnitF]> { let Latency = 1; }

View File

@ -154,7 +154,8 @@ def : WriteRes<WriteFCmp, [THXT8XUnitFPALU]> { let Latency = 6; }
def : WriteRes<WriteFCvt, [THXT8XUnitFPALU]> { let Latency = 6; } def : WriteRes<WriteFCvt, [THXT8XUnitFPALU]> { let Latency = 6; }
def : WriteRes<WriteFCopy, [THXT8XUnitFPALU]> { let Latency = 6; } def : WriteRes<WriteFCopy, [THXT8XUnitFPALU]> { let Latency = 6; }
def : WriteRes<WriteFImm, [THXT8XUnitFPALU]> { let Latency = 6; } def : WriteRes<WriteFImm, [THXT8XUnitFPALU]> { let Latency = 6; }
def : WriteRes<WriteV, [THXT8XUnitFPALU]> { let Latency = 6; } def : WriteRes<WriteVd, [THXT8XUnitFPALU]> { let Latency = 6; }
def : WriteRes<WriteVq, [THXT8XUnitFPALU]> { let Latency = 6; }
// FP Mul, Div, Sqrt // FP Mul, Div, Sqrt
def : WriteRes<WriteFMul, [THXT8XUnitFPMDS]> { let Latency = 6; } def : WriteRes<WriteFMul, [THXT8XUnitFPMDS]> { let Latency = 6; }

View File

@ -1250,7 +1250,12 @@ def : InstRW<[THX2T99Write_5Cyc_F01], (instrs FMOVXDHighr, FMOVDXHighr)>;
// ASIMD shift by register, basic, Q-form // ASIMD shift by register, basic, Q-form
// ASIMD shift by register, complex, D-form // ASIMD shift by register, complex, D-form
// ASIMD shift by register, complex, Q-form // ASIMD shift by register, complex, Q-form
def : WriteRes<WriteV, [THX2T99F01]> { def : WriteRes<WriteVd, [THX2T99F01]> {
let Latency = 7;
let NumMicroOps = 4;
let ResourceCycles = [4];
}
def : WriteRes<WriteVq, [THX2T99F01]> {
let Latency = 7; let Latency = 7;
let NumMicroOps = 4; let NumMicroOps = 4;
let ResourceCycles = [4]; let ResourceCycles = [4];

View File

@ -1357,7 +1357,12 @@ def : InstRW<[THX3T110Write_5Cyc_F01], (instrs FMOVXDHighr, FMOVDXHighr)>;
// ASIMD shift by register, basic, Q-form // ASIMD shift by register, basic, Q-form
// ASIMD shift by register, complex, D-form // ASIMD shift by register, complex, D-form
// ASIMD shift by register, complex, Q-form // ASIMD shift by register, complex, Q-form
def : WriteRes<WriteV, [THX3T110FP0123]> { def : WriteRes<WriteVd, [THX3T110FP0123]> {
let Latency = 5;
let NumMicroOps = 4;
let ResourceCycles = [4];
}
def : WriteRes<WriteVq, [THX3T110FP0123]> {
let Latency = 5; let Latency = 5;
let NumMicroOps = 4; let NumMicroOps = 4;
let ResourceCycles = [4]; let ResourceCycles = [4];

View File

@ -77,7 +77,8 @@ def WriteFImm : SchedWrite; // Floating-point immediate.
def WriteFMul : SchedWrite; // Floating-point multiply. def WriteFMul : SchedWrite; // Floating-point multiply.
def WriteFDiv : SchedWrite; // Floating-point division. def WriteFDiv : SchedWrite; // Floating-point division.
def WriteV : SchedWrite; // Vector ops. def WriteVd : SchedWrite; // 64bit Vector D ops.
def WriteVq : SchedWrite; // 128bit Vector Q ops.
def WriteVLD : SchedWrite; // Vector loads. def WriteVLD : SchedWrite; // Vector loads.
def WriteVST : SchedWrite; // Vector stores. def WriteVST : SchedWrite; // Vector stores.
@ -87,9 +88,9 @@ def WriteAtomic : SchedWrite; // Atomic memory operations (CAS, Swap, LDOP)
def ReadVLD : SchedRead; def ReadVLD : SchedRead;
// Sequential vector load and shuffle. // Sequential vector load and shuffle.
def WriteVLDShuffle : WriteSequence<[WriteVLD, WriteV]>; def WriteVLDShuffle : WriteSequence<[WriteVLD, WriteVq]>;
def WriteVLDPairShuffle : WriteSequence<[WriteVLD, WriteV, WriteV]>; def WriteVLDPairShuffle : WriteSequence<[WriteVLD, WriteVq, WriteVq]>;
// Store a shuffled vector. // Store a shuffled vector.
def WriteVSTShuffle : WriteSequence<[WriteV, WriteVST]>; def WriteVSTShuffle : WriteSequence<[WriteVq, WriteVST]>;
def WriteVSTPairShuffle : WriteSequence<[WriteV, WriteV, WriteVST]>; def WriteVSTPairShuffle : WriteSequence<[WriteVq, WriteVq, WriteVST]>;

File diff suppressed because it is too large Load Diff