forked from OSchip/llvm-project
[AArch64] Model Cortex-A55 Q register NEON instructions
Cortex-A55 has two 64-bit NEON vector units, meaning a 128-bit instruction requires taking both units (and can only be issued as the first instruction in a dual-issue pair). This patch models that by splitting the WriteV SchedWrite into two: WriteVd, which reads/writes only 64-bit operands, and WriteVq, which reads/writes 128-bit registers. The A55 schedule then uses this distinction to model WriteVq as taking both resource units and starting a schedule group (BeginGroup), and WriteVd as taking a single unit, as before. I believe this is more correct, even if it does not lead to much better performance. Differential Revision: https://reviews.llvm.org/D108766
This commit is contained in:
parent
9b10e2b1cf
commit
e9adcbde31
|
@ -5310,7 +5310,7 @@ class BaseSIMDThreeSameVector<bit Q, bit U, bits<3> size, bits<5> opcode,
|
|||
: I<(outs regtype:$Rd), (ins regtype:$Rn, regtype:$Rm), asm,
|
||||
"{\t$Rd" # kind # ", $Rn" # kind # ", $Rm" # kind #
|
||||
"|" # kind # "\t$Rd, $Rn, $Rm|}", "", pattern>,
|
||||
Sched<[WriteV]> {
|
||||
Sched<[!if(Q, WriteVq, WriteVd)]> {
|
||||
bits<5> Rd;
|
||||
bits<5> Rn;
|
||||
bits<5> Rm;
|
||||
|
@ -5333,7 +5333,7 @@ class BaseSIMDThreeSameVectorTied<bit Q, bit U, bits<3> size, bits<5> opcode,
|
|||
: I<(outs regtype:$dst), (ins regtype:$Rd, regtype:$Rn, regtype:$Rm), asm,
|
||||
"{\t$Rd" # kind # ", $Rn" # kind # ", $Rm" # kind #
|
||||
"|" # kind # "\t$Rd, $Rn, $Rm}", "$Rd = $dst", pattern>,
|
||||
Sched<[WriteV]> {
|
||||
Sched<[!if(Q, WriteVq, WriteVd)]> {
|
||||
bits<5> Rd;
|
||||
bits<5> Rn;
|
||||
bits<5> Rm;
|
||||
|
@ -5352,7 +5352,7 @@ class BaseSIMDThreeSameVectorTied<bit Q, bit U, bits<3> size, bits<5> opcode,
|
|||
let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in
|
||||
class BaseSIMDThreeSameVectorPseudo<RegisterOperand regtype, list<dag> pattern>
|
||||
: Pseudo<(outs regtype:$dst), (ins regtype:$Rd, regtype:$Rn, regtype:$Rm), pattern>,
|
||||
Sched<[WriteV]>;
|
||||
Sched<[!if(!eq(regtype, V128), WriteVq, WriteVd)]>;
|
||||
|
||||
multiclass SIMDLogicalThreeVectorPseudo<SDPatternOperator OpNode> {
|
||||
def v8i8 : BaseSIMDThreeSameVectorPseudo<V64,
|
||||
|
@ -5705,7 +5705,7 @@ class BaseSIMDTwoSameVector<bit Q, bit U, bits<2> size, bits<5> opcode,
|
|||
: I<(outs regtype:$Rd), (ins regtype:$Rn), asm,
|
||||
"{\t$Rd" # dstkind # ", $Rn" # srckind #
|
||||
"|" # dstkind # "\t$Rd, $Rn}", "", pattern>,
|
||||
Sched<[WriteV]> {
|
||||
Sched<[!if(Q, WriteVq, WriteVd)]> {
|
||||
bits<5> Rd;
|
||||
bits<5> Rn;
|
||||
let Inst{31} = 0;
|
||||
|
@ -5730,7 +5730,7 @@ class BaseSIMDTwoSameVectorTied<bit Q, bit U, bits<2> size, bits<5> opcode,
|
|||
: I<(outs regtype:$dst), (ins regtype:$Rd, regtype:$Rn), asm,
|
||||
"{\t$Rd" # dstkind # ", $Rn" # srckind #
|
||||
"|" # dstkind # "\t$Rd, $Rn}", "$Rd = $dst", pattern>,
|
||||
Sched<[WriteV]> {
|
||||
Sched<[!if(Q, WriteVq, WriteVd)]> {
|
||||
bits<5> Rd;
|
||||
bits<5> Rn;
|
||||
let Inst{31} = 0;
|
||||
|
@ -5776,7 +5776,7 @@ class BaseSIMDVectorLShiftLongBySize<bit Q, bits<2> size,
|
|||
: I<(outs V128:$Rd), (ins regtype:$Rn), asm,
|
||||
"{\t$Rd" # dstkind # ", $Rn" # srckind # ", #" # amount #
|
||||
"|" # dstkind # "\t$Rd, $Rn, #" # amount # "}", "", []>,
|
||||
Sched<[WriteV]> {
|
||||
Sched<[WriteVq]> {
|
||||
bits<5> Rd;
|
||||
bits<5> Rn;
|
||||
let Inst{31} = 0;
|
||||
|
@ -6041,7 +6041,7 @@ class BaseSIMDMixedTwoVector<bit Q, bit U, bits<2> size, bits<5> opcode,
|
|||
: I<(outs outreg:$Rd), (ins inreg:$Rn), asm,
|
||||
"{\t$Rd" # outkind # ", $Rn" # inkind #
|
||||
"|" # outkind # "\t$Rd, $Rn}", "", pattern>,
|
||||
Sched<[WriteV]> {
|
||||
Sched<[WriteVq]> {
|
||||
bits<5> Rd;
|
||||
bits<5> Rn;
|
||||
let Inst{31} = 0;
|
||||
|
@ -6064,7 +6064,7 @@ class BaseSIMDMixedTwoVectorTied<bit Q, bit U, bits<2> size, bits<5> opcode,
|
|||
: I<(outs outreg:$dst), (ins outreg:$Rd, inreg:$Rn), asm,
|
||||
"{\t$Rd" # outkind # ", $Rn" # inkind #
|
||||
"|" # outkind # "\t$Rd, $Rn}", "$Rd = $dst", pattern>,
|
||||
Sched<[WriteV]> {
|
||||
Sched<[WriteVq]> {
|
||||
bits<5> Rd;
|
||||
bits<5> Rn;
|
||||
let Inst{31} = 0;
|
||||
|
@ -6116,7 +6116,7 @@ class BaseSIMDCmpTwoVector<bit Q, bit U, bits<2> size, bits<2> size2,
|
|||
"{\t$Rd" # kind # ", $Rn" # kind # ", #" # zero #
|
||||
"|" # kind # "\t$Rd, $Rn, #" # zero # "}", "",
|
||||
[(set (dty regtype:$Rd), (OpNode (sty regtype:$Rn)))]>,
|
||||
Sched<[WriteV]> {
|
||||
Sched<[!if(Q, WriteVq, WriteVd)]> {
|
||||
bits<5> Rd;
|
||||
bits<5> Rn;
|
||||
let Inst{31} = 0;
|
||||
|
@ -6214,7 +6214,7 @@ class BaseSIMDFPCvtTwoVector<bit Q, bit U, bits<2> size, bits<5> opcode,
|
|||
list<dag> pattern>
|
||||
: I<(outs outtype:$Rd), (ins intype:$Rn), asm,
|
||||
!strconcat("\t$Rd", VdTy, ", $Rn", VnTy), "", pattern>,
|
||||
Sched<[WriteV]> {
|
||||
Sched<[WriteVq]> {
|
||||
bits<5> Rd;
|
||||
bits<5> Rn;
|
||||
let Inst{31} = 0;
|
||||
|
@ -6236,7 +6236,7 @@ class BaseSIMDFPCvtTwoVectorTied<bit Q, bit U, bits<2> size, bits<5> opcode,
|
|||
list<dag> pattern>
|
||||
: I<(outs outtype:$dst), (ins outtype:$Rd, intype:$Rn), asm,
|
||||
!strconcat("\t$Rd", VdTy, ", $Rn", VnTy), "$Rd = $dst", pattern>,
|
||||
Sched<[WriteV]> {
|
||||
Sched<[WriteVq]> {
|
||||
bits<5> Rd;
|
||||
bits<5> Rn;
|
||||
let Inst{31} = 0;
|
||||
|
@ -6299,7 +6299,7 @@ class BaseSIMDDifferentThreeVector<bit U, bits<3> size, bits<4> opcode,
|
|||
: I<(outs outtype:$Rd), (ins intype1:$Rn, intype2:$Rm), asm,
|
||||
"{\t$Rd" # outkind # ", $Rn" # inkind1 # ", $Rm" # inkind2 #
|
||||
"|" # outkind # "\t$Rd, $Rn, $Rm}", "", pattern>,
|
||||
Sched<[WriteV]> {
|
||||
Sched<[WriteVq]> {
|
||||
bits<5> Rd;
|
||||
bits<5> Rn;
|
||||
bits<5> Rm;
|
||||
|
@ -6325,7 +6325,7 @@ class BaseSIMDDifferentThreeVectorTied<bit U, bits<3> size, bits<4> opcode,
|
|||
: I<(outs outtype:$dst), (ins outtype:$Rd, intype1:$Rn, intype2:$Rm), asm,
|
||||
"{\t$Rd" # outkind # ", $Rn" # inkind1 # ", $Rm" # inkind2 #
|
||||
"|" # outkind # "\t$Rd, $Rn, $Rm}", "$Rd = $dst", pattern>,
|
||||
Sched<[WriteV]> {
|
||||
Sched<[WriteVq]> {
|
||||
bits<5> Rd;
|
||||
bits<5> Rn;
|
||||
bits<5> Rm;
|
||||
|
@ -6665,7 +6665,7 @@ class BaseSIMDBitwiseExtract<bit size, RegisterOperand regtype, ValueType vty,
|
|||
"|" # kind # "\t$Rd, $Rn, $Rm, $imm}", "",
|
||||
[(set (vty regtype:$Rd),
|
||||
(AArch64ext regtype:$Rn, regtype:$Rm, (i32 imm:$imm)))]>,
|
||||
Sched<[WriteV]> {
|
||||
Sched<[!if(size, WriteVq, WriteVd)]> {
|
||||
bits<5> Rd;
|
||||
bits<5> Rn;
|
||||
bits<5> Rm;
|
||||
|
@ -6699,7 +6699,7 @@ class BaseSIMDZipVector<bits<3> size, bits<3> opc, RegisterOperand regtype,
|
|||
"{\t$Rd" # kind # ", $Rn" # kind # ", $Rm" # kind #
|
||||
"|" # kind # "\t$Rd, $Rn, $Rm}", "",
|
||||
[(set (valty regtype:$Rd), (OpNode regtype:$Rn, regtype:$Rm))]>,
|
||||
Sched<[WriteV]> {
|
||||
Sched<[!if(!eq(regtype, V128), WriteVq, WriteVd)]> {
|
||||
bits<5> Rd;
|
||||
bits<5> Rn;
|
||||
bits<5> Rm;
|
||||
|
@ -6755,7 +6755,7 @@ class BaseSIMDThreeScalar<bit U, bits<3> size, bits<5> opcode,
|
|||
list<dag> pattern>
|
||||
: I<(outs regtype:$Rd), (ins regtype:$Rn, regtype:$Rm), asm,
|
||||
"\t$Rd, $Rn, $Rm", "", pattern>,
|
||||
Sched<[WriteV]> {
|
||||
Sched<[WriteVd]> {
|
||||
bits<5> Rd;
|
||||
bits<5> Rn;
|
||||
bits<5> Rm;
|
||||
|
@ -6775,7 +6775,7 @@ class BaseSIMDThreeScalarTied<bit U, bits<2> size, bit R, bits<5> opcode,
|
|||
dag oops, dag iops, string asm,
|
||||
list<dag> pattern>
|
||||
: I<oops, iops, asm, "\t$Rd, $Rn, $Rm", "$Rd = $dst", pattern>,
|
||||
Sched<[WriteV]> {
|
||||
Sched<[WriteVd]> {
|
||||
bits<5> Rd;
|
||||
bits<5> Rn;
|
||||
bits<5> Rm;
|
||||
|
@ -6868,7 +6868,7 @@ class BaseSIMDThreeScalarMixed<bit U, bits<2> size, bits<5> opcode,
|
|||
dag oops, dag iops, string asm, string cstr, list<dag> pat>
|
||||
: I<oops, iops, asm,
|
||||
"\t$Rd, $Rn, $Rm", cstr, pat>,
|
||||
Sched<[WriteV]> {
|
||||
Sched<[WriteVd]> {
|
||||
bits<5> Rd;
|
||||
bits<5> Rn;
|
||||
bits<5> Rm;
|
||||
|
@ -6921,7 +6921,7 @@ class BaseSIMDTwoScalar<bit U, bits<2> size, bits<2> size2, bits<5> opcode,
|
|||
string asm, list<dag> pat>
|
||||
: I<(outs regtype:$Rd), (ins regtype2:$Rn), asm,
|
||||
"\t$Rd, $Rn", "", pat>,
|
||||
Sched<[WriteV]> {
|
||||
Sched<[WriteVd]> {
|
||||
bits<5> Rd;
|
||||
bits<5> Rn;
|
||||
let Inst{31-30} = 0b01;
|
||||
|
@ -6943,7 +6943,7 @@ class BaseSIMDTwoScalarTied<bit U, bits<2> size, bits<5> opcode,
|
|||
string asm, list<dag> pat>
|
||||
: I<(outs regtype:$dst), (ins regtype:$Rd, regtype2:$Rn), asm,
|
||||
"\t$Rd, $Rn", "$Rd = $dst", pat>,
|
||||
Sched<[WriteV]> {
|
||||
Sched<[WriteVd]> {
|
||||
bits<5> Rd;
|
||||
bits<5> Rn;
|
||||
let Inst{31-30} = 0b01;
|
||||
|
@ -6963,7 +6963,7 @@ class BaseSIMDCmpTwoScalar<bit U, bits<2> size, bits<2> size2, bits<5> opcode,
|
|||
RegisterClass regtype, string asm, string zero>
|
||||
: I<(outs regtype:$Rd), (ins regtype:$Rn), asm,
|
||||
"\t$Rd, $Rn, #" # zero, "", []>,
|
||||
Sched<[WriteV]> {
|
||||
Sched<[WriteVd]> {
|
||||
bits<5> Rd;
|
||||
bits<5> Rn;
|
||||
let Inst{31-30} = 0b01;
|
||||
|
@ -6982,7 +6982,7 @@ class BaseSIMDCmpTwoScalar<bit U, bits<2> size, bits<2> size2, bits<5> opcode,
|
|||
class SIMDInexactCvtTwoScalar<bits<5> opcode, string asm>
|
||||
: I<(outs FPR32:$Rd), (ins FPR64:$Rn), asm, "\t$Rd, $Rn", "",
|
||||
[(set (f32 FPR32:$Rd), (int_aarch64_sisd_fcvtxn (f64 FPR64:$Rn)))]>,
|
||||
Sched<[WriteV]> {
|
||||
Sched<[WriteVd]> {
|
||||
bits<5> Rd;
|
||||
bits<5> Rn;
|
||||
let Inst{31-17} = 0b011111100110000;
|
||||
|
@ -7104,7 +7104,7 @@ class BaseSIMDPairwiseScalar<bit U, bits<2> size, bits<5> opcode,
|
|||
string asm, string kind>
|
||||
: I<(outs regtype:$Rd), (ins vectype:$Rn), asm,
|
||||
"{\t$Rd, $Rn" # kind # "|" # kind # "\t$Rd, $Rn}", "", []>,
|
||||
Sched<[WriteV]> {
|
||||
Sched<[WriteVd]> {
|
||||
bits<5> Rd;
|
||||
bits<5> Rn;
|
||||
let Inst{31-30} = 0b01;
|
||||
|
@ -7144,7 +7144,7 @@ class BaseSIMDAcrossLanes<bit Q, bit U, bits<2> size, bits<5> opcode,
|
|||
string asm, string kind, list<dag> pattern>
|
||||
: I<(outs regtype:$Rd), (ins vectype:$Rn), asm,
|
||||
"{\t$Rd, $Rn" # kind # "|" # kind # "\t$Rd, $Rn}", "", pattern>,
|
||||
Sched<[WriteV]> {
|
||||
Sched<[!if(Q, WriteVq, WriteVd)]> {
|
||||
bits<5> Rd;
|
||||
bits<5> Rn;
|
||||
let Inst{31} = 0;
|
||||
|
@ -7210,7 +7210,7 @@ multiclass SIMDFPAcrossLanes<bits<5> opcode, bit sz1, string asm,
|
|||
class BaseSIMDInsDup<bit Q, bit op, dag outs, dag ins, string asm,
|
||||
string operands, string constraints, list<dag> pattern>
|
||||
: I<outs, ins, asm, operands, constraints, pattern>,
|
||||
Sched<[WriteV]> {
|
||||
Sched<[!if(Q, WriteVq, WriteVd)]> {
|
||||
bits<5> Rd;
|
||||
bits<5> Rn;
|
||||
let Inst{31} = 0;
|
||||
|
@ -7522,7 +7522,7 @@ class BaseSIMDTableLookup<bit Q, bits<2> len, bit op, RegisterOperand vectype,
|
|||
RegisterOperand listtype, string asm, string kind>
|
||||
: I<(outs vectype:$Vd), (ins listtype:$Vn, vectype:$Vm), asm,
|
||||
"\t$Vd" # kind # ", $Vn, $Vm" # kind, "", []>,
|
||||
Sched<[WriteV]> {
|
||||
Sched<[!if(Q, WriteVq, WriteVd)]> {
|
||||
bits<5> Vd;
|
||||
bits<5> Vn;
|
||||
bits<5> Vm;
|
||||
|
@ -7543,7 +7543,7 @@ class BaseSIMDTableLookupTied<bit Q, bits<2> len, bit op, RegisterOperand vectyp
|
|||
RegisterOperand listtype, string asm, string kind>
|
||||
: I<(outs vectype:$dst), (ins vectype:$Vd, listtype:$Vn, vectype:$Vm), asm,
|
||||
"\t$Vd" # kind # ", $Vn, $Vm" # kind, "$Vd = $dst", []>,
|
||||
Sched<[WriteV]> {
|
||||
Sched<[!if(Q, WriteVq, WriteVd)]> {
|
||||
bits<5> Vd;
|
||||
bits<5> Vn;
|
||||
bits<5> Vm;
|
||||
|
@ -7662,7 +7662,7 @@ class BaseSIMDScalarCPY<RegisterClass regtype, RegisterOperand vectype,
|
|||
: I<(outs regtype:$dst), (ins vectype:$src, idxtype:$idx), asm,
|
||||
"{\t$dst, $src" # kind # "$idx" #
|
||||
"|\t$dst, $src$idx}", "", []>,
|
||||
Sched<[WriteV]> {
|
||||
Sched<[WriteVd]> {
|
||||
bits<5> dst;
|
||||
bits<5> src;
|
||||
let Inst{31-21} = 0b01011110000;
|
||||
|
@ -7727,7 +7727,7 @@ class BaseSIMDModifiedImm<bit Q, bit op, bit op2, dag oops, dag iops,
|
|||
string asm, string op_string,
|
||||
string cstr, list<dag> pattern>
|
||||
: I<oops, iops, asm, op_string, cstr, pattern>,
|
||||
Sched<[WriteV]> {
|
||||
Sched<[!if(Q, WriteVq, WriteVd)]> {
|
||||
bits<5> Rd;
|
||||
bits<8> imm8;
|
||||
let Inst{31} = 0;
|
||||
|
@ -7897,7 +7897,7 @@ class BaseSIMDIndexed<bit Q, bit U, bit Scalar, bits<2> size, bits<4> opc,
|
|||
asm,
|
||||
"{\t$Rd" # dst_kind # ", $Rn" # lhs_kind # ", $Rm" # rhs_kind # "$idx" #
|
||||
"|" # apple_kind # "\t$Rd, $Rn, $Rm$idx}", "", pattern>,
|
||||
Sched<[WriteV]> {
|
||||
Sched<[WriteVd]> {
|
||||
bits<5> Rd;
|
||||
bits<5> Rn;
|
||||
bits<5> Rm;
|
||||
|
@ -7927,7 +7927,7 @@ class BaseSIMDIndexedTied<bit Q, bit U, bit Scalar, bits<2> size, bits<4> opc,
|
|||
(ins dst_reg:$Rd, lhs_reg:$Rn, rhs_reg:$Rm, vec_idx:$idx), asm,
|
||||
"{\t$Rd" # dst_kind # ", $Rn" # lhs_kind # ", $Rm" # rhs_kind # "$idx" #
|
||||
"|" # apple_kind # "\t$Rd, $Rn, $Rm$idx}", "$Rd = $dst", pattern>,
|
||||
Sched<[WriteV]> {
|
||||
Sched<[WriteVd]> {
|
||||
bits<5> Rd;
|
||||
bits<5> Rn;
|
||||
bits<5> Rm;
|
||||
|
@ -8020,7 +8020,7 @@ class SIMDBF16MLALIndex<bit Q, string asm, SDPatternOperator OpNode>
|
|||
(v8bf16
|
||||
(AArch64duplane16 (v8bf16 V128_lo:$Rm),
|
||||
VectorIndexH:$idx)))))]>,
|
||||
Sched<[WriteV]> {
|
||||
Sched<[WriteVq]> {
|
||||
bits<5> Rd;
|
||||
bits<5> Rn;
|
||||
bits<4> Rm;
|
||||
|
@ -8941,7 +8941,7 @@ class BaseSIMDScalarShift<bit U, bits<5> opc, bits<7> fixed_imm,
|
|||
Operand immtype, string asm, list<dag> pattern>
|
||||
: I<(outs regtype1:$Rd), (ins regtype2:$Rn, immtype:$imm),
|
||||
asm, "\t$Rd, $Rn, $imm", "", pattern>,
|
||||
Sched<[WriteV]> {
|
||||
Sched<[WriteVd]> {
|
||||
bits<5> Rd;
|
||||
bits<5> Rn;
|
||||
bits<7> imm;
|
||||
|
@ -8961,7 +8961,7 @@ class BaseSIMDScalarShiftTied<bit U, bits<5> opc, bits<7> fixed_imm,
|
|||
Operand immtype, string asm, list<dag> pattern>
|
||||
: I<(outs regtype1:$dst), (ins regtype1:$Rd, regtype2:$Rn, immtype:$imm),
|
||||
asm, "\t$Rd, $Rn, $imm", "$Rd = $dst", pattern>,
|
||||
Sched<[WriteV]> {
|
||||
Sched<[WriteVd]> {
|
||||
bits<5> Rd;
|
||||
bits<5> Rn;
|
||||
bits<7> imm;
|
||||
|
@ -9125,7 +9125,7 @@ class BaseSIMDVectorShift<bit Q, bit U, bits<5> opc, bits<7> fixed_imm,
|
|||
: I<(outs dst_reg:$Rd), (ins src_reg:$Rn, immtype:$imm),
|
||||
asm, "{\t$Rd" # dst_kind # ", $Rn" # src_kind # ", $imm" #
|
||||
"|" # dst_kind # "\t$Rd, $Rn, $imm}", "", pattern>,
|
||||
Sched<[WriteV]> {
|
||||
Sched<[!if(Q, WriteVq, WriteVd)]> {
|
||||
bits<5> Rd;
|
||||
bits<5> Rn;
|
||||
let Inst{31} = 0;
|
||||
|
@ -9148,7 +9148,7 @@ class BaseSIMDVectorShiftTied<bit Q, bit U, bits<5> opc, bits<7> fixed_imm,
|
|||
: I<(outs vectype1:$dst), (ins vectype1:$Rd, vectype2:$Rn, immtype:$imm),
|
||||
asm, "{\t$Rd" # dst_kind # ", $Rn" # src_kind # ", $imm" #
|
||||
"|" # dst_kind # "\t$Rd, $Rn, $imm}", "$Rd = $dst", pattern>,
|
||||
Sched<[WriteV]> {
|
||||
Sched<[!if(Q, WriteVq, WriteVd)]> {
|
||||
bits<5> Rd;
|
||||
bits<5> Rn;
|
||||
let Inst{31} = 0;
|
||||
|
@ -10695,7 +10695,7 @@ class BaseSIMDThreeSameVectorComplex<bit Q, bit U, bits<2> size, bits<3> opcode,
|
|||
: I<(outs regtype:$Rd), (ins regtype:$Rn, regtype:$Rm, rottype:$rot), asm,
|
||||
"{\t$Rd" # kind # ", $Rn" # kind # ", $Rm" # kind # ", $rot"
|
||||
"|" # kind # "\t$Rd, $Rn, $Rm, $rot}", "", pattern>,
|
||||
Sched<[WriteV]> {
|
||||
Sched<[!if(Q, WriteVq, WriteVd)]> {
|
||||
bits<5> Rd;
|
||||
bits<5> Rn;
|
||||
bits<5> Rm;
|
||||
|
@ -10769,7 +10769,7 @@ class BaseSIMDThreeSameVectorTiedComplex<bit Q, bit U, bits<2> size,
|
|||
(ins regtype:$Rd, regtype:$Rn, regtype:$Rm, rottype:$rot), asm,
|
||||
"{\t$Rd" # kind # ", $Rn" # kind # ", $Rm" # kind # ", $rot"
|
||||
"|" # kind # "\t$Rd, $Rn, $Rm, $rot}", "$Rd = $dst", pattern>,
|
||||
Sched<[WriteV]> {
|
||||
Sched<[!if(Q, WriteVq, WriteVd)]> {
|
||||
bits<5> Rd;
|
||||
bits<5> Rn;
|
||||
bits<5> Rm;
|
||||
|
@ -10845,7 +10845,7 @@ class BaseSIMDIndexedTiedComplex<bit Q, bit U, bit Scalar, bits<2> size,
|
|||
"{\t$Rd" # dst_kind # ", $Rn" # lhs_kind # ", $Rm" # rhs_kind #
|
||||
"$idx, $rot" # "|" # apple_kind #
|
||||
"\t$Rd, $Rn, $Rm$idx, $rot}", "$Rd = $dst", pattern>,
|
||||
Sched<[WriteV]> {
|
||||
Sched<[!if(Q, WriteVq, WriteVd)]> {
|
||||
bits<5> Rd;
|
||||
bits<5> Rn;
|
||||
bits<5> Rm;
|
||||
|
@ -10910,7 +10910,7 @@ let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in
|
|||
class AESBase<bits<4> opc, string asm, dag outs, dag ins, string cstr,
|
||||
list<dag> pat>
|
||||
: I<outs, ins, asm, "{\t$Rd.16b, $Rn.16b|.16b\t$Rd, $Rn}", cstr, pat>,
|
||||
Sched<[WriteV]>{
|
||||
Sched<[WriteVq]>{
|
||||
bits<5> Rd;
|
||||
bits<5> Rn;
|
||||
let Inst{31-16} = 0b0100111000101000;
|
||||
|
@ -10936,7 +10936,7 @@ class SHA3OpTiedInst<bits<3> opc, string asm, string dst_lhs_kind,
|
|||
: I<oops, iops, asm,
|
||||
"{\t$Rd" # dst_lhs_kind # ", $Rn" # dst_lhs_kind # ", $Rm.4s" #
|
||||
"|.4s\t$Rd, $Rn, $Rm}", "$Rd = $dst", pat>,
|
||||
Sched<[WriteV]>{
|
||||
Sched<[WriteVq]>{
|
||||
bits<5> Rd;
|
||||
bits<5> Rn;
|
||||
bits<5> Rm;
|
||||
|
@ -10976,7 +10976,7 @@ class SHA2OpInst<bits<4> opc, string asm, string kind,
|
|||
list<dag> pat>
|
||||
: I<oops, iops, asm, "{\t$Rd" # kind # ", $Rn" # kind #
|
||||
"|" # kind # "\t$Rd, $Rn}", cstr, pat>,
|
||||
Sched<[WriteV]>{
|
||||
Sched<[WriteVq]>{
|
||||
bits<5> Rd;
|
||||
bits<5> Rn;
|
||||
let Inst{31-16} = 0b0101111000101000;
|
||||
|
@ -10999,7 +10999,7 @@ class SHAInstSS<bits<4> opc, string asm, Intrinsic OpNode>
|
|||
// Armv8.2-A Crypto extensions
|
||||
class BaseCryptoV82<dag oops, dag iops, string asm, string asmops, string cst,
|
||||
list<dag> pattern>
|
||||
: I <oops, iops, asm, asmops, cst, pattern>, Sched<[WriteV]> {
|
||||
: I <oops, iops, asm, asmops, cst, pattern>, Sched<[WriteVq]> {
|
||||
bits<5> Vd;
|
||||
bits<5> Vn;
|
||||
let Inst{31-25} = 0b1100111;
|
||||
|
|
|
@ -7006,9 +7006,9 @@ def AESIMCrr : AESInst< 0b0111, "aesimc", int_aarch64_crypto_aesimc>;
|
|||
// for AES fusion on some CPUs.
|
||||
let hasSideEffects = 0, mayStore = 0, mayLoad = 0 in {
|
||||
def AESMCrrTied: Pseudo<(outs V128:$Rd), (ins V128:$Rn), [], "$Rn = $Rd">,
|
||||
Sched<[WriteV]>;
|
||||
Sched<[WriteVq]>;
|
||||
def AESIMCrrTied: Pseudo<(outs V128:$Rd), (ins V128:$Rn), [], "$Rn = $Rd">,
|
||||
Sched<[WriteV]>;
|
||||
Sched<[WriteVq]>;
|
||||
}
|
||||
|
||||
// Only use constrained versions of AES(I)MC instructions if they are paired with
|
||||
|
|
|
@ -127,7 +127,8 @@ def : WriteRes<WriteFCmp, [A53UnitFPALU]> { let Latency = 6; }
|
|||
def : WriteRes<WriteFCvt, [A53UnitFPALU]> { let Latency = 6; }
|
||||
def : WriteRes<WriteFCopy, [A53UnitFPALU]> { let Latency = 6; }
|
||||
def : WriteRes<WriteFImm, [A53UnitFPALU]> { let Latency = 6; }
|
||||
def : WriteRes<WriteV, [A53UnitFPALU]> { let Latency = 6; }
|
||||
def : WriteRes<WriteVd, [A53UnitFPALU]> { let Latency = 6; }
|
||||
def : WriteRes<WriteVq, [A53UnitFPALU]> { let Latency = 6; }
|
||||
|
||||
// FP Mul, Div, Sqrt
|
||||
def : WriteRes<WriteFMul, [A53UnitFPMDS]> { let Latency = 6; }
|
||||
|
|
|
@ -149,9 +149,11 @@ def : WriteRes<WriteFCmp, [CortexA55UnitFPALU]> { let Latency = 3; }
|
|||
def : WriteRes<WriteFCvt, [CortexA55UnitFPALU]> { let Latency = 4; }
|
||||
def : WriteRes<WriteFCopy, [CortexA55UnitFPALU]> { let Latency = 3; }
|
||||
def : WriteRes<WriteFImm, [CortexA55UnitFPALU]> { let Latency = 3; }
|
||||
def : WriteRes<WriteV, [CortexA55UnitFPALU]> { let Latency = 4; }
|
||||
def : WriteRes<WriteVd, [CortexA55UnitFPALU]> { let Latency = 4; }
|
||||
def : WriteRes<WriteVq, [CortexA55UnitFPALU,CortexA55UnitFPALU]> { let Latency = 4; let BeginGroup = 1; }
|
||||
|
||||
// FP ALU specific new schedwrite definitions
|
||||
def CortexA55WriteFPALU_F2 : SchedWriteRes<[CortexA55UnitFPALU]> { let Latency = 2;}
|
||||
def CortexA55WriteFPALU_F3 : SchedWriteRes<[CortexA55UnitFPALU]> { let Latency = 3;}
|
||||
def CortexA55WriteFPALU_F4 : SchedWriteRes<[CortexA55UnitFPALU]> { let Latency = 4;}
|
||||
def CortexA55WriteFPALU_F5 : SchedWriteRes<[CortexA55UnitFPALU]> { let Latency = 5;}
|
||||
|
@ -331,6 +333,8 @@ def : InstRW<[CortexA55WriteVST4, WriteAdr], (instregex "ST4Fourv(8b|4h|2s|1d|16
|
|||
//---
|
||||
// Floating Point Conversions, MAC, DIV, SQRT
|
||||
//---
|
||||
def : InstRW<[CortexA55WriteFPALU_F2], (instregex "^DUP(v2i64|v4i32|v8i16|v16i8)")>;
|
||||
def : InstRW<[CortexA55WriteFPALU_F2], (instregex "^XTN")>;
|
||||
def : InstRW<[CortexA55WriteFPALU_F3], (instregex "^FCVT[ALMNPZ][SU](S|U)?(W|X)")>;
|
||||
def : InstRW<[CortexA55WriteFPALU_F4], (instregex "^FCVT(X)?[ALMNPXZ](S|U|N)?v")>;
|
||||
|
||||
|
|
|
@ -96,7 +96,8 @@ def : SchedAlias<WriteFCopy, A57Write_5cyc_1L>;
|
|||
def : SchedAlias<WriteFImm, A57Write_3cyc_1V>;
|
||||
def : WriteRes<WriteFMul, [A57UnitV]> { let Latency = 5;}
|
||||
def : SchedAlias<WriteFDiv, A57Write_17cyc_1W>;
|
||||
def : SchedAlias<WriteV, A57Write_3cyc_1V>;
|
||||
def : SchedAlias<WriteVd, A57Write_3cyc_1V>;
|
||||
def : SchedAlias<WriteVq, A57Write_3cyc_1V>;
|
||||
def : SchedAlias<WriteVLD, A57Write_5cyc_1L>;
|
||||
def : SchedAlias<WriteVST, A57Write_1cyc_1S>;
|
||||
|
||||
|
|
|
@ -1627,7 +1627,11 @@ def : InstRW<[A64FXWrite_FMOV_VG14], (instrs FMOVDXHighr)>;
|
|||
// ASIMD shift by register, basic, Q-form
|
||||
// ASIMD shift by register, complex, D-form
|
||||
// ASIMD shift by register, complex, Q-form
|
||||
def : WriteRes<WriteV, [A64FXGI03]> {
|
||||
def : WriteRes<WriteVd, [A64FXGI03]> {
|
||||
let Latency = 4;
|
||||
let ResourceCycles = [1];
|
||||
}
|
||||
def : WriteRes<WriteVq, [A64FXGI03]> {
|
||||
let Latency = 4;
|
||||
let ResourceCycles = [1];
|
||||
}
|
||||
|
|
|
@ -304,7 +304,8 @@ def : WriteRes<WriteSys, []> {let Latency = -1;}
|
|||
// 7.9 Vector Unit Instructions
|
||||
|
||||
// Simple vector operations take 2 cycles.
|
||||
def : WriteRes<WriteV, [CyUnitV]> {let Latency = 2;}
|
||||
def : WriteRes<WriteVd, [CyUnitV]> {let Latency = 2;}
|
||||
def : WriteRes<WriteVq, [CyUnitV]> {let Latency = 2;}
|
||||
|
||||
// Define some longer latency vector op types for Cyclone.
|
||||
def CyWriteV3 : SchedWriteRes<[CyUnitV]> {let Latency = 3;}
|
||||
|
@ -335,7 +336,7 @@ def : WriteRes<WriteFImm, [CyUnitV]> {let Latency = 2;}
|
|||
// COPY is handled above in the WriteMov Variant.
|
||||
def WriteVMov : SchedWriteVariant<[
|
||||
SchedVar<WriteVMovPred, [WriteX]>,
|
||||
SchedVar<NoSchedPred, [WriteV]>]>;
|
||||
SchedVar<NoSchedPred, [WriteVq]>]>;
|
||||
def : InstRW<[WriteVMov], (instrs ORRv16i8)>;
|
||||
|
||||
// FMOVSr,FMOVDr are WriteF.
|
||||
|
@ -355,7 +356,7 @@ def : WriteRes<WriteFCopy, [CyUnitLS]> {
|
|||
def : InstRW<[WriteLD], (instrs FMOVSWr,FMOVDXr,FMOVDXHighr)>;
|
||||
|
||||
// INS V[x],R
|
||||
def CyWriteCopyToFPR : WriteSequence<[WriteVLD, WriteV]>;
|
||||
def CyWriteCopyToFPR : WriteSequence<[WriteVLD, WriteVq]>;
|
||||
def : InstRW<[CyWriteCopyToFPR], (instregex "INSv")>;
|
||||
|
||||
// SMOV,UMOV R,V[x]
|
||||
|
@ -571,7 +572,7 @@ def : InstRW<[WriteFRSQRTS], (instregex "FRSQRTSv")>;
|
|||
//---
|
||||
|
||||
// FCVT lengthen f16/s32
|
||||
def : InstRW<[WriteV], (instrs FCVTSHr,FCVTDHr,FCVTDSr)>;
|
||||
def : InstRW<[WriteVq], (instrs FCVTSHr,FCVTDHr,FCVTDSr)>;
|
||||
|
||||
// FCVT,FCVTN,FCVTXN
|
||||
// SCVTF,UCVTF V,V
|
||||
|
@ -681,61 +682,61 @@ def : InstRW<[WriteVLDShuffle],
|
|||
def : InstRW<[WriteVLDShuffle, WriteAdr],
|
||||
(instregex "LD1Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
|
||||
|
||||
def : InstRW<[WriteVLDShuffle, WriteV],
|
||||
def : InstRW<[WriteVLDShuffle, WriteVq],
|
||||
(instregex "LD2Twov(8b|4h|2s)$")>;
|
||||
def : InstRW<[WriteVLDShuffle, WriteAdr, WriteV],
|
||||
def : InstRW<[WriteVLDShuffle, WriteAdr, WriteVq],
|
||||
(instregex "LD2Twov(8b|4h|2s)_POST$")>;
|
||||
def : InstRW<[WriteVLDShuffle, WriteVLDShuffle],
|
||||
(instregex "LD2Twov(16b|8h|4s|2d)$")>;
|
||||
def : InstRW<[WriteVLDShuffle, WriteAdr, WriteVLDShuffle],
|
||||
(instregex "LD2Twov(16b|8h|4s|2d)_POST")>;
|
||||
|
||||
def : InstRW<[WriteVLDShuffle, ReadVLD, WriteV],
|
||||
def : InstRW<[WriteVLDShuffle, ReadVLD, WriteVq],
|
||||
(instregex "LD2i(8|16|32)$")>;
|
||||
def : InstRW<[WriteVLDShuffle, ReadVLD, WriteAdr, WriteV],
|
||||
def : InstRW<[WriteVLDShuffle, ReadVLD, WriteAdr, WriteVq],
|
||||
(instregex "LD2i(8|16|32)_POST")>;
|
||||
def : InstRW<[WriteVLDShuffle, ReadVLD, WriteV],
|
||||
def : InstRW<[WriteVLDShuffle, ReadVLD, WriteVq],
|
||||
(instregex "LD2i64$")>;
|
||||
def : InstRW<[WriteVLDShuffle, ReadVLD, WriteAdr, WriteV],
|
||||
def : InstRW<[WriteVLDShuffle, ReadVLD, WriteAdr, WriteVq],
|
||||
(instregex "LD2i64_POST")>;
|
||||
|
||||
def : InstRW<[WriteVLDShuffle, WriteV],
|
||||
def : InstRW<[WriteVLDShuffle, WriteVq],
|
||||
(instregex "LD2Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
|
||||
def : InstRW<[WriteVLDShuffle, WriteAdr, WriteV],
|
||||
def : InstRW<[WriteVLDShuffle, WriteAdr, WriteVq],
|
||||
(instregex "LD2Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST")>;
|
||||
|
||||
def : InstRW<[WriteVLDShuffle, WriteVLDShuffle, WriteV],
|
||||
def : InstRW<[WriteVLDShuffle, WriteVLDShuffle, WriteVq],
|
||||
(instregex "LD3Threev(8b|4h|2s)$")>;
|
||||
def : InstRW<[WriteVLDShuffle, WriteAdr, WriteVLDShuffle, WriteV],
|
||||
def : InstRW<[WriteVLDShuffle, WriteAdr, WriteVLDShuffle, WriteVq],
|
||||
(instregex "LD3Threev(8b|4h|2s)_POST")>;
|
||||
def : InstRW<[WriteVLDShuffle, WriteVLDShuffle, WriteVLDShuffle],
|
||||
(instregex "LD3Threev(16b|8h|4s|2d)$")>;
|
||||
def : InstRW<[WriteVLDShuffle, WriteAdr, WriteVLDShuffle, WriteVLDShuffle],
|
||||
(instregex "LD3Threev(16b|8h|4s|2d)_POST")>;
|
||||
|
||||
def : InstRW<[WriteVLDShuffle, ReadVLD, WriteV, WriteV],
|
||||
def : InstRW<[WriteVLDShuffle, ReadVLD, WriteVq, WriteVq],
|
||||
(instregex "LD3i(8|16|32)$")>;
|
||||
def : InstRW<[WriteVLDShuffle, ReadVLD, WriteAdr, WriteV, WriteV],
|
||||
def : InstRW<[WriteVLDShuffle, ReadVLD, WriteAdr, WriteVq, WriteVq],
|
||||
(instregex "LD3i(8|16|32)_POST")>;
|
||||
|
||||
def : InstRW<[WriteVLDShuffle, ReadVLD, WriteVLDShuffle, WriteV],
|
||||
def : InstRW<[WriteVLDShuffle, ReadVLD, WriteVLDShuffle, WriteVq],
|
||||
(instregex "LD3i64$")>;
|
||||
def : InstRW<[WriteVLDShuffle, ReadVLD, WriteAdr, WriteVLDShuffle, WriteV],
|
||||
def : InstRW<[WriteVLDShuffle, ReadVLD, WriteAdr, WriteVLDShuffle, WriteVq],
|
||||
(instregex "LD3i64_POST")>;
|
||||
|
||||
def : InstRW<[WriteVLDShuffle, WriteV, WriteV],
|
||||
def : InstRW<[WriteVLDShuffle, WriteVq, WriteVq],
|
||||
(instregex "LD3Rv(8b|4h|2s|16b|8h|4s)$")>;
|
||||
def : InstRW<[WriteVLDShuffle, WriteAdr, WriteV, WriteV],
|
||||
def : InstRW<[WriteVLDShuffle, WriteAdr, WriteVq, WriteVq],
|
||||
(instregex "LD3Rv(8b|4h|2s|16b|8h|4s)_POST")>;
|
||||
|
||||
def : InstRW<[WriteVLDShuffle, WriteVLDShuffle, WriteV],
|
||||
def : InstRW<[WriteVLDShuffle, WriteVLDShuffle, WriteVq],
|
||||
(instrs LD3Rv1d,LD3Rv2d)>;
|
||||
def : InstRW<[WriteVLDShuffle, WriteAdr, WriteVLDShuffle, WriteV],
|
||||
def : InstRW<[WriteVLDShuffle, WriteAdr, WriteVLDShuffle, WriteVq],
|
||||
(instrs LD3Rv1d_POST,LD3Rv2d_POST)>;
|
||||
|
||||
def : InstRW<[WriteVLDShuffle, WriteVLDShuffle, WriteV, WriteV],
|
||||
def : InstRW<[WriteVLDShuffle, WriteVLDShuffle, WriteVq, WriteVq],
|
||||
(instregex "LD4Fourv(8b|4h|2s)$")>;
|
||||
def : InstRW<[WriteVLDShuffle, WriteAdr, WriteVLDShuffle, WriteV, WriteV],
|
||||
def : InstRW<[WriteVLDShuffle, WriteAdr, WriteVLDShuffle, WriteVq, WriteVq],
|
||||
(instregex "LD4Fourv(8b|4h|2s)_POST")>;
|
||||
def : InstRW<[WriteVLDPairShuffle, WriteVLDPairShuffle,
|
||||
WriteVLDPairShuffle, WriteVLDPairShuffle],
|
||||
|
@ -744,25 +745,25 @@ def : InstRW<[WriteVLDPairShuffle, WriteAdr, WriteVLDPairShuffle,
|
|||
WriteVLDPairShuffle, WriteVLDPairShuffle],
|
||||
(instregex "LD4Fourv(16b|8h|4s|2d)_POST")>;
|
||||
|
||||
def : InstRW<[WriteVLDShuffle, ReadVLD, WriteV, WriteV, WriteV],
|
||||
def : InstRW<[WriteVLDShuffle, ReadVLD, WriteVq, WriteVq, WriteVq],
|
||||
(instregex "LD4i(8|16|32)$")>;
|
||||
def : InstRW<[WriteVLDShuffle, ReadVLD, WriteAdr, WriteV, WriteV, WriteV],
|
||||
def : InstRW<[WriteVLDShuffle, ReadVLD, WriteAdr, WriteVq, WriteVq, WriteVq],
|
||||
(instregex "LD4i(8|16|32)_POST")>;
|
||||
|
||||
|
||||
def : InstRW<[WriteVLDShuffle, ReadVLD, WriteVLDShuffle, WriteV, WriteV],
|
||||
def : InstRW<[WriteVLDShuffle, ReadVLD, WriteVLDShuffle, WriteVq, WriteVq],
|
||||
(instrs LD4i64)>;
|
||||
def : InstRW<[WriteVLDShuffle, ReadVLD, WriteAdr, WriteVLDShuffle, WriteV],
|
||||
def : InstRW<[WriteVLDShuffle, ReadVLD, WriteAdr, WriteVLDShuffle, WriteVq],
|
||||
(instrs LD4i64_POST)>;
|
||||
|
||||
def : InstRW<[WriteVLDShuffle, WriteV, WriteV, WriteV],
|
||||
def : InstRW<[WriteVLDShuffle, WriteVq, WriteVq, WriteVq],
|
||||
(instregex "LD4Rv(8b|4h|2s|16b|8h|4s)$")>;
|
||||
def : InstRW<[WriteVLDShuffle, WriteAdr, WriteV, WriteV, WriteV],
|
||||
def : InstRW<[WriteVLDShuffle, WriteAdr, WriteVq, WriteVq, WriteVq],
|
||||
(instregex "LD4Rv(8b|4h|2s|16b|8h|4s)_POST")>;
|
||||
|
||||
def : InstRW<[WriteVLDShuffle, WriteVLDShuffle, WriteV, WriteV],
|
||||
def : InstRW<[WriteVLDShuffle, WriteVLDShuffle, WriteVq, WriteVq],
|
||||
(instrs LD4Rv1d,LD4Rv2d)>;
|
||||
def : InstRW<[WriteVLDShuffle, WriteAdr, WriteVLDShuffle, WriteV, WriteV],
|
||||
def : InstRW<[WriteVLDShuffle, WriteAdr, WriteVLDShuffle, WriteVq, WriteVq],
|
||||
(instrs LD4Rv1d_POST,LD4Rv2d_POST)>;
|
||||
|
||||
//---
|
||||
|
|
|
@ -254,7 +254,8 @@ def : WriteRes<WriteVST, [M3UnitS,
|
|||
let NumMicroOps = 1; }
|
||||
|
||||
// ASIMD FP instructions.
|
||||
def : WriteRes<WriteV, [M3UnitNALU]> { let Latency = 3; }
|
||||
def : WriteRes<WriteVd, [M3UnitNALU]> { let Latency = 3; }
|
||||
def : WriteRes<WriteVq, [M3UnitNALU]> { let Latency = 3; }
|
||||
|
||||
// Other miscellaneous instructions.
|
||||
def : WriteRes<WriteAtomic, []> { let Unsupported = 1; }
|
||||
|
|
|
@ -558,7 +558,8 @@ def : SchedAlias<WriteVLD, M4WriteL5>;
|
|||
def : SchedAlias<WriteVST, M4WriteVST1>;
|
||||
|
||||
// ASIMD FP instructions.
|
||||
def : SchedAlias<WriteV, M4WriteNALU1>;
|
||||
def : SchedAlias<WriteVd, M4WriteNALU1>;
|
||||
def : SchedAlias<WriteVq, M4WriteNALU1>;
|
||||
|
||||
// Other miscellaneous instructions.
|
||||
def : WriteRes<WriteAtomic, []> { let Unsupported = 1; }
|
||||
|
|
|
@ -594,7 +594,8 @@ def : SchedAlias<WriteVLD, M5WriteL6>;
|
|||
def : SchedAlias<WriteVST, M5WriteVST1>;
|
||||
|
||||
// ASIMD FP instructions.
|
||||
def : SchedAlias<WriteV, M5WriteNALU1>;
|
||||
def : SchedAlias<WriteVd, M5WriteNALU1>;
|
||||
def : SchedAlias<WriteVq, M5WriteNALU1>;
|
||||
|
||||
// Other miscellaneous instructions.
|
||||
def : WriteRes<WriteBarrier, []> { let Latency = 1; }
|
||||
|
|
|
@ -92,7 +92,8 @@ def : WriteRes<WriteFCopy, []> { let Unsupported = 1; }
|
|||
def : WriteRes<WriteFImm, []> { let Unsupported = 1; }
|
||||
def : WriteRes<WriteFMul, []> { let Unsupported = 1; }
|
||||
def : WriteRes<WriteFDiv, []> { let Unsupported = 1; }
|
||||
def : WriteRes<WriteV, []> { let Unsupported = 1; }
|
||||
def : WriteRes<WriteVd, []> { let Unsupported = 1; }
|
||||
def : WriteRes<WriteVq, []> { let Unsupported = 1; }
|
||||
def : WriteRes<WriteVLD, []> { let Unsupported = 1; }
|
||||
def : WriteRes<WriteVST, []> { let Unsupported = 1; }
|
||||
def : WriteRes<WriteSys, []> { let Unsupported = 1; }
|
||||
|
|
|
@ -95,7 +95,8 @@ def : WriteRes<WriteFMul, [KryoUnitX, KryoUnitX]>
|
|||
{ let Latency = 6; let NumMicroOps = 2; }
|
||||
def : WriteRes<WriteFDiv, [KryoUnitXA, KryoUnitY]>
|
||||
{ let Latency = 12; let NumMicroOps = 2; } // Fragent -1 / NoRSV +1
|
||||
def : WriteRes<WriteV, [KryoUnitXY]> { let Latency = 6; }
|
||||
def : WriteRes<WriteVd, [KryoUnitXY]> { let Latency = 6; }
|
||||
def : WriteRes<WriteVq, [KryoUnitXY]> { let Latency = 6; }
|
||||
def : WriteRes<WriteVLD, [KryoUnitLS]> { let Latency = 4; }
|
||||
def : WriteRes<WriteVST, [KryoUnitLS]> { let Latency = 4; }
|
||||
|
||||
|
|
|
@ -90,7 +90,8 @@ def : WriteRes<WriteFMul, [TSV110UnitF]> { let Latency = 5; }
|
|||
// FP Div, Sqrt
|
||||
def : WriteRes<WriteFDiv, [TSV110UnitFSU1]> { let Latency = 18; }
|
||||
|
||||
def : WriteRes<WriteV, [TSV110UnitF]> { let Latency = 4; }
|
||||
def : WriteRes<WriteVd, [TSV110UnitF]> { let Latency = 4; }
|
||||
def : WriteRes<WriteVq, [TSV110UnitF]> { let Latency = 4; }
|
||||
def : WriteRes<WriteVLD, [TSV110UnitFLdSt]> { let Latency = 5; }
|
||||
def : WriteRes<WriteVST, [TSV110UnitF]> { let Latency = 1; }
|
||||
|
||||
|
|
|
@ -154,7 +154,8 @@ def : WriteRes<WriteFCmp, [THXT8XUnitFPALU]> { let Latency = 6; }
|
|||
def : WriteRes<WriteFCvt, [THXT8XUnitFPALU]> { let Latency = 6; }
|
||||
def : WriteRes<WriteFCopy, [THXT8XUnitFPALU]> { let Latency = 6; }
|
||||
def : WriteRes<WriteFImm, [THXT8XUnitFPALU]> { let Latency = 6; }
|
||||
def : WriteRes<WriteV, [THXT8XUnitFPALU]> { let Latency = 6; }
|
||||
def : WriteRes<WriteVd, [THXT8XUnitFPALU]> { let Latency = 6; }
|
||||
def : WriteRes<WriteVq, [THXT8XUnitFPALU]> { let Latency = 6; }
|
||||
|
||||
// FP Mul, Div, Sqrt
|
||||
def : WriteRes<WriteFMul, [THXT8XUnitFPMDS]> { let Latency = 6; }
|
||||
|
|
|
@ -1250,7 +1250,12 @@ def : InstRW<[THX2T99Write_5Cyc_F01], (instrs FMOVXDHighr, FMOVDXHighr)>;
|
|||
// ASIMD shift by register, basic, Q-form
|
||||
// ASIMD shift by register, complex, D-form
|
||||
// ASIMD shift by register, complex, Q-form
|
||||
def : WriteRes<WriteV, [THX2T99F01]> {
|
||||
def : WriteRes<WriteVd, [THX2T99F01]> {
|
||||
let Latency = 7;
|
||||
let NumMicroOps = 4;
|
||||
let ResourceCycles = [4];
|
||||
}
|
||||
def : WriteRes<WriteVq, [THX2T99F01]> {
|
||||
let Latency = 7;
|
||||
let NumMicroOps = 4;
|
||||
let ResourceCycles = [4];
|
||||
|
|
|
@ -1357,7 +1357,12 @@ def : InstRW<[THX3T110Write_5Cyc_F01], (instrs FMOVXDHighr, FMOVDXHighr)>;
|
|||
// ASIMD shift by register, basic, Q-form
|
||||
// ASIMD shift by register, complex, D-form
|
||||
// ASIMD shift by register, complex, Q-form
|
||||
def : WriteRes<WriteV, [THX3T110FP0123]> {
|
||||
def : WriteRes<WriteVd, [THX3T110FP0123]> {
|
||||
let Latency = 5;
|
||||
let NumMicroOps = 4;
|
||||
let ResourceCycles = [4];
|
||||
}
|
||||
def : WriteRes<WriteVq, [THX3T110FP0123]> {
|
||||
let Latency = 5;
|
||||
let NumMicroOps = 4;
|
||||
let ResourceCycles = [4];
|
||||
|
|
|
@ -77,7 +77,8 @@ def WriteFImm : SchedWrite; // Floating-point immediate.
|
|||
def WriteFMul : SchedWrite; // Floating-point multiply.
|
||||
def WriteFDiv : SchedWrite; // Floating-point division.
|
||||
|
||||
def WriteV : SchedWrite; // Vector ops.
|
||||
def WriteVd : SchedWrite; // 64bit Vector D ops.
|
||||
def WriteVq : SchedWrite; // 128bit Vector Q ops.
|
||||
def WriteVLD : SchedWrite; // Vector loads.
|
||||
def WriteVST : SchedWrite; // Vector stores.
|
||||
|
||||
|
@ -87,9 +88,9 @@ def WriteAtomic : SchedWrite; // Atomic memory operations (CAS, Swap, LDOP)
|
|||
def ReadVLD : SchedRead;
|
||||
|
||||
// Sequential vector load and shuffle.
|
||||
def WriteVLDShuffle : WriteSequence<[WriteVLD, WriteV]>;
|
||||
def WriteVLDPairShuffle : WriteSequence<[WriteVLD, WriteV, WriteV]>;
|
||||
def WriteVLDShuffle : WriteSequence<[WriteVLD, WriteVq]>;
|
||||
def WriteVLDPairShuffle : WriteSequence<[WriteVLD, WriteVq, WriteVq]>;
|
||||
|
||||
// Store a shuffled vector.
|
||||
def WriteVSTShuffle : WriteSequence<[WriteV, WriteVST]>;
|
||||
def WriteVSTPairShuffle : WriteSequence<[WriteV, WriteV, WriteVST]>;
|
||||
def WriteVSTShuffle : WriteSequence<[WriteVq, WriteVST]>;
|
||||
def WriteVSTPairShuffle : WriteSequence<[WriteVq, WriteVq, WriteVST]>;
|
||||
|
|
File diff suppressed because it is too large
Load Diff
Loading…
Reference in New Issue