forked from OSchip/llvm-project
Partially revert "[SchedModels][CortexA55] Add ASIMD integer instructions"
The Cortex-A55 scheduling model is used for -mcpu=generic, meaning it can have a wider effect than just the A55. The changes to the A55 scheduling model seem to have caused performance regressions on Cortex-A510 devices, which have latencies closer to the original and different forwarding paths. This partially reverts the changes from D117003, at least until we can do something to improve Cortex-A510. According to my results, this improves the A510 results without altering the A55 very much.
This commit is contained in:
parent
24d4f601aa
commit
61b616755a
|
@ -6,7 +6,10 @@
|
|||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// This file defines the machine model for the ARM Cortex-A55 processors.
|
||||
// This file defines the machine model for the ARM Cortex-A55 processors. Note
|
||||
// that this schedule is currently used as the default for -mcpu=generic. As a
|
||||
// result, some of the modelling decisions made do not precisely model the
|
||||
// Cortex-A55, instead aiming to be a good compromise between different cpus.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
|
@ -158,10 +161,6 @@ class CortexA55WriteVq<int n, ProcResourceKind res> : SchedWriteRes<[res, res]>
|
|||
let Latency = n;
|
||||
let BeginGroup = 1;
|
||||
}
|
||||
class CortexA55WriteVqL<int n, ProcResourceKind res> : SchedWriteRes<[res, res, res, res]> {
|
||||
let Latency = n;
|
||||
let BeginGroup = 1;
|
||||
}
|
||||
def CortexA55WriteDotScVq_4 : CortexA55WriteVq<4, CortexA55UnitFPALU>;
|
||||
def CortexA55WriteDotVq_4 : CortexA55WriteVq<4, CortexA55UnitFPALU>;
|
||||
def CortexA55WriteDotVd_4 : CortexA55WriteVd<4, CortexA55UnitFPALU>;
|
||||
|
@ -176,7 +175,6 @@ def CortexA55WriteAluVd_2 : CortexA55WriteVd<2, CortexA55UnitFPALU>;
|
|||
def CortexA55WriteAluVq_2 : CortexA55WriteVq<2, CortexA55UnitFPALU>;
|
||||
def CortexA55WriteAluVd_1 : CortexA55WriteVd<1, CortexA55UnitFPALU>;
|
||||
def CortexA55WriteAluVq_1 : CortexA55WriteVq<1, CortexA55UnitFPALU>;
|
||||
def CortexA55WriteAluVqL_4 : CortexA55WriteVqL<4, CortexA55UnitFPALU>;
|
||||
def : SchedAlias<WriteVd, CortexA55WriteVd<4, CortexA55UnitFPALU>>;
|
||||
def : SchedAlias<WriteVq, CortexA55WriteVq<4, CortexA55UnitFPALU>>;
|
||||
|
||||
|
@ -257,13 +255,6 @@ def : ReadAdvance<ReadID, 1, [WriteImm,WriteI,
|
|||
WriteID32,WriteID64,
|
||||
WriteIM32,WriteIM64]>;
|
||||
|
||||
// NEON ALU/MAC forwarding paths
|
||||
def CortexA55ReadMla : SchedReadAdvance<3, [CortexA55WriteMlaVd_4, CortexA55WriteMlaVq_4]>;
|
||||
def CortexA55ReadMlaIx : SchedReadAdvance<3, [CortexA55WriteMlaIxVq_4]>;
|
||||
def CortexA55ReadMlaL : SchedReadAdvance<3, [CortexA55WriteMlaLVq_4]>;
|
||||
def CortexA55ReadDot : SchedReadAdvance<3, [CortexA55WriteDotVd_4, CortexA55WriteDotVq_4]>;
|
||||
def CortexA55ReadDotSc : SchedReadAdvance<3, [CortexA55WriteDotScVq_4]>;
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Subtarget-specific InstRWs.
|
||||
|
||||
|
@ -398,7 +389,7 @@ def : InstRW<[CortexA55WriteFSqrtDP], (instregex "^.*SQRT.*64$")>;
|
|||
def : InstRW<[CortexA55WriteAluVd_3], (instregex "[SU]ABDv(2i32|4i16|8i8)")>;
|
||||
def : InstRW<[CortexA55WriteAluVq_3], (instregex "[SU]ABDv(16i8|4i32|8i16)")>;
|
||||
// ASIMD absolute diff accum
|
||||
def : InstRW<[CortexA55WriteAluVqL_4], (instregex "[SU]ABAL?v")>;
|
||||
def : InstRW<[CortexA55WriteAluVq_4], (instregex "[SU]ABAL?v")>;
|
||||
// ASIMD absolute diff long
|
||||
def : InstRW<[CortexA55WriteAluVq_3], (instregex "[SU]ABDLv")>;
|
||||
// ASIMD arith #1
|
||||
|
@ -419,7 +410,7 @@ def : InstRW<[CortexA55WriteAluVq_3], (instregex "ABSv(2i64|4i32|8i16|16i8)$",
|
|||
def : InstRW<[CortexA55WriteAluVq_3], (instregex "SADDLv", "UADDLv", "SADDWv",
|
||||
"UADDWv", "SSUBLv", "USUBLv", "SSUBWv", "USUBWv", "ADDHNv", "SUBHNv")>;
|
||||
// ASIMD arith #5
|
||||
def : InstRW<[CortexA55WriteAluVqL_4], (instregex "RADDHNv", "RSUBHNv")>;
|
||||
def : InstRW<[CortexA55WriteAluVq_4], (instregex "RADDHNv", "RSUBHNv")>;
|
||||
// ASIMD arith, reduce
|
||||
def : InstRW<[CortexA55WriteAluVq_3], (instregex "ADDVv", "SADDLVv", "UADDLVv")>;
|
||||
// ASIMD compare #1
|
||||
|
@ -445,31 +436,31 @@ def : InstRW<[CortexA55WriteAluVq_4], (instregex "MULv(2i32|4i16|4i32|8i16)_inde
|
|||
def : InstRW<[CortexA55WriteAluVd_3], (instrs PMULv8i8)>;
|
||||
def : InstRW<[CortexA55WriteAluVq_3], (instrs PMULv16i8)>;
|
||||
// ASIMD multiply accumulate
|
||||
def : InstRW<[CortexA55WriteMlaVd_4, CortexA55ReadMla], (instregex "ML[AS]v(2i32|4i16|8i8)$")>;
|
||||
def : InstRW<[CortexA55WriteMlaVq_4, CortexA55ReadMla], (instregex "ML[AS]v(16i8|4i32|8i16)$")>;
|
||||
def : InstRW<[CortexA55WriteMlaIxVq_4, CortexA55ReadMlaIx], (instregex "ML[AS]v(2i32|4i16|4i32|8i16)_indexed$")>;
|
||||
def : InstRW<[CortexA55WriteMlaVd_4], (instregex "ML[AS]v(2i32|4i16|8i8)$")>;
|
||||
def : InstRW<[CortexA55WriteMlaVq_4], (instregex "ML[AS]v(16i8|4i32|8i16)$")>;
|
||||
def : InstRW<[CortexA55WriteMlaIxVq_4], (instregex "ML[AS]v(2i32|4i16|4i32|8i16)_indexed$")>;
|
||||
// ASIMD multiply accumulate half
|
||||
def : InstRW<[CortexA55WriteAluVq_4], (instregex "SQRDML[AS]H[vi]")>;
|
||||
// ASIMD multiply accumulate long
|
||||
def : InstRW<[CortexA55WriteMlaLVq_4, CortexA55ReadMlaL], (instregex "[SU]ML[AS]Lv")>;
|
||||
def : InstRW<[CortexA55WriteMlaLVq_4], (instregex "[SU]ML[AS]Lv")>;
|
||||
// ASIMD multiply accumulate long #2
|
||||
def : InstRW<[CortexA55WriteAluVq_4], (instregex "SQDML[AS]L[iv]")>;
|
||||
// ASIMD dot product
|
||||
def : InstRW<[CortexA55WriteDotVd_4, CortexA55ReadDot], (instregex "[SU]DOTv8i8")>;
|
||||
def : InstRW<[CortexA55WriteDotVq_4, CortexA55ReadDot], (instregex "[SU]DOTv16i8")>;
|
||||
def : InstRW<[CortexA55WriteDotVd_4], (instregex "[SU]DOTv8i8")>;
|
||||
def : InstRW<[CortexA55WriteDotVq_4], (instregex "[SU]DOTv16i8")>;
|
||||
// ASIMD dot product, by scalar
|
||||
def : InstRW<[CortexA55WriteDotScVq_4, CortexA55ReadDotSc], (instregex "[SU]DOTlanev")>;
|
||||
def : InstRW<[CortexA55WriteDotScVq_4], (instregex "[SU]DOTlanev")>;
|
||||
// ASIMD multiply long
|
||||
def : InstRW<[CortexA55WriteAluVq_4], (instregex "[SU]MULLv", "SQDMULL[iv]")>;
|
||||
// ASIMD polynomial (8x8) multiply long
|
||||
def : InstRW<[CortexA55WriteAluVq_3], (instrs PMULLv8i8, PMULLv16i8)>;
|
||||
// ASIMD pairwise add and accumulate
|
||||
def : InstRW<[CortexA55WriteAluVqL_4], (instregex "[SU]ADALPv")>;
|
||||
def : InstRW<[CortexA55WriteAluVq_4], (instregex "[SU]ADALPv")>;
|
||||
// ASIMD shift accumulate
|
||||
def : InstRW<[CortexA55WriteAluVd_3], (instregex "[SU]SRA(d|v2i32|v4i16|v8i8)")>;
|
||||
def : InstRW<[CortexA55WriteAluVq_3], (instregex "[SU]SRAv(16i8|2i64|4i32|8i16)")>;
|
||||
// ASIMD shift accumulate #2
|
||||
def : InstRW<[CortexA55WriteAluVqL_4], (instregex "[SU]RSRA[vd]")>;
|
||||
def : InstRW<[CortexA55WriteAluVq_4], (instregex "[SU]RSRA[vd]")>;
|
||||
// ASIMD shift by immed
|
||||
def : InstRW<[CortexA55WriteAluVd_2], (instregex "SHLd$", "SHLv",
|
||||
"SLId$", "SRId$", "[SU]SHR[vd]", "SHRNv")>;
|
||||
|
|
|
@ -1457,12 +1457,12 @@ zip2 v0.8h, v0.8h, v0.8h
|
|||
# CHECK-NEXT: 1 3 0.50 pmul v0.8b, v0.8b, v0.8b
|
||||
# CHECK-NEXT: 1 3 1.00 pmull v0.8h, v0.8b, v0.8b
|
||||
# CHECK-NEXT: 1 3 1.00 pmull2 v0.8h, v0.16b, v0.16b
|
||||
# CHECK-NEXT: 1 4 2.00 raddhn v0.2s, v0.2d, v0.2d
|
||||
# CHECK-NEXT: 1 4 2.00 raddhn v0.4h, v0.4s, v0.4s
|
||||
# CHECK-NEXT: 1 4 2.00 raddhn v0.8b, v0.8h, v0.8h
|
||||
# CHECK-NEXT: 1 4 2.00 raddhn2 v0.16b, v0.8h, v0.8h
|
||||
# CHECK-NEXT: 1 4 2.00 raddhn2 v0.4s, v0.2d, v0.2d
|
||||
# CHECK-NEXT: 1 4 2.00 raddhn2 v0.8h, v0.4s, v0.4s
|
||||
# CHECK-NEXT: 1 4 1.00 raddhn v0.2s, v0.2d, v0.2d
|
||||
# CHECK-NEXT: 1 4 1.00 raddhn v0.4h, v0.4s, v0.4s
|
||||
# CHECK-NEXT: 1 4 1.00 raddhn v0.8b, v0.8h, v0.8h
|
||||
# CHECK-NEXT: 1 4 1.00 raddhn2 v0.16b, v0.8h, v0.8h
|
||||
# CHECK-NEXT: 1 4 1.00 raddhn2 v0.4s, v0.2d, v0.2d
|
||||
# CHECK-NEXT: 1 4 1.00 raddhn2 v0.8h, v0.4s, v0.4s
|
||||
# CHECK-NEXT: 1 4 1.00 rbit v0.16b, v0.16b
|
||||
# CHECK-NEXT: 1 4 0.50 rbit v0.8b, v0.8b
|
||||
# CHECK-NEXT: 1 4 0.50 rev16 v21.8b, v1.8b
|
||||
|
@ -1483,19 +1483,19 @@ zip2 v0.8h, v0.8h, v0.8h
|
|||
# CHECK-NEXT: 1 3 1.00 rshrn2 v0.16b, v0.8h, #3
|
||||
# CHECK-NEXT: 1 3 1.00 rshrn2 v0.4s, v0.2d, #3
|
||||
# CHECK-NEXT: 1 3 1.00 rshrn2 v0.8h, v0.4s, #3
|
||||
# CHECK-NEXT: 1 4 2.00 rsubhn v0.2s, v0.2d, v0.2d
|
||||
# CHECK-NEXT: 1 4 2.00 rsubhn v0.4h, v0.4s, v0.4s
|
||||
# CHECK-NEXT: 1 4 2.00 rsubhn v0.8b, v0.8h, v0.8h
|
||||
# CHECK-NEXT: 1 4 2.00 rsubhn2 v0.16b, v0.8h, v0.8h
|
||||
# CHECK-NEXT: 1 4 2.00 rsubhn2 v0.4s, v0.2d, v0.2d
|
||||
# CHECK-NEXT: 1 4 2.00 rsubhn2 v0.8h, v0.4s, v0.4s
|
||||
# CHECK-NEXT: 1 4 2.00 saba v0.16b, v0.16b, v0.16b
|
||||
# CHECK-NEXT: 1 4 2.00 sabal v0.2d, v0.2s, v0.2s
|
||||
# CHECK-NEXT: 1 4 2.00 sabal v0.4s, v0.4h, v0.4h
|
||||
# CHECK-NEXT: 1 4 2.00 sabal v0.8h, v0.8b, v0.8b
|
||||
# CHECK-NEXT: 1 4 2.00 sabal2 v0.2d, v0.4s, v0.4s
|
||||
# CHECK-NEXT: 1 4 2.00 sabal2 v0.4s, v0.8h, v0.8h
|
||||
# CHECK-NEXT: 1 4 2.00 sabal2 v0.8h, v0.16b, v0.16b
|
||||
# CHECK-NEXT: 1 4 1.00 rsubhn v0.2s, v0.2d, v0.2d
|
||||
# CHECK-NEXT: 1 4 1.00 rsubhn v0.4h, v0.4s, v0.4s
|
||||
# CHECK-NEXT: 1 4 1.00 rsubhn v0.8b, v0.8h, v0.8h
|
||||
# CHECK-NEXT: 1 4 1.00 rsubhn2 v0.16b, v0.8h, v0.8h
|
||||
# CHECK-NEXT: 1 4 1.00 rsubhn2 v0.4s, v0.2d, v0.2d
|
||||
# CHECK-NEXT: 1 4 1.00 rsubhn2 v0.8h, v0.4s, v0.4s
|
||||
# CHECK-NEXT: 1 4 1.00 saba v0.16b, v0.16b, v0.16b
|
||||
# CHECK-NEXT: 1 4 1.00 sabal v0.2d, v0.2s, v0.2s
|
||||
# CHECK-NEXT: 1 4 1.00 sabal v0.4s, v0.4h, v0.4h
|
||||
# CHECK-NEXT: 1 4 1.00 sabal v0.8h, v0.8b, v0.8b
|
||||
# CHECK-NEXT: 1 4 1.00 sabal2 v0.2d, v0.4s, v0.4s
|
||||
# CHECK-NEXT: 1 4 1.00 sabal2 v0.4s, v0.8h, v0.8h
|
||||
# CHECK-NEXT: 1 4 1.00 sabal2 v0.8h, v0.16b, v0.16b
|
||||
# CHECK-NEXT: 1 3 0.50 sabd v0.4h, v0.4h, v0.4h
|
||||
# CHECK-NEXT: 1 3 1.00 sabdl v0.2d, v0.2s, v0.2s
|
||||
# CHECK-NEXT: 1 3 1.00 sabdl v0.4s, v0.4h, v0.4h
|
||||
|
@ -1503,12 +1503,12 @@ zip2 v0.8h, v0.8h, v0.8h
|
|||
# CHECK-NEXT: 1 3 1.00 sabdl2 v0.2d, v0.4s, v0.4s
|
||||
# CHECK-NEXT: 1 3 1.00 sabdl2 v0.4s, v0.8h, v0.8h
|
||||
# CHECK-NEXT: 1 3 1.00 sabdl2 v0.8h, v0.16b, v0.16b
|
||||
# CHECK-NEXT: 1 4 2.00 sadalp v0.1d, v0.2s
|
||||
# CHECK-NEXT: 1 4 2.00 sadalp v0.2d, v0.4s
|
||||
# CHECK-NEXT: 1 4 2.00 sadalp v0.2s, v0.4h
|
||||
# CHECK-NEXT: 1 4 2.00 sadalp v0.4h, v0.8b
|
||||
# CHECK-NEXT: 1 4 2.00 sadalp v0.4s, v0.8h
|
||||
# CHECK-NEXT: 1 4 2.00 sadalp v0.8h, v0.16b
|
||||
# CHECK-NEXT: 1 4 1.00 sadalp v0.1d, v0.2s
|
||||
# CHECK-NEXT: 1 4 1.00 sadalp v0.2d, v0.4s
|
||||
# CHECK-NEXT: 1 4 1.00 sadalp v0.2s, v0.4h
|
||||
# CHECK-NEXT: 1 4 1.00 sadalp v0.4h, v0.8b
|
||||
# CHECK-NEXT: 1 4 1.00 sadalp v0.4s, v0.8h
|
||||
# CHECK-NEXT: 1 4 1.00 sadalp v0.8h, v0.16b
|
||||
# CHECK-NEXT: 1 3 1.00 saddl v0.2d, v0.2s, v0.2s
|
||||
# CHECK-NEXT: 1 3 1.00 saddl v0.4s, v0.4h, v0.4h
|
||||
# CHECK-NEXT: 1 3 1.00 saddl v0.8h, v0.8b, v0.8b
|
||||
|
@ -1777,14 +1777,14 @@ zip2 v0.8h, v0.8h, v0.8h
|
|||
# CHECK-NEXT: 1 3 1.00 srshr v0.4s, v0.4s, #3
|
||||
# CHECK-NEXT: 1 3 0.50 srshr v0.8b, v0.8b, #3
|
||||
# CHECK-NEXT: 1 3 1.00 srshr v0.8h, v0.8h, #3
|
||||
# CHECK-NEXT: 1 4 2.00 srsra d15, d11, #19
|
||||
# CHECK-NEXT: 1 4 2.00 srsra v0.16b, v0.16b, #3
|
||||
# CHECK-NEXT: 1 4 2.00 srsra v0.2d, v0.2d, #3
|
||||
# CHECK-NEXT: 1 4 2.00 srsra v0.2s, v0.2s, #3
|
||||
# CHECK-NEXT: 1 4 2.00 srsra v0.4h, v0.4h, #3
|
||||
# CHECK-NEXT: 1 4 2.00 srsra v0.4s, v0.4s, #3
|
||||
# CHECK-NEXT: 1 4 2.00 srsra v0.8b, v0.8b, #3
|
||||
# CHECK-NEXT: 1 4 2.00 srsra v0.8h, v0.8h, #3
|
||||
# CHECK-NEXT: 1 4 1.00 srsra d15, d11, #19
|
||||
# CHECK-NEXT: 1 4 1.00 srsra v0.16b, v0.16b, #3
|
||||
# CHECK-NEXT: 1 4 1.00 srsra v0.2d, v0.2d, #3
|
||||
# CHECK-NEXT: 1 4 1.00 srsra v0.2s, v0.2s, #3
|
||||
# CHECK-NEXT: 1 4 1.00 srsra v0.4h, v0.4h, #3
|
||||
# CHECK-NEXT: 1 4 1.00 srsra v0.4s, v0.4s, #3
|
||||
# CHECK-NEXT: 1 4 1.00 srsra v0.8b, v0.8b, #3
|
||||
# CHECK-NEXT: 1 4 1.00 srsra v0.8h, v0.8h, #3
|
||||
# CHECK-NEXT: 1 2 0.50 sshl d31, d31, d31
|
||||
# CHECK-NEXT: 1 2 1.00 sshl v0.2d, v0.2d, v0.2d
|
||||
# CHECK-NEXT: 1 2 0.50 sshl v0.2s, v0.2s, v0.2s
|
||||
|
@ -1885,13 +1885,13 @@ zip2 v0.8h, v0.8h, v0.8h
|
|||
# CHECK-NEXT: 1 4 1.00 trn2 v0.4s, v0.4s, v0.4s
|
||||
# CHECK-NEXT: 1 4 0.50 trn2 v0.8b, v0.8b, v0.8b
|
||||
# CHECK-NEXT: 1 4 1.00 trn2 v0.8h, v0.8h, v0.8h
|
||||
# CHECK-NEXT: 1 4 2.00 uaba v0.8b, v0.8b, v0.8b
|
||||
# CHECK-NEXT: 1 4 2.00 uabal v0.2d, v0.2s, v0.2s
|
||||
# CHECK-NEXT: 1 4 2.00 uabal v0.4s, v0.4h, v0.4h
|
||||
# CHECK-NEXT: 1 4 2.00 uabal v0.8h, v0.8b, v0.8b
|
||||
# CHECK-NEXT: 1 4 2.00 uabal2 v0.2d, v0.4s, v0.4s
|
||||
# CHECK-NEXT: 1 4 2.00 uabal2 v0.4s, v0.8h, v0.8h
|
||||
# CHECK-NEXT: 1 4 2.00 uabal2 v0.8h, v0.16b, v0.16b
|
||||
# CHECK-NEXT: 1 4 1.00 uaba v0.8b, v0.8b, v0.8b
|
||||
# CHECK-NEXT: 1 4 1.00 uabal v0.2d, v0.2s, v0.2s
|
||||
# CHECK-NEXT: 1 4 1.00 uabal v0.4s, v0.4h, v0.4h
|
||||
# CHECK-NEXT: 1 4 1.00 uabal v0.8h, v0.8b, v0.8b
|
||||
# CHECK-NEXT: 1 4 1.00 uabal2 v0.2d, v0.4s, v0.4s
|
||||
# CHECK-NEXT: 1 4 1.00 uabal2 v0.4s, v0.8h, v0.8h
|
||||
# CHECK-NEXT: 1 4 1.00 uabal2 v0.8h, v0.16b, v0.16b
|
||||
# CHECK-NEXT: 1 3 0.50 uabd v0.4h, v0.4h, v0.4h
|
||||
# CHECK-NEXT: 1 3 1.00 uabdl v0.2d, v0.2s, v0.2s
|
||||
# CHECK-NEXT: 1 3 1.00 uabdl v0.4s, v0.4h, v0.4h
|
||||
|
@ -1899,12 +1899,12 @@ zip2 v0.8h, v0.8h, v0.8h
|
|||
# CHECK-NEXT: 1 3 1.00 uabdl2 v0.2d, v0.4s, v0.4s
|
||||
# CHECK-NEXT: 1 3 1.00 uabdl2 v0.4s, v0.8h, v0.8h
|
||||
# CHECK-NEXT: 1 3 1.00 uabdl2 v0.8h, v0.16b, v0.16b
|
||||
# CHECK-NEXT: 1 4 2.00 uadalp v0.1d, v0.2s
|
||||
# CHECK-NEXT: 1 4 2.00 uadalp v0.2d, v0.4s
|
||||
# CHECK-NEXT: 1 4 2.00 uadalp v0.2s, v0.4h
|
||||
# CHECK-NEXT: 1 4 2.00 uadalp v0.4h, v0.8b
|
||||
# CHECK-NEXT: 1 4 2.00 uadalp v0.4s, v0.8h
|
||||
# CHECK-NEXT: 1 4 2.00 uadalp v0.8h, v0.16b
|
||||
# CHECK-NEXT: 1 4 1.00 uadalp v0.1d, v0.2s
|
||||
# CHECK-NEXT: 1 4 1.00 uadalp v0.2d, v0.4s
|
||||
# CHECK-NEXT: 1 4 1.00 uadalp v0.2s, v0.4h
|
||||
# CHECK-NEXT: 1 4 1.00 uadalp v0.4h, v0.8b
|
||||
# CHECK-NEXT: 1 4 1.00 uadalp v0.4s, v0.8h
|
||||
# CHECK-NEXT: 1 4 1.00 uadalp v0.8h, v0.16b
|
||||
# CHECK-NEXT: 1 3 1.00 uaddl v0.2d, v0.2s, v0.2s
|
||||
# CHECK-NEXT: 1 3 1.00 uaddl v0.4s, v0.4h, v0.4h
|
||||
# CHECK-NEXT: 1 3 1.00 uaddl v0.8h, v0.8b, v0.8b
|
||||
|
@ -2042,14 +2042,14 @@ zip2 v0.8h, v0.8h, v0.8h
|
|||
# CHECK-NEXT: 1 3 1.00 urshr v0.8h, v0.8h, #3
|
||||
# CHECK-NEXT: 1 12 9.00 ursqrte v0.2s, v0.2s
|
||||
# CHECK-NEXT: 1 12 9.00 ursqrte v0.4s, v0.4s
|
||||
# CHECK-NEXT: 1 4 2.00 ursra d18, d10, #13
|
||||
# CHECK-NEXT: 1 4 2.00 ursra v0.16b, v0.16b, #3
|
||||
# CHECK-NEXT: 1 4 2.00 ursra v0.2d, v0.2d, #3
|
||||
# CHECK-NEXT: 1 4 2.00 ursra v0.2s, v0.2s, #3
|
||||
# CHECK-NEXT: 1 4 2.00 ursra v0.4h, v0.4h, #3
|
||||
# CHECK-NEXT: 1 4 2.00 ursra v0.4s, v0.4s, #3
|
||||
# CHECK-NEXT: 1 4 2.00 ursra v0.8b, v0.8b, #3
|
||||
# CHECK-NEXT: 1 4 2.00 ursra v0.8h, v0.8h, #3
|
||||
# CHECK-NEXT: 1 4 1.00 ursra d18, d10, #13
|
||||
# CHECK-NEXT: 1 4 1.00 ursra v0.16b, v0.16b, #3
|
||||
# CHECK-NEXT: 1 4 1.00 ursra v0.2d, v0.2d, #3
|
||||
# CHECK-NEXT: 1 4 1.00 ursra v0.2s, v0.2s, #3
|
||||
# CHECK-NEXT: 1 4 1.00 ursra v0.4h, v0.4h, #3
|
||||
# CHECK-NEXT: 1 4 1.00 ursra v0.4s, v0.4s, #3
|
||||
# CHECK-NEXT: 1 4 1.00 ursra v0.8b, v0.8b, #3
|
||||
# CHECK-NEXT: 1 4 1.00 ursra v0.8h, v0.8h, #3
|
||||
# CHECK-NEXT: 1 2 0.50 ushl d0, d0, d0
|
||||
# CHECK-NEXT: 1 2 1.00 ushl v0.16b, v0.16b, v0.16b
|
||||
# CHECK-NEXT: 1 2 1.00 ushl v0.4s, v0.4s, v0.4s
|
||||
|
@ -2146,7 +2146,7 @@ zip2 v0.8h, v0.8h, v0.8h
|
|||
|
||||
# CHECK: Resource pressure per iteration:
|
||||
# CHECK-NEXT: [0.0] [0.1] [1] [2] [3.0] [3.1] [4] [5.0] [5.1] [6] [7] [8]
|
||||
# CHECK-NEXT: - - - - 780.00 780.00 197.00 3.00 3.00 107.00 - 52.00
|
||||
# CHECK-NEXT: - - - - 726.00 726.00 197.00 3.00 3.00 107.00 - 52.00
|
||||
|
||||
# CHECK: Resource pressure by instruction:
|
||||
# CHECK-NEXT: [0.0] [0.1] [1] [2] [3.0] [3.1] [4] [5.0] [5.1] [6] [7] [8] Instructions:
|
||||
|
@ -2537,12 +2537,12 @@ zip2 v0.8h, v0.8h, v0.8h
|
|||
# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - pmul v0.8b, v0.8b, v0.8b
|
||||
# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - pmull v0.8h, v0.8b, v0.8b
|
||||
# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - pmull2 v0.8h, v0.16b, v0.16b
|
||||
# CHECK-NEXT: - - - - 2.00 2.00 - - - - - - raddhn v0.2s, v0.2d, v0.2d
|
||||
# CHECK-NEXT: - - - - 2.00 2.00 - - - - - - raddhn v0.4h, v0.4s, v0.4s
|
||||
# CHECK-NEXT: - - - - 2.00 2.00 - - - - - - raddhn v0.8b, v0.8h, v0.8h
|
||||
# CHECK-NEXT: - - - - 2.00 2.00 - - - - - - raddhn2 v0.16b, v0.8h, v0.8h
|
||||
# CHECK-NEXT: - - - - 2.00 2.00 - - - - - - raddhn2 v0.4s, v0.2d, v0.2d
|
||||
# CHECK-NEXT: - - - - 2.00 2.00 - - - - - - raddhn2 v0.8h, v0.4s, v0.4s
|
||||
# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - raddhn v0.2s, v0.2d, v0.2d
|
||||
# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - raddhn v0.4h, v0.4s, v0.4s
|
||||
# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - raddhn v0.8b, v0.8h, v0.8h
|
||||
# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - raddhn2 v0.16b, v0.8h, v0.8h
|
||||
# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - raddhn2 v0.4s, v0.2d, v0.2d
|
||||
# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - raddhn2 v0.8h, v0.4s, v0.4s
|
||||
# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - rbit v0.16b, v0.16b
|
||||
# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - rbit v0.8b, v0.8b
|
||||
# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - rev16 v21.8b, v1.8b
|
||||
|
@ -2563,19 +2563,19 @@ zip2 v0.8h, v0.8h, v0.8h
|
|||
# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - rshrn2 v0.16b, v0.8h, #3
|
||||
# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - rshrn2 v0.4s, v0.2d, #3
|
||||
# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - rshrn2 v0.8h, v0.4s, #3
|
||||
# CHECK-NEXT: - - - - 2.00 2.00 - - - - - - rsubhn v0.2s, v0.2d, v0.2d
|
||||
# CHECK-NEXT: - - - - 2.00 2.00 - - - - - - rsubhn v0.4h, v0.4s, v0.4s
|
||||
# CHECK-NEXT: - - - - 2.00 2.00 - - - - - - rsubhn v0.8b, v0.8h, v0.8h
|
||||
# CHECK-NEXT: - - - - 2.00 2.00 - - - - - - rsubhn2 v0.16b, v0.8h, v0.8h
|
||||
# CHECK-NEXT: - - - - 2.00 2.00 - - - - - - rsubhn2 v0.4s, v0.2d, v0.2d
|
||||
# CHECK-NEXT: - - - - 2.00 2.00 - - - - - - rsubhn2 v0.8h, v0.4s, v0.4s
|
||||
# CHECK-NEXT: - - - - 2.00 2.00 - - - - - - saba v0.16b, v0.16b, v0.16b
|
||||
# CHECK-NEXT: - - - - 2.00 2.00 - - - - - - sabal v0.2d, v0.2s, v0.2s
|
||||
# CHECK-NEXT: - - - - 2.00 2.00 - - - - - - sabal v0.4s, v0.4h, v0.4h
|
||||
# CHECK-NEXT: - - - - 2.00 2.00 - - - - - - sabal v0.8h, v0.8b, v0.8b
|
||||
# CHECK-NEXT: - - - - 2.00 2.00 - - - - - - sabal2 v0.2d, v0.4s, v0.4s
|
||||
# CHECK-NEXT: - - - - 2.00 2.00 - - - - - - sabal2 v0.4s, v0.8h, v0.8h
|
||||
# CHECK-NEXT: - - - - 2.00 2.00 - - - - - - sabal2 v0.8h, v0.16b, v0.16b
|
||||
# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - rsubhn v0.2s, v0.2d, v0.2d
|
||||
# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - rsubhn v0.4h, v0.4s, v0.4s
|
||||
# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - rsubhn v0.8b, v0.8h, v0.8h
|
||||
# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - rsubhn2 v0.16b, v0.8h, v0.8h
|
||||
# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - rsubhn2 v0.4s, v0.2d, v0.2d
|
||||
# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - rsubhn2 v0.8h, v0.4s, v0.4s
|
||||
# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - saba v0.16b, v0.16b, v0.16b
|
||||
# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sabal v0.2d, v0.2s, v0.2s
|
||||
# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sabal v0.4s, v0.4h, v0.4h
|
||||
# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sabal v0.8h, v0.8b, v0.8b
|
||||
# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sabal2 v0.2d, v0.4s, v0.4s
|
||||
# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sabal2 v0.4s, v0.8h, v0.8h
|
||||
# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sabal2 v0.8h, v0.16b, v0.16b
|
||||
# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sabd v0.4h, v0.4h, v0.4h
|
||||
# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sabdl v0.2d, v0.2s, v0.2s
|
||||
# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sabdl v0.4s, v0.4h, v0.4h
|
||||
|
@ -2583,12 +2583,12 @@ zip2 v0.8h, v0.8h, v0.8h
|
|||
# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sabdl2 v0.2d, v0.4s, v0.4s
|
||||
# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sabdl2 v0.4s, v0.8h, v0.8h
|
||||
# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sabdl2 v0.8h, v0.16b, v0.16b
|
||||
# CHECK-NEXT: - - - - 2.00 2.00 - - - - - - sadalp v0.1d, v0.2s
|
||||
# CHECK-NEXT: - - - - 2.00 2.00 - - - - - - sadalp v0.2d, v0.4s
|
||||
# CHECK-NEXT: - - - - 2.00 2.00 - - - - - - sadalp v0.2s, v0.4h
|
||||
# CHECK-NEXT: - - - - 2.00 2.00 - - - - - - sadalp v0.4h, v0.8b
|
||||
# CHECK-NEXT: - - - - 2.00 2.00 - - - - - - sadalp v0.4s, v0.8h
|
||||
# CHECK-NEXT: - - - - 2.00 2.00 - - - - - - sadalp v0.8h, v0.16b
|
||||
# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sadalp v0.1d, v0.2s
|
||||
# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sadalp v0.2d, v0.4s
|
||||
# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sadalp v0.2s, v0.4h
|
||||
# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sadalp v0.4h, v0.8b
|
||||
# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sadalp v0.4s, v0.8h
|
||||
# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sadalp v0.8h, v0.16b
|
||||
# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - saddl v0.2d, v0.2s, v0.2s
|
||||
# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - saddl v0.4s, v0.4h, v0.4h
|
||||
# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - saddl v0.8h, v0.8b, v0.8b
|
||||
|
@ -2857,14 +2857,14 @@ zip2 v0.8h, v0.8h, v0.8h
|
|||
# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - srshr v0.4s, v0.4s, #3
|
||||
# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - srshr v0.8b, v0.8b, #3
|
||||
# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - srshr v0.8h, v0.8h, #3
|
||||
# CHECK-NEXT: - - - - 2.00 2.00 - - - - - - srsra d15, d11, #19
|
||||
# CHECK-NEXT: - - - - 2.00 2.00 - - - - - - srsra v0.16b, v0.16b, #3
|
||||
# CHECK-NEXT: - - - - 2.00 2.00 - - - - - - srsra v0.2d, v0.2d, #3
|
||||
# CHECK-NEXT: - - - - 2.00 2.00 - - - - - - srsra v0.2s, v0.2s, #3
|
||||
# CHECK-NEXT: - - - - 2.00 2.00 - - - - - - srsra v0.4h, v0.4h, #3
|
||||
# CHECK-NEXT: - - - - 2.00 2.00 - - - - - - srsra v0.4s, v0.4s, #3
|
||||
# CHECK-NEXT: - - - - 2.00 2.00 - - - - - - srsra v0.8b, v0.8b, #3
|
||||
# CHECK-NEXT: - - - - 2.00 2.00 - - - - - - srsra v0.8h, v0.8h, #3
|
||||
# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - srsra d15, d11, #19
|
||||
# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - srsra v0.16b, v0.16b, #3
|
||||
# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - srsra v0.2d, v0.2d, #3
|
||||
# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - srsra v0.2s, v0.2s, #3
|
||||
# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - srsra v0.4h, v0.4h, #3
|
||||
# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - srsra v0.4s, v0.4s, #3
|
||||
# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - srsra v0.8b, v0.8b, #3
|
||||
# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - srsra v0.8h, v0.8h, #3
|
||||
# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sshl d31, d31, d31
|
||||
# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sshl v0.2d, v0.2d, v0.2d
|
||||
# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sshl v0.2s, v0.2s, v0.2s
|
||||
|
@ -2965,13 +2965,13 @@ zip2 v0.8h, v0.8h, v0.8h
|
|||
# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - trn2 v0.4s, v0.4s, v0.4s
|
||||
# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - trn2 v0.8b, v0.8b, v0.8b
|
||||
# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - trn2 v0.8h, v0.8h, v0.8h
|
||||
# CHECK-NEXT: - - - - 2.00 2.00 - - - - - - uaba v0.8b, v0.8b, v0.8b
|
||||
# CHECK-NEXT: - - - - 2.00 2.00 - - - - - - uabal v0.2d, v0.2s, v0.2s
|
||||
# CHECK-NEXT: - - - - 2.00 2.00 - - - - - - uabal v0.4s, v0.4h, v0.4h
|
||||
# CHECK-NEXT: - - - - 2.00 2.00 - - - - - - uabal v0.8h, v0.8b, v0.8b
|
||||
# CHECK-NEXT: - - - - 2.00 2.00 - - - - - - uabal2 v0.2d, v0.4s, v0.4s
|
||||
# CHECK-NEXT: - - - - 2.00 2.00 - - - - - - uabal2 v0.4s, v0.8h, v0.8h
|
||||
# CHECK-NEXT: - - - - 2.00 2.00 - - - - - - uabal2 v0.8h, v0.16b, v0.16b
|
||||
# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - uaba v0.8b, v0.8b, v0.8b
|
||||
# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - uabal v0.2d, v0.2s, v0.2s
|
||||
# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - uabal v0.4s, v0.4h, v0.4h
|
||||
# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - uabal v0.8h, v0.8b, v0.8b
|
||||
# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - uabal2 v0.2d, v0.4s, v0.4s
|
||||
# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - uabal2 v0.4s, v0.8h, v0.8h
|
||||
# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - uabal2 v0.8h, v0.16b, v0.16b
|
||||
# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - uabd v0.4h, v0.4h, v0.4h
|
||||
# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - uabdl v0.2d, v0.2s, v0.2s
|
||||
# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - uabdl v0.4s, v0.4h, v0.4h
|
||||
|
@ -2979,12 +2979,12 @@ zip2 v0.8h, v0.8h, v0.8h
|
|||
# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - uabdl2 v0.2d, v0.4s, v0.4s
|
||||
# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - uabdl2 v0.4s, v0.8h, v0.8h
|
||||
# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - uabdl2 v0.8h, v0.16b, v0.16b
|
||||
# CHECK-NEXT: - - - - 2.00 2.00 - - - - - - uadalp v0.1d, v0.2s
|
||||
# CHECK-NEXT: - - - - 2.00 2.00 - - - - - - uadalp v0.2d, v0.4s
|
||||
# CHECK-NEXT: - - - - 2.00 2.00 - - - - - - uadalp v0.2s, v0.4h
|
||||
# CHECK-NEXT: - - - - 2.00 2.00 - - - - - - uadalp v0.4h, v0.8b
|
||||
# CHECK-NEXT: - - - - 2.00 2.00 - - - - - - uadalp v0.4s, v0.8h
|
||||
# CHECK-NEXT: - - - - 2.00 2.00 - - - - - - uadalp v0.8h, v0.16b
|
||||
# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - uadalp v0.1d, v0.2s
|
||||
# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - uadalp v0.2d, v0.4s
|
||||
# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - uadalp v0.2s, v0.4h
|
||||
# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - uadalp v0.4h, v0.8b
|
||||
# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - uadalp v0.4s, v0.8h
|
||||
# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - uadalp v0.8h, v0.16b
|
||||
# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - uaddl v0.2d, v0.2s, v0.2s
|
||||
# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - uaddl v0.4s, v0.4h, v0.4h
|
||||
# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - uaddl v0.8h, v0.8b, v0.8b
|
||||
|
@ -3122,14 +3122,14 @@ zip2 v0.8h, v0.8h, v0.8h
|
|||
# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - urshr v0.8h, v0.8h, #3
|
||||
# CHECK-NEXT: - - - - - - 9.00 - - - - - ursqrte v0.2s, v0.2s
|
||||
# CHECK-NEXT: - - - - - - 9.00 - - - - - ursqrte v0.4s, v0.4s
|
||||
# CHECK-NEXT: - - - - 2.00 2.00 - - - - - - ursra d18, d10, #13
|
||||
# CHECK-NEXT: - - - - 2.00 2.00 - - - - - - ursra v0.16b, v0.16b, #3
|
||||
# CHECK-NEXT: - - - - 2.00 2.00 - - - - - - ursra v0.2d, v0.2d, #3
|
||||
# CHECK-NEXT: - - - - 2.00 2.00 - - - - - - ursra v0.2s, v0.2s, #3
|
||||
# CHECK-NEXT: - - - - 2.00 2.00 - - - - - - ursra v0.4h, v0.4h, #3
|
||||
# CHECK-NEXT: - - - - 2.00 2.00 - - - - - - ursra v0.4s, v0.4s, #3
|
||||
# CHECK-NEXT: - - - - 2.00 2.00 - - - - - - ursra v0.8b, v0.8b, #3
|
||||
# CHECK-NEXT: - - - - 2.00 2.00 - - - - - - ursra v0.8h, v0.8h, #3
|
||||
# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - ursra d18, d10, #13
|
||||
# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - ursra v0.16b, v0.16b, #3
|
||||
# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - ursra v0.2d, v0.2d, #3
|
||||
# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - ursra v0.2s, v0.2s, #3
|
||||
# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - ursra v0.4h, v0.4h, #3
|
||||
# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - ursra v0.4s, v0.4s, #3
|
||||
# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - ursra v0.8b, v0.8b, #3
|
||||
# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - ursra v0.8h, v0.8h, #3
|
||||
# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - ushl d0, d0, d0
|
||||
# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - ushl v0.16b, v0.16b, v0.16b
|
||||
# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - ushl v0.4s, v0.4s, v0.4s
|
||||
|
|
Loading…
Reference in New Issue