forked from OSchip/llvm-project
[AArch64] Add the pipeline model for Exynos M5
Add the scheduling and cost models for Exynos M5.
This commit is contained in:
parent
25f33d8318
commit
9bdfee2a3b
|
@ -450,6 +450,7 @@ include "AArch64SchedFalkor.td"
|
|||
include "AArch64SchedKryo.td"
|
||||
include "AArch64SchedExynosM3.td"
|
||||
include "AArch64SchedExynosM4.td"
|
||||
include "AArch64SchedExynosM5.td"
|
||||
include "AArch64SchedThunderX.td"
|
||||
include "AArch64SchedThunderX2T99.td"
|
||||
|
||||
|
@ -790,7 +791,7 @@ def : ProcessorModel<"neoverse-n1", CortexA57Model, [ProcNeoverseN1]>;
|
|||
def : ProcessorModel<"cyclone", CycloneModel, [ProcCyclone]>;
|
||||
def : ProcessorModel<"exynos-m3", ExynosM3Model, [ProcExynosM3]>;
|
||||
def : ProcessorModel<"exynos-m4", ExynosM4Model, [ProcExynosM4]>;
|
||||
def : ProcessorModel<"exynos-m5", ExynosM4Model, [ProcExynosM4]>;
|
||||
def : ProcessorModel<"exynos-m5", ExynosM5Model, [ProcExynosM4]>;
|
||||
def : ProcessorModel<"falkor", FalkorModel, [ProcFalkor]>;
|
||||
def : ProcessorModel<"saphira", FalkorModel, [ProcSaphira]>;
|
||||
def : ProcessorModel<"kryo", KryoModel, [ProcKryo]>;
|
||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,57 @@
|
|||
# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
|
||||
# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m3 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,M3
|
||||
# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m4 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,M4
|
||||
# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m5 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,M5
|
||||
|
||||
aese v0.16b, v1.16b
|
||||
aesmc v0.16b, v0.16b
|
||||
|
||||
aesd v0.16b, v1.16b
|
||||
aesimc v0.16b, v0.16b
|
||||
|
||||
# ALL: Iterations: 100
|
||||
# ALL-NEXT: Instructions: 400
|
||||
|
||||
# M3-NEXT: Total Cycles: 203
|
||||
# M4-NEXT: Total Cycles: 203
|
||||
# M5-NEXT: Total Cycles: 403
|
||||
|
||||
# ALL-NEXT: Total uOps: 400
|
||||
|
||||
# ALL: Dispatch Width: 6
|
||||
|
||||
# M3-NEXT: uOps Per Cycle: 1.97
|
||||
# M3-NEXT: IPC: 1.97
|
||||
|
||||
# M4-NEXT: uOps Per Cycle: 1.97
|
||||
# M4-NEXT: IPC: 1.97
|
||||
|
||||
# M5-NEXT: uOps Per Cycle: 0.99
|
||||
# M5-NEXT: IPC: 0.99
|
||||
|
||||
# ALL-NEXT: Block RThroughput: 2.0
|
||||
|
||||
# ALL: Instruction Info:
|
||||
# ALL-NEXT: [1]: #uOps
|
||||
# ALL-NEXT: [2]: Latency
|
||||
# ALL-NEXT: [3]: RThroughput
|
||||
# ALL-NEXT: [4]: MayLoad
|
||||
# ALL-NEXT: [5]: MayStore
|
||||
# ALL-NEXT: [6]: HasSideEffects (U)
|
||||
|
||||
# ALL: [1] [2] [3] [4] [5] [6] Instructions:
|
||||
|
||||
# M3-NEXT: 1 1 0.50 aese v0.16b, v1.16b
|
||||
# M3-NEXT: 1 1 0.50 aesmc v0.16b, v0.16b
|
||||
# M3-NEXT: 1 1 0.50 aesd v0.16b, v1.16b
|
||||
# M3-NEXT: 1 1 0.50 aesimc v0.16b, v0.16b
|
||||
|
||||
# M4-NEXT: 1 1 0.50 aese v0.16b, v1.16b
|
||||
# M4-NEXT: 1 1 0.50 aesmc v0.16b, v0.16b
|
||||
# M4-NEXT: 1 1 0.50 aesd v0.16b, v1.16b
|
||||
# M4-NEXT: 1 1 0.50 aesimc v0.16b, v0.16b
|
||||
|
||||
# M5-NEXT: 1 2 0.50 aese v0.16b, v1.16b
|
||||
# M5-NEXT: 1 2 0.50 aesmc v0.16b, v0.16b
|
||||
# M5-NEXT: 1 2 0.50 aesd v0.16b, v1.16b
|
||||
# M5-NEXT: 1 2 0.50 aesimc v0.16b, v0.16b
|
|
@ -0,0 +1,189 @@
|
|||
# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
|
||||
# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m3 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,M3
|
||||
# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m4 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,M4
|
||||
# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m5 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,M5
|
||||
|
||||
ld1 {v0.s}[0], [sp]
|
||||
ld1r {v0.2s}, [sp]
|
||||
ld1 {v0.2s}, [sp]
|
||||
ld1 {v0.2s, v1.2s}, [sp]
|
||||
ld1 {v0.2s, v1.2s, v2.2s}, [sp]
|
||||
ld1 {v0.2s, v1.2s, v2.2s, v3.2s}, [sp]
|
||||
|
||||
ld1 {v0.d}[0], [sp]
|
||||
ld1r {v0.2d}, [sp]
|
||||
ld1 {v0.2d}, [sp]
|
||||
ld1 {v0.2d, v1.2d}, [sp]
|
||||
ld1 {v0.2d, v1.2d, v2.2d}, [sp]
|
||||
ld1 {v0.2d, v1.2d, v2.2d, v3.2d}, [sp]
|
||||
|
||||
ld1 {v0.s}[0], [sp], #4
|
||||
ld1r {v0.2s}, [sp], #4
|
||||
ld1 {v0.2s}, [sp], #8
|
||||
ld1 {v0.2s, v1.2s}, [sp], #16
|
||||
ld1 {v0.2s, v1.2s, v2.2s}, [sp], #24
|
||||
ld1 {v0.2s, v1.2s, v2.2s, v3.2s}, [sp], #32
|
||||
|
||||
ld1 {v0.d}[0], [sp], #8
|
||||
ld1r {v0.2d}, [sp], #8
|
||||
ld1 {v0.2d}, [sp], #16
|
||||
ld1 {v0.2d, v1.2d}, [sp], #32
|
||||
ld1 {v0.2d, v1.2d, v2.2d}, [sp], #48
|
||||
ld1 {v0.2d, v1.2d, v2.2d, v3.2d}, [sp], #64
|
||||
|
||||
ld1 {v0.s}[0], [sp], x0
|
||||
ld1r {v0.2s}, [sp], x0
|
||||
ld1 {v0.2s}, [sp], x0
|
||||
ld1 {v0.2s, v1.2s}, [sp], x0
|
||||
ld1 {v0.2s, v1.2s, v2.2s}, [sp], x0
|
||||
ld1 {v0.2s, v1.2s, v2.2s, v3.2s}, [sp], x0
|
||||
|
||||
ld1 {v0.d}[0], [sp], x0
|
||||
ld1r {v0.2d}, [sp], x0
|
||||
ld1 {v0.2d}, [sp], x0
|
||||
ld1 {v0.2d, v1.2d}, [sp], x0
|
||||
ld1 {v0.2d, v1.2d, v2.2d}, [sp], x0
|
||||
ld1 {v0.2d, v1.2d, v2.2d, v3.2d}, [sp], x0
|
||||
|
||||
# ALL: Iterations: 100
|
||||
# ALL-NEXT: Instructions: 3600
|
||||
|
||||
# M3-NEXT: Total Cycles: 14903
|
||||
# M4-NEXT: Total Cycles: 14703
|
||||
# M5-NEXT: Total Cycles: 17203
|
||||
|
||||
# ALL-NEXT: Total uOps: 10200
|
||||
|
||||
# ALL: Dispatch Width: 6
|
||||
|
||||
# M3-NEXT: uOps Per Cycle: 0.68
|
||||
# M3-NEXT: IPC: 0.24
|
||||
|
||||
# M4-NEXT: uOps Per Cycle: 0.69
|
||||
# M4-NEXT: IPC: 0.24
|
||||
|
||||
# M5-NEXT: uOps Per Cycle: 0.59
|
||||
# M5-NEXT: IPC: 0.21
|
||||
|
||||
# ALL-NEXT: Block RThroughput: 39.0
|
||||
|
||||
# ALL: Instruction Info:
|
||||
# ALL-NEXT: [1]: #uOps
|
||||
# ALL-NEXT: [2]: Latency
|
||||
# ALL-NEXT: [3]: RThroughput
|
||||
# ALL-NEXT: [4]: MayLoad
|
||||
# ALL-NEXT: [5]: MayStore
|
||||
# ALL-NEXT: [6]: HasSideEffects (U)
|
||||
|
||||
# ALL: [1] [2] [3] [4] [5] [6] Instructions:
|
||||
|
||||
# M3-NEXT: 2 7 1.00 * ld1 { v0.s }[0], [sp]
|
||||
# M3-NEXT: 1 5 0.50 * ld1r { v0.2s }, [sp]
|
||||
# M3-NEXT: 1 5 0.50 * ld1 { v0.2s }, [sp]
|
||||
# M3-NEXT: 2 5 1.00 * ld1 { v0.2s, v1.2s }, [sp]
|
||||
# M3-NEXT: 3 6 1.50 * ld1 { v0.2s, v1.2s, v2.2s }, [sp]
|
||||
# M3-NEXT: 4 6 2.00 * ld1 { v0.2s, v1.2s, v2.2s, v3.2s }, [sp]
|
||||
# M3-NEXT: 2 6 1.00 * ld1 { v0.d }[0], [sp]
|
||||
# M3-NEXT: 1 5 0.50 * ld1r { v0.2d }, [sp]
|
||||
# M3-NEXT: 1 5 0.50 * ld1 { v0.2d }, [sp]
|
||||
# M3-NEXT: 2 5 1.00 * ld1 { v0.2d, v1.2d }, [sp]
|
||||
# M3-NEXT: 3 6 1.50 * ld1 { v0.2d, v1.2d, v2.2d }, [sp]
|
||||
# M3-NEXT: 4 6 2.00 * ld1 { v0.2d, v1.2d, v2.2d, v3.2d }, [sp]
|
||||
# M3-NEXT: 3 7 1.00 * ld1 { v0.s }[0], [sp], #4
|
||||
# M3-NEXT: 2 5 0.50 * ld1r { v0.2s }, [sp], #4
|
||||
# M3-NEXT: 2 5 0.50 * ld1 { v0.2s }, [sp], #8
|
||||
# M3-NEXT: 3 5 1.00 * ld1 { v0.2s, v1.2s }, [sp], #16
|
||||
# M3-NEXT: 4 6 1.50 * ld1 { v0.2s, v1.2s, v2.2s }, [sp], #24
|
||||
# M3-NEXT: 5 6 2.00 * ld1 { v0.2s, v1.2s, v2.2s, v3.2s }, [sp], #32
|
||||
# M3-NEXT: 3 6 1.00 * ld1 { v0.d }[0], [sp], #8
|
||||
# M3-NEXT: 2 5 0.50 * ld1r { v0.2d }, [sp], #8
|
||||
# M3-NEXT: 2 5 0.50 * ld1 { v0.2d }, [sp], #16
|
||||
# M3-NEXT: 3 5 1.00 * ld1 { v0.2d, v1.2d }, [sp], #32
|
||||
# M3-NEXT: 4 6 1.50 * ld1 { v0.2d, v1.2d, v2.2d }, [sp], #48
|
||||
# M3-NEXT: 5 6 2.00 * ld1 { v0.2d, v1.2d, v2.2d, v3.2d }, [sp], #64
|
||||
# M3-NEXT: 3 7 1.00 * ld1 { v0.s }[0], [sp], x0
|
||||
# M3-NEXT: 2 5 0.50 * ld1r { v0.2s }, [sp], x0
|
||||
# M3-NEXT: 2 5 0.50 * ld1 { v0.2s }, [sp], x0
|
||||
# M3-NEXT: 3 5 1.00 * ld1 { v0.2s, v1.2s }, [sp], x0
|
||||
# M3-NEXT: 4 6 1.50 * ld1 { v0.2s, v1.2s, v2.2s }, [sp], x0
|
||||
# M3-NEXT: 5 6 2.00 * ld1 { v0.2s, v1.2s, v2.2s, v3.2s }, [sp], x0
|
||||
# M3-NEXT: 3 6 1.00 * ld1 { v0.d }[0], [sp], x0
|
||||
# M3-NEXT: 2 5 0.50 * ld1r { v0.2d }, [sp], x0
|
||||
# M3-NEXT: 2 5 0.50 * ld1 { v0.2d }, [sp], x0
|
||||
# M3-NEXT: 3 5 1.00 * ld1 { v0.2d, v1.2d }, [sp], x0
|
||||
# M3-NEXT: 4 6 1.50 * ld1 { v0.2d, v1.2d, v2.2d }, [sp], x0
|
||||
# M3-NEXT: 5 6 2.00 * ld1 { v0.2d, v1.2d, v2.2d, v3.2d }, [sp], x0
|
||||
|
||||
# M4-NEXT: 2 6 1.00 * ld1 { v0.s }[0], [sp]
|
||||
# M4-NEXT: 1 5 0.50 * ld1r { v0.2s }, [sp]
|
||||
# M4-NEXT: 1 5 0.50 * ld1 { v0.2s }, [sp]
|
||||
# M4-NEXT: 2 5 1.00 * ld1 { v0.2s, v1.2s }, [sp]
|
||||
# M4-NEXT: 3 6 1.50 * ld1 { v0.2s, v1.2s, v2.2s }, [sp]
|
||||
# M4-NEXT: 4 6 2.00 * ld1 { v0.2s, v1.2s, v2.2s, v3.2s }, [sp]
|
||||
# M4-NEXT: 2 6 1.00 * ld1 { v0.d }[0], [sp]
|
||||
# M4-NEXT: 1 5 0.50 * ld1r { v0.2d }, [sp]
|
||||
# M4-NEXT: 1 5 0.50 * ld1 { v0.2d }, [sp]
|
||||
# M4-NEXT: 2 5 1.00 * ld1 { v0.2d, v1.2d }, [sp]
|
||||
# M4-NEXT: 3 6 1.50 * ld1 { v0.2d, v1.2d, v2.2d }, [sp]
|
||||
# M4-NEXT: 4 6 2.00 * ld1 { v0.2d, v1.2d, v2.2d, v3.2d }, [sp]
|
||||
# M4-NEXT: 3 6 1.00 * ld1 { v0.s }[0], [sp], #4
|
||||
# M4-NEXT: 2 5 0.50 * ld1r { v0.2s }, [sp], #4
|
||||
# M4-NEXT: 2 5 0.50 * ld1 { v0.2s }, [sp], #8
|
||||
# M4-NEXT: 3 5 1.00 * ld1 { v0.2s, v1.2s }, [sp], #16
|
||||
# M4-NEXT: 4 6 1.50 * ld1 { v0.2s, v1.2s, v2.2s }, [sp], #24
|
||||
# M4-NEXT: 5 6 2.00 * ld1 { v0.2s, v1.2s, v2.2s, v3.2s }, [sp], #32
|
||||
# M4-NEXT: 3 6 1.00 * ld1 { v0.d }[0], [sp], #8
|
||||
# M4-NEXT: 2 5 0.50 * ld1r { v0.2d }, [sp], #8
|
||||
# M4-NEXT: 2 5 0.50 * ld1 { v0.2d }, [sp], #16
|
||||
# M4-NEXT: 3 5 1.00 * ld1 { v0.2d, v1.2d }, [sp], #32
|
||||
# M4-NEXT: 4 6 1.50 * ld1 { v0.2d, v1.2d, v2.2d }, [sp], #48
|
||||
# M4-NEXT: 5 6 2.00 * ld1 { v0.2d, v1.2d, v2.2d, v3.2d }, [sp], #64
|
||||
# M4-NEXT: 3 6 1.00 * ld1 { v0.s }[0], [sp], x0
|
||||
# M4-NEXT: 2 5 0.50 * ld1r { v0.2s }, [sp], x0
|
||||
# M4-NEXT: 2 5 0.50 * ld1 { v0.2s }, [sp], x0
|
||||
# M4-NEXT: 3 5 1.00 * ld1 { v0.2s, v1.2s }, [sp], x0
|
||||
# M4-NEXT: 4 6 1.50 * ld1 { v0.2s, v1.2s, v2.2s }, [sp], x0
|
||||
# M4-NEXT: 5 6 2.00 * ld1 { v0.2s, v1.2s, v2.2s, v3.2s }, [sp], x0
|
||||
# M4-NEXT: 3 6 1.00 * ld1 { v0.d }[0], [sp], x0
|
||||
# M4-NEXT: 2 5 0.50 * ld1r { v0.2d }, [sp], x0
|
||||
# M4-NEXT: 2 5 0.50 * ld1 { v0.2d }, [sp], x0
|
||||
# M4-NEXT: 3 5 1.00 * ld1 { v0.2d, v1.2d }, [sp], x0
|
||||
# M4-NEXT: 4 6 1.50 * ld1 { v0.2d, v1.2d, v2.2d }, [sp], x0
|
||||
# M4-NEXT: 5 6 2.00 * ld1 { v0.2d, v1.2d, v2.2d, v3.2d }, [sp], x0
|
||||
|
||||
# M5-NEXT: 2 7 1.00 * ld1 { v0.s }[0], [sp]
|
||||
# M5-NEXT: 1 6 0.50 * ld1r { v0.2s }, [sp]
|
||||
# M5-NEXT: 1 6 0.50 * ld1 { v0.2s }, [sp]
|
||||
# M5-NEXT: 2 6 1.00 * ld1 { v0.2s, v1.2s }, [sp]
|
||||
# M5-NEXT: 3 7 1.50 * ld1 { v0.2s, v1.2s, v2.2s }, [sp]
|
||||
# M5-NEXT: 4 7 2.00 * ld1 { v0.2s, v1.2s, v2.2s, v3.2s }, [sp]
|
||||
# M5-NEXT: 2 7 1.00 * ld1 { v0.d }[0], [sp]
|
||||
# M5-NEXT: 1 6 0.50 * ld1r { v0.2d }, [sp]
|
||||
# M5-NEXT: 1 6 0.50 * ld1 { v0.2d }, [sp]
|
||||
# M5-NEXT: 2 6 1.00 * ld1 { v0.2d, v1.2d }, [sp]
|
||||
# M5-NEXT: 3 7 1.50 * ld1 { v0.2d, v1.2d, v2.2d }, [sp]
|
||||
# M5-NEXT: 4 7 2.00 * ld1 { v0.2d, v1.2d, v2.2d, v3.2d }, [sp]
|
||||
# M5-NEXT: 3 7 1.00 * ld1 { v0.s }[0], [sp], #4
|
||||
# M5-NEXT: 2 6 0.50 * ld1r { v0.2s }, [sp], #4
|
||||
# M5-NEXT: 2 6 0.50 * ld1 { v0.2s }, [sp], #8
|
||||
# M5-NEXT: 3 6 1.00 * ld1 { v0.2s, v1.2s }, [sp], #16
|
||||
# M5-NEXT: 4 7 1.50 * ld1 { v0.2s, v1.2s, v2.2s }, [sp], #24
|
||||
# M5-NEXT: 5 7 2.00 * ld1 { v0.2s, v1.2s, v2.2s, v3.2s }, [sp], #32
|
||||
# M5-NEXT: 3 7 1.00 * ld1 { v0.d }[0], [sp], #8
|
||||
# M5-NEXT: 2 6 0.50 * ld1r { v0.2d }, [sp], #8
|
||||
# M5-NEXT: 2 6 0.50 * ld1 { v0.2d }, [sp], #16
|
||||
# M5-NEXT: 3 6 1.00 * ld1 { v0.2d, v1.2d }, [sp], #32
|
||||
# M5-NEXT: 4 7 1.50 * ld1 { v0.2d, v1.2d, v2.2d }, [sp], #48
|
||||
# M5-NEXT: 5 7 2.00 * ld1 { v0.2d, v1.2d, v2.2d, v3.2d }, [sp], #64
|
||||
# M5-NEXT: 3 7 1.00 * ld1 { v0.s }[0], [sp], x0
|
||||
# M5-NEXT: 2 6 0.50 * ld1r { v0.2s }, [sp], x0
|
||||
# M5-NEXT: 2 6 0.50 * ld1 { v0.2s }, [sp], x0
|
||||
# M5-NEXT: 3 6 1.00 * ld1 { v0.2s, v1.2s }, [sp], x0
|
||||
# M5-NEXT: 4 7 1.50 * ld1 { v0.2s, v1.2s, v2.2s }, [sp], x0
|
||||
# M5-NEXT: 5 7 2.00 * ld1 { v0.2s, v1.2s, v2.2s, v3.2s }, [sp], x0
|
||||
# M5-NEXT: 3 7 1.00 * ld1 { v0.d }[0], [sp], x0
|
||||
# M5-NEXT: 2 6 0.50 * ld1r { v0.2d }, [sp], x0
|
||||
# M5-NEXT: 2 6 0.50 * ld1 { v0.2d }, [sp], x0
|
||||
# M5-NEXT: 3 6 1.00 * ld1 { v0.2d, v1.2d }, [sp], x0
|
||||
# M5-NEXT: 4 7 1.50 * ld1 { v0.2d, v1.2d, v2.2d }, [sp], x0
|
||||
# M5-NEXT: 5 7 2.00 * ld1 { v0.2d, v1.2d, v2.2d, v3.2d }, [sp], x0
|
|
@ -0,0 +1,118 @@
|
|||
# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
|
||||
# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m3 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,M3
|
||||
# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m4 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,M4
|
||||
# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m5 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,M5
|
||||
|
||||
ld2 {v0.s, v1.s}[0], [sp]
|
||||
ld2r {v0.2s, v1.2s}, [sp]
|
||||
ld2 {v0.2s, v1.2s}, [sp]
|
||||
|
||||
ld2 {v0.d, v1.d}[0], [sp]
|
||||
ld2r {v0.2d, v1.2d}, [sp]
|
||||
ld2 {v0.2d, v1.2d}, [sp]
|
||||
|
||||
ld2 {v0.s, v1.s}[0], [sp], #8
|
||||
ld2r {v0.2s, v1.2s}, [sp], #8
|
||||
ld2 {v0.2s, v1.2s}, [sp], #16
|
||||
|
||||
ld2 {v0.d, v1.d}[0], [sp], #16
|
||||
ld2r {v0.2d, v1.2d}, [sp], #16
|
||||
ld2 {v0.2d, v1.2d}, [sp], #32
|
||||
|
||||
ld2 {v0.s, v1.s}[0], [sp], x0
|
||||
ld2r {v0.2s, v1.2s}, [sp], x0
|
||||
ld2 {v0.2s, v1.2s}, [sp], x0
|
||||
|
||||
ld2 {v0.d, v1.d}[0], [sp], x0
|
||||
ld2r {v0.2d, v1.2d}, [sp], x0
|
||||
ld2 {v0.2d, v1.2d}, [sp], x0
|
||||
|
||||
# ALL: Iterations: 100
|
||||
# ALL-NEXT: Instructions: 1800
|
||||
|
||||
# M3-NEXT: Total Cycles: 10003
|
||||
# M4-NEXT: Total Cycles: 9803
|
||||
# M5-NEXT: Total Cycles: 11103
|
||||
|
||||
# ALL-NEXT: Total uOps: 5400
|
||||
|
||||
# ALL: Dispatch Width: 6
|
||||
|
||||
# M3-NEXT: uOps Per Cycle: 0.54
|
||||
# M3-NEXT: IPC: 0.18
|
||||
# M3-NEXT: Block RThroughput: 42.0
|
||||
|
||||
# M4-NEXT: uOps Per Cycle: 0.55
|
||||
# M4-NEXT: IPC: 0.18
|
||||
# M4-NEXT: Block RThroughput: 30.0
|
||||
|
||||
# M5-NEXT: uOps Per Cycle: 0.49
|
||||
# M5-NEXT: IPC: 0.16
|
||||
# M5-NEXT: Block RThroughput: 45.0
|
||||
|
||||
# ALL: Instruction Info:
|
||||
# ALL-NEXT: [1]: #uOps
|
||||
# ALL-NEXT: [2]: Latency
|
||||
# ALL-NEXT: [3]: RThroughput
|
||||
# ALL-NEXT: [4]: MayLoad
|
||||
# ALL-NEXT: [5]: MayStore
|
||||
# ALL-NEXT: [6]: HasSideEffects (U)
|
||||
|
||||
# ALL: [1] [2] [3] [4] [5] [6] Instructions:
|
||||
|
||||
# M3-NEXT: 3 7 1.00 * ld2 { v0.s, v1.s }[0], [sp]
|
||||
# M3-NEXT: 2 5 1.00 * ld2r { v0.2s, v1.2s }, [sp]
|
||||
# M3-NEXT: 2 10 5.00 * ld2 { v0.2s, v1.2s }, [sp]
|
||||
# M3-NEXT: 3 6 1.00 * ld2 { v0.d, v1.d }[0], [sp]
|
||||
# M3-NEXT: 2 5 1.00 * ld2r { v0.2d, v1.2d }, [sp]
|
||||
# M3-NEXT: 2 10 5.00 * ld2 { v0.2d, v1.2d }, [sp]
|
||||
# M3-NEXT: 4 7 1.00 * ld2 { v0.s, v1.s }[0], [sp], #8
|
||||
# M3-NEXT: 3 5 1.00 * ld2r { v0.2s, v1.2s }, [sp], #8
|
||||
# M3-NEXT: 3 10 5.00 * ld2 { v0.2s, v1.2s }, [sp], #16
|
||||
# M3-NEXT: 4 6 1.00 * ld2 { v0.d, v1.d }[0], [sp], #16
|
||||
# M3-NEXT: 3 5 1.00 * ld2r { v0.2d, v1.2d }, [sp], #16
|
||||
# M3-NEXT: 3 10 5.00 * ld2 { v0.2d, v1.2d }, [sp], #32
|
||||
# M3-NEXT: 4 7 1.00 * ld2 { v0.s, v1.s }[0], [sp], x0
|
||||
# M3-NEXT: 3 5 1.00 * ld2r { v0.2s, v1.2s }, [sp], x0
|
||||
# M3-NEXT: 3 10 5.00 * ld2 { v0.2s, v1.2s }, [sp], x0
|
||||
# M3-NEXT: 4 6 1.00 * ld2 { v0.d, v1.d }[0], [sp], x0
|
||||
# M3-NEXT: 3 5 1.00 * ld2r { v0.2d, v1.2d }, [sp], x0
|
||||
# M3-NEXT: 3 10 5.00 * ld2 { v0.2d, v1.2d }, [sp], x0
|
||||
|
||||
# M4-NEXT: 3 6 1.00 * ld2 { v0.s, v1.s }[0], [sp]
|
||||
# M4-NEXT: 2 5 1.00 * ld2r { v0.2s, v1.2s }, [sp]
|
||||
# M4-NEXT: 2 10 3.00 * ld2 { v0.2s, v1.2s }, [sp]
|
||||
# M4-NEXT: 3 6 1.00 * ld2 { v0.d, v1.d }[0], [sp]
|
||||
# M4-NEXT: 2 5 1.00 * ld2r { v0.2d, v1.2d }, [sp]
|
||||
# M4-NEXT: 2 10 3.00 * ld2 { v0.2d, v1.2d }, [sp]
|
||||
# M4-NEXT: 4 6 1.00 * ld2 { v0.s, v1.s }[0], [sp], #8
|
||||
# M4-NEXT: 3 5 1.00 * ld2r { v0.2s, v1.2s }, [sp], #8
|
||||
# M4-NEXT: 3 10 3.00 * ld2 { v0.2s, v1.2s }, [sp], #16
|
||||
# M4-NEXT: 4 6 1.00 * ld2 { v0.d, v1.d }[0], [sp], #16
|
||||
# M4-NEXT: 3 5 1.00 * ld2r { v0.2d, v1.2d }, [sp], #16
|
||||
# M4-NEXT: 3 10 3.00 * ld2 { v0.2d, v1.2d }, [sp], #32
|
||||
# M4-NEXT: 4 6 1.00 * ld2 { v0.s, v1.s }[0], [sp], x0
|
||||
# M4-NEXT: 3 5 1.00 * ld2r { v0.2s, v1.2s }, [sp], x0
|
||||
# M4-NEXT: 3 10 3.00 * ld2 { v0.2s, v1.2s }, [sp], x0
|
||||
# M4-NEXT: 4 6 1.00 * ld2 { v0.d, v1.d }[0], [sp], x0
|
||||
# M4-NEXT: 3 5 1.00 * ld2r { v0.2d, v1.2d }, [sp], x0
|
||||
# M4-NEXT: 3 10 3.00 * ld2 { v0.2d, v1.2d }, [sp], x0
|
||||
|
||||
# M5-NEXT: 3 7 1.00 * ld2 { v0.s, v1.s }[0], [sp]
|
||||
# M5-NEXT: 2 6 1.00 * ld2r { v0.2s, v1.2s }, [sp]
|
||||
# M5-NEXT: 2 11 5.50 * ld2 { v0.2s, v1.2s }, [sp]
|
||||
# M5-NEXT: 3 7 1.00 * ld2 { v0.d, v1.d }[0], [sp]
|
||||
# M5-NEXT: 2 6 1.00 * ld2r { v0.2d, v1.2d }, [sp]
|
||||
# M5-NEXT: 2 11 5.50 * ld2 { v0.2d, v1.2d }, [sp]
|
||||
# M5-NEXT: 4 7 1.00 * ld2 { v0.s, v1.s }[0], [sp], #8
|
||||
# M5-NEXT: 3 6 1.00 * ld2r { v0.2s, v1.2s }, [sp], #8
|
||||
# M5-NEXT: 3 11 5.50 * ld2 { v0.2s, v1.2s }, [sp], #16
|
||||
# M5-NEXT: 4 7 1.00 * ld2 { v0.d, v1.d }[0], [sp], #16
|
||||
# M5-NEXT: 3 6 1.00 * ld2r { v0.2d, v1.2d }, [sp], #16
|
||||
# M5-NEXT: 3 11 5.50 * ld2 { v0.2d, v1.2d }, [sp], #32
|
||||
# M5-NEXT: 4 7 1.00 * ld2 { v0.s, v1.s }[0], [sp], x0
|
||||
# M5-NEXT: 3 6 1.00 * ld2r { v0.2s, v1.2s }, [sp], x0
|
||||
# M5-NEXT: 3 11 5.50 * ld2 { v0.2s, v1.2s }, [sp], x0
|
||||
# M5-NEXT: 4 7 1.00 * ld2 { v0.d, v1.d }[0], [sp], x0
|
||||
# M5-NEXT: 3 6 1.00 * ld2r { v0.2d, v1.2d }, [sp], x0
|
||||
# M5-NEXT: 3 11 5.50 * ld2 { v0.2d, v1.2d }, [sp], x0
|
|
@ -0,0 +1,118 @@
|
|||
# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
|
||||
# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m3 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,M3
|
||||
# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m4 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,M4
|
||||
# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m5 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,M5
|
||||
|
||||
ld3 {v0.s, v1.s, v2.s}[0], [sp]
|
||||
ld3r {v0.2s, v1.2s, v2.2s}, [sp]
|
||||
ld3 {v0.2s, v1.2s, v2.2s}, [sp]
|
||||
|
||||
ld3 {v0.d, v1.d, v2.d}[0], [sp]
|
||||
ld3r {v0.2d, v1.2d, v2.2d}, [sp]
|
||||
ld3 {v0.2d, v1.2d, v2.2d}, [sp]
|
||||
|
||||
ld3 {v0.s, v1.s, v2.s}[0], [sp], #12
|
||||
ld3r {v0.2s, v1.2s, v2.2s}, [sp], #12
|
||||
ld3 {v0.2s, v1.2s, v2.2s}, [sp], #24
|
||||
|
||||
ld3 {v0.d, v1.d, v2.d}[0], [sp], #24
|
||||
ld3r {v0.2d, v1.2d, v2.2d}, [sp], #24
|
||||
ld3 {v0.2d, v1.2d, v2.2d}, [sp], #48
|
||||
|
||||
ld3 {v0.s, v1.s, v2.s}[0], [sp], x0
|
||||
ld3r {v0.2s, v1.2s, v2.2s}, [sp], x0
|
||||
ld3 {v0.2s, v1.2s, v2.2s}, [sp], x0
|
||||
|
||||
ld3 {v0.d, v1.d, v2.d}[0], [sp], x0
|
||||
ld3r {v0.2d, v1.2d, v2.2d}, [sp], x0
|
||||
ld3 {v0.2d, v1.2d, v2.2d}, [sp], x0
|
||||
|
||||
# ALL: Iterations: 100
|
||||
# ALL-NEXT: Instructions: 1800
|
||||
|
||||
# M3-NEXT: Total Cycles: 12501
|
||||
# M4-NEXT: Total Cycles: 11804
|
||||
# M5-NEXT: Total Cycles: 12903
|
||||
|
||||
# ALL-NEXT: Total uOps: 7500
|
||||
|
||||
# ALL: Dispatch Width: 6
|
||||
|
||||
# M3-NEXT: uOps Per Cycle: 0.60
|
||||
# M3-NEXT: IPC: 0.14
|
||||
# M3-NEXT: Block RThroughput: 84.0
|
||||
|
||||
# M4-NEXT: uOps Per Cycle: 0.64
|
||||
# M4-NEXT: IPC: 0.15
|
||||
# M4-NEXT: Block RThroughput: 54.0
|
||||
|
||||
# M5-NEXT: uOps Per Cycle: 0.58
|
||||
# M5-NEXT: IPC: 0.14
|
||||
# M5-NEXT: Block RThroughput: 22.5
|
||||
|
||||
# ALL: Instruction Info:
|
||||
# ALL-NEXT: [1]: #uOps
|
||||
# ALL-NEXT: [2]: Latency
|
||||
# ALL-NEXT: [3]: RThroughput
|
||||
# ALL-NEXT: [4]: MayLoad
|
||||
# ALL-NEXT: [5]: MayStore
|
||||
# ALL-NEXT: [6]: HasSideEffects (U)
|
||||
|
||||
# ALL: [1] [2] [3] [4] [5] [6] Instructions:
|
||||
|
||||
# M3-NEXT: 4 7 1.00 * ld3 { v0.s, v1.s, v2.s }[0], [sp]
|
||||
# M3-NEXT: 3 6 1.50 * ld3r { v0.2s, v1.2s, v2.2s }, [sp]
|
||||
# M3-NEXT: 3 12 9.00 * ld3 { v0.2s, v1.2s, v2.2s }, [sp]
|
||||
# M3-NEXT: 5 6 6.00 * ld3 { v0.d, v1.d, v2.d }[0], [sp]
|
||||
# M3-NEXT: 3 6 1.50 * ld3r { v0.2d, v1.2d, v2.2d }, [sp]
|
||||
# M3-NEXT: 3 12 9.00 * ld3 { v0.2d, v1.2d, v2.2d }, [sp]
|
||||
# M3-NEXT: 5 7 1.00 * ld3 { v0.s, v1.s, v2.s }[0], [sp], #12
|
||||
# M3-NEXT: 4 6 1.50 * ld3r { v0.2s, v1.2s, v2.2s }, [sp], #12
|
||||
# M3-NEXT: 4 12 9.00 * ld3 { v0.2s, v1.2s, v2.2s }, [sp], #24
|
||||
# M3-NEXT: 6 6 6.00 * ld3 { v0.d, v1.d, v2.d }[0], [sp], #24
|
||||
# M3-NEXT: 4 6 1.50 * ld3r { v0.2d, v1.2d, v2.2d }, [sp], #24
|
||||
# M3-NEXT: 4 12 9.00 * ld3 { v0.2d, v1.2d, v2.2d }, [sp], #48
|
||||
# M3-NEXT: 5 7 1.00 * ld3 { v0.s, v1.s, v2.s }[0], [sp], x0
|
||||
# M3-NEXT: 4 6 1.50 * ld3r { v0.2s, v1.2s, v2.2s }, [sp], x0
|
||||
# M3-NEXT: 4 12 9.00 * ld3 { v0.2s, v1.2s, v2.2s }, [sp], x0
|
||||
# M3-NEXT: 6 6 6.00 * ld3 { v0.d, v1.d, v2.d }[0], [sp], x0
|
||||
# M3-NEXT: 4 6 1.50 * ld3r { v0.2d, v1.2d, v2.2d }, [sp], x0
|
||||
# M3-NEXT: 4 12 9.00 * ld3 { v0.2d, v1.2d, v2.2d }, [sp], x0
|
||||
|
||||
# M4-NEXT: 4 7 1.50 * ld3 { v0.s, v1.s, v2.s }[0], [sp]
|
||||
# M4-NEXT: 3 6 1.50 * ld3r { v0.2s, v1.2s, v2.2s }, [sp]
|
||||
# M4-NEXT: 3 12 4.50 * ld3 { v0.2s, v1.2s, v2.2s }, [sp]
|
||||
# M4-NEXT: 5 7 4.50 * ld3 { v0.d, v1.d, v2.d }[0], [sp]
|
||||
# M4-NEXT: 3 6 1.50 * ld3r { v0.2d, v1.2d, v2.2d }, [sp]
|
||||
# M4-NEXT: 3 12 4.50 * ld3 { v0.2d, v1.2d, v2.2d }, [sp]
|
||||
# M4-NEXT: 5 7 1.50 * ld3 { v0.s, v1.s, v2.s }[0], [sp], #12
|
||||
# M4-NEXT: 4 6 1.50 * ld3r { v0.2s, v1.2s, v2.2s }, [sp], #12
|
||||
# M4-NEXT: 4 12 4.50 * ld3 { v0.2s, v1.2s, v2.2s }, [sp], #24
|
||||
# M4-NEXT: 6 7 4.50 * ld3 { v0.d, v1.d, v2.d }[0], [sp], #24
|
||||
# M4-NEXT: 4 6 1.50 * ld3r { v0.2d, v1.2d, v2.2d }, [sp], #24
|
||||
# M4-NEXT: 4 12 4.50 * ld3 { v0.2d, v1.2d, v2.2d }, [sp], #48
|
||||
# M4-NEXT: 5 7 1.50 * ld3 { v0.s, v1.s, v2.s }[0], [sp], x0
|
||||
# M4-NEXT: 4 6 1.50 * ld3r { v0.2s, v1.2s, v2.2s }, [sp], x0
|
||||
# M4-NEXT: 4 12 4.50 * ld3 { v0.2s, v1.2s, v2.2s }, [sp], x0
|
||||
# M4-NEXT: 6 7 4.50 * ld3 { v0.d, v1.d, v2.d }[0], [sp], x0
|
||||
# M4-NEXT: 4 6 1.50 * ld3r { v0.2d, v1.2d, v2.2d }, [sp], x0
|
||||
# M4-NEXT: 4 12 4.50 * ld3 { v0.2d, v1.2d, v2.2d }, [sp], x0
|
||||
|
||||
# M5-NEXT: 4 8 1.50 * ld3 { v0.s, v1.s, v2.s }[0], [sp]
|
||||
# M5-NEXT: 3 7 1.50 * ld3r { v0.2s, v1.2s, v2.2s }, [sp]
|
||||
# M5-NEXT: 3 13 1.50 * ld3 { v0.2s, v1.2s, v2.2s }, [sp]
|
||||
# M5-NEXT: 5 8 1.50 * ld3 { v0.d, v1.d, v2.d }[0], [sp]
|
||||
# M5-NEXT: 3 7 1.50 * ld3r { v0.2d, v1.2d, v2.2d }, [sp]
|
||||
# M5-NEXT: 3 13 1.50 * ld3 { v0.2d, v1.2d, v2.2d }, [sp]
|
||||
# M5-NEXT: 5 8 1.50 * ld3 { v0.s, v1.s, v2.s }[0], [sp], #12
|
||||
# M5-NEXT: 4 7 1.50 * ld3r { v0.2s, v1.2s, v2.2s }, [sp], #12
|
||||
# M5-NEXT: 4 13 1.50 * ld3 { v0.2s, v1.2s, v2.2s }, [sp], #24
|
||||
# M5-NEXT: 6 8 1.50 * ld3 { v0.d, v1.d, v2.d }[0], [sp], #24
|
||||
# M5-NEXT: 4 7 1.50 * ld3r { v0.2d, v1.2d, v2.2d }, [sp], #24
|
||||
# M5-NEXT: 4 13 1.50 * ld3 { v0.2d, v1.2d, v2.2d }, [sp], #48
|
||||
# M5-NEXT: 5 8 1.50 * ld3 { v0.s, v1.s, v2.s }[0], [sp], x0
|
||||
# M5-NEXT: 4 7 1.50 * ld3r { v0.2s, v1.2s, v2.2s }, [sp], x0
|
||||
# M5-NEXT: 4 13 1.50 * ld3 { v0.2s, v1.2s, v2.2s }, [sp], x0
|
||||
# M5-NEXT: 6 8 1.50 * ld3 { v0.d, v1.d, v2.d }[0], [sp], x0
|
||||
# M5-NEXT: 4 7 1.50 * ld3r { v0.2d, v1.2d, v2.2d }, [sp], x0
|
||||
# M5-NEXT: 4 13 1.50 * ld3 { v0.2d, v1.2d, v2.2d }, [sp], x0
|
|
@ -0,0 +1,118 @@
|
|||
# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
|
||||
# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m3 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,M3
|
||||
# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m4 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,M4
|
||||
# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m5 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,M5
|
||||
|
||||
ld4 {v0.s, v1.s, v2.s, v3.s}[0], [sp]
|
||||
ld4r {v0.2s, v1.2s, v2.2s, v3.2s}, [sp]
|
||||
ld4 {v0.2s, v1.2s, v2.2s, v3.2s}, [sp]
|
||||
|
||||
ld4 {v0.d, v1.d, v2.d, v3.d}[0], [sp]
|
||||
ld4r {v0.2d, v1.2d, v2.2d, v3.2d}, [sp]
|
||||
ld4 {v0.2d, v1.2d, v2.2d, v3.2d}, [sp]
|
||||
|
||||
ld4 {v0.s, v1.s, v2.s, v3.s}[0], [sp], #16
|
||||
ld4r {v0.2s, v1.2s, v2.2s, v3.2s}, [sp], #16
|
||||
ld4 {v0.2s, v1.2s, v2.2s, v3.2s}, [sp], #32
|
||||
|
||||
ld4 {v0.d, v1.d, v2.d, v3.d}[0], [sp], #32
|
||||
ld4r {v0.2d, v1.2d, v2.2d, v3.2d}, [sp], #32
|
||||
ld4 {v0.2d, v1.2d, v2.2d, v3.2d}, [sp], #64
|
||||
|
||||
ld4 {v0.s, v1.s, v2.s, v3.s}[0], [sp], x0
|
||||
ld4r {v0.2s, v1.2s, v2.2s, v3.2s}, [sp], x0
|
||||
ld4 {v0.2s, v1.2s, v2.2s, v3.2s}, [sp], x0
|
||||
|
||||
ld4 {v0.d, v1.d, v2.d, v3.d}[0], [sp], x0
|
||||
ld4r {v0.2d, v1.2d, v2.2d, v3.2d}, [sp], x0
|
||||
ld4 {v0.2d, v1.2d, v2.2d, v3.2d}, [sp], x0
|
||||
|
||||
# ALL: Iterations: 100
|
||||
# ALL-NEXT: Instructions: 1800
|
||||
|
||||
# M3-NEXT: Total Cycles: 15598
|
||||
# M4-NEXT: Total Cycles: 13004
|
||||
# M5-NEXT: Total Cycles: 14304
|
||||
|
||||
# ALL-NEXT: Total uOps: 9300
|
||||
|
||||
# ALL: Dispatch Width: 6
|
||||
|
||||
# M3-NEXT: uOps Per Cycle: 0.60
|
||||
# M3-NEXT: IPC: 0.12
|
||||
# M3-NEXT: Block RThroughput: 108.0
|
||||
|
||||
# M4-NEXT: uOps Per Cycle: 0.72
|
||||
# M4-NEXT: IPC: 0.14
|
||||
# M4-NEXT: Block RThroughput: 61.5
|
||||
|
||||
# M5-NEXT: uOps Per Cycle: 0.65
|
||||
# M5-NEXT: IPC: 0.13
|
||||
# M5-NEXT: Block RThroughput: 40.5
|
||||
|
||||
# ALL: Instruction Info:
|
||||
# ALL-NEXT: [1]: #uOps
|
||||
# ALL-NEXT: [2]: Latency
|
||||
# ALL-NEXT: [3]: RThroughput
|
||||
# ALL-NEXT: [4]: MayLoad
|
||||
# ALL-NEXT: [5]: MayStore
|
||||
# ALL-NEXT: [6]: HasSideEffects (U)
|
||||
|
||||
# ALL: [1] [2] [3] [4] [5] [6] Instructions:
|
||||
|
||||
# M3-NEXT: 5 9 2.00 * ld4 { v0.s, v1.s, v2.s, v3.s }[0], [sp]
|
||||
# M3-NEXT: 4 6 2.00 * ld4r { v0.2s, v1.2s, v2.2s, v3.2s }, [sp]
|
||||
# M3-NEXT: 4 14 12.00 * ld4 { v0.2s, v1.2s, v2.2s, v3.2s }, [sp]
|
||||
# M3-NEXT: 6 7 6.00 * ld4 { v0.d, v1.d, v2.d, v3.d }[0], [sp]
|
||||
# M3-NEXT: 4 6 2.00 * ld4r { v0.2d, v1.2d, v2.2d, v3.2d }, [sp]
|
||||
# M3-NEXT: 4 14 12.00 * ld4 { v0.2d, v1.2d, v2.2d, v3.2d }, [sp]
|
||||
# M3-NEXT: 6 9 2.00 * ld4 { v0.s, v1.s, v2.s, v3.s }[0], [sp], #16
|
||||
# M3-NEXT: 5 6 2.00 * ld4r { v0.2s, v1.2s, v2.2s, v3.2s }, [sp], #16
|
||||
# M3-NEXT: 5 14 12.00 * ld4 { v0.2s, v1.2s, v2.2s, v3.2s }, [sp], #32
|
||||
# M3-NEXT: 7 7 6.00 * ld4 { v0.d, v1.d, v2.d, v3.d }[0], [sp], #32
|
||||
# M3-NEXT: 5 6 2.00 * ld4r { v0.2d, v1.2d, v2.2d, v3.2d }, [sp], #32
|
||||
# M3-NEXT: 5 14 12.00 * ld4 { v0.2d, v1.2d, v2.2d, v3.2d }, [sp], #64
|
||||
# M3-NEXT: 6 9 2.00 * ld4 { v0.s, v1.s, v2.s, v3.s }[0], [sp], x0
|
||||
# M3-NEXT: 5 6 2.00 * ld4r { v0.2s, v1.2s, v2.2s, v3.2s }, [sp], x0
|
||||
# M3-NEXT: 5 14 12.00 * ld4 { v0.2s, v1.2s, v2.2s, v3.2s }, [sp], x0
|
||||
# M3-NEXT: 7 7 6.00 * ld4 { v0.d, v1.d, v2.d, v3.d }[0], [sp], x0
|
||||
# M3-NEXT: 5 6 2.00 * ld4r { v0.2d, v1.2d, v2.2d, v3.2d }, [sp], x0
|
||||
# M3-NEXT: 5 14 12.00 * ld4 { v0.2d, v1.2d, v2.2d, v3.2d }, [sp], x0
|
||||
|
||||
# M4-NEXT: 5 7 2.00 * ld4 { v0.s, v1.s, v2.s, v3.s }[0], [sp]
|
||||
# M4-NEXT: 4 6 2.00 * ld4r { v0.2s, v1.2s, v2.2s, v3.2s }, [sp]
|
||||
# M4-NEXT: 4 14 6.00 * ld4 { v0.2s, v1.2s, v2.2s, v3.2s }, [sp]
|
||||
# M4-NEXT: 6 7 3.00 * ld4 { v0.d, v1.d, v2.d, v3.d }[0], [sp]
|
||||
# M4-NEXT: 4 6 2.00 * ld4r { v0.2d, v1.2d, v2.2d, v3.2d }, [sp]
|
||||
# M4-NEXT: 4 14 6.00 * ld4 { v0.2d, v1.2d, v2.2d, v3.2d }, [sp]
|
||||
# M4-NEXT: 6 7 2.00 * ld4 { v0.s, v1.s, v2.s, v3.s }[0], [sp], #16
|
||||
# M4-NEXT: 5 6 2.00 * ld4r { v0.2s, v1.2s, v2.2s, v3.2s }, [sp], #16
|
||||
# M4-NEXT: 5 14 6.00 * ld4 { v0.2s, v1.2s, v2.2s, v3.2s }, [sp], #32
|
||||
# M4-NEXT: 7 7 3.00 * ld4 { v0.d, v1.d, v2.d, v3.d }[0], [sp], #32
|
||||
# M4-NEXT: 5 6 2.00 * ld4r { v0.2d, v1.2d, v2.2d, v3.2d }, [sp], #32
|
||||
# M4-NEXT: 5 14 6.00 * ld4 { v0.2d, v1.2d, v2.2d, v3.2d }, [sp], #64
|
||||
# M4-NEXT: 6 7 2.00 * ld4 { v0.s, v1.s, v2.s, v3.s }[0], [sp], x0
|
||||
# M4-NEXT: 5 6 2.00 * ld4r { v0.2s, v1.2s, v2.2s, v3.2s }, [sp], x0
|
||||
# M4-NEXT: 5 14 6.00 * ld4 { v0.2s, v1.2s, v2.2s, v3.2s }, [sp], x0
|
||||
# M4-NEXT: 7 7 3.00 * ld4 { v0.d, v1.d, v2.d, v3.d }[0], [sp], x0
|
||||
# M4-NEXT: 5 6 2.00 * ld4r { v0.2d, v1.2d, v2.2d, v3.2d }, [sp], x0
|
||||
# M4-NEXT: 5 14 6.00 * ld4 { v0.2d, v1.2d, v2.2d, v3.2d }, [sp], x0
|
||||
|
||||
# M5-NEXT: 5 8 2.00 * ld4 { v0.s, v1.s, v2.s, v3.s }[0], [sp]
|
||||
# M5-NEXT: 4 7 2.00 * ld4r { v0.2s, v1.2s, v2.2s, v3.2s }, [sp]
|
||||
# M5-NEXT: 4 15 4.00 * ld4 { v0.2s, v1.2s, v2.2s, v3.2s }, [sp]
|
||||
# M5-NEXT: 6 8 2.00 * ld4 { v0.d, v1.d, v2.d, v3.d }[0], [sp]
|
||||
# M5-NEXT: 4 7 2.00 * ld4r { v0.2d, v1.2d, v2.2d, v3.2d }, [sp]
|
||||
# M5-NEXT: 4 15 4.00 * ld4 { v0.2d, v1.2d, v2.2d, v3.2d }, [sp]
|
||||
# M5-NEXT: 6 8 2.00 * ld4 { v0.s, v1.s, v2.s, v3.s }[0], [sp], #16
|
||||
# M5-NEXT: 5 7 2.00 * ld4r { v0.2s, v1.2s, v2.2s, v3.2s }, [sp], #16
|
||||
# M5-NEXT: 5 15 4.00 * ld4 { v0.2s, v1.2s, v2.2s, v3.2s }, [sp], #32
|
||||
# M5-NEXT: 7 8 2.00 * ld4 { v0.d, v1.d, v2.d, v3.d }[0], [sp], #32
|
||||
# M5-NEXT: 5 7 2.00 * ld4r { v0.2d, v1.2d, v2.2d, v3.2d }, [sp], #32
|
||||
# M5-NEXT: 5 15 4.00 * ld4 { v0.2d, v1.2d, v2.2d, v3.2d }, [sp], #64
|
||||
# M5-NEXT: 6 8 2.00 * ld4 { v0.s, v1.s, v2.s, v3.s }[0], [sp], x0
|
||||
# M5-NEXT: 5 7 2.00 * ld4r { v0.2s, v1.2s, v2.2s, v3.2s }, [sp], x0
|
||||
# M5-NEXT: 5 15 4.00 * ld4 { v0.2s, v1.2s, v2.2s, v3.2s }, [sp], x0
|
||||
# M5-NEXT: 7 8 2.00 * ld4 { v0.d, v1.d, v2.d, v3.d }[0], [sp], x0
|
||||
# M5-NEXT: 5 7 2.00 * ld4r { v0.2d, v1.2d, v2.2d, v3.2d }, [sp], x0
|
||||
# M5-NEXT: 5 15 4.00 * ld4 { v0.2d, v1.2d, v2.2d, v3.2d }, [sp], x0
|
|
@ -0,0 +1,169 @@
|
|||
# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
|
||||
# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m3 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,M3
|
||||
# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m4 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,M4
|
||||
# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m5 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,M5
|
||||
|
||||
st1 {v0.s}[0], [sp]
|
||||
st1 {v0.2s}, [sp]
|
||||
st1 {v0.2s, v1.2s}, [sp]
|
||||
st1 {v0.2s, v1.2s, v2.2s}, [sp]
|
||||
st1 {v0.2s, v1.2s, v2.2s, v3.2s}, [sp]
|
||||
|
||||
st1 {v0.d}[0], [sp]
|
||||
st1 {v0.2d}, [sp]
|
||||
st1 {v0.2d, v1.2d}, [sp]
|
||||
st1 {v0.2d, v1.2d, v2.2d}, [sp]
|
||||
st1 {v0.2d, v1.2d, v2.2d, v3.2d}, [sp]
|
||||
|
||||
st1 {v0.s}[0], [sp], #4
|
||||
st1 {v0.2s}, [sp], #8
|
||||
st1 {v0.2s, v1.2s}, [sp], #16
|
||||
st1 {v0.2s, v1.2s, v2.2s}, [sp], #24
|
||||
st1 {v0.2s, v1.2s, v2.2s, v3.2s}, [sp], #32
|
||||
|
||||
st1 {v0.d}[0], [sp], #8
|
||||
st1 {v0.2d}, [sp], #16
|
||||
st1 {v0.2d, v1.2d}, [sp], #32
|
||||
st1 {v0.2d, v1.2d, v2.2d}, [sp], #48
|
||||
st1 {v0.2d, v1.2d, v2.2d, v3.2d}, [sp], #64
|
||||
|
||||
st1 {v0.s}[0], [sp], x0
|
||||
st1 {v0.2s}, [sp], x0
|
||||
st1 {v0.2s, v1.2s}, [sp], x0
|
||||
st1 {v0.2s, v1.2s, v2.2s}, [sp], x0
|
||||
st1 {v0.2s, v1.2s, v2.2s, v3.2s}, [sp], x0
|
||||
|
||||
st1 {v0.d}[0], [sp], x0
|
||||
st1 {v0.2d}, [sp], x0
|
||||
st1 {v0.2d, v1.2d}, [sp], x0
|
||||
st1 {v0.2d, v1.2d, v2.2d}, [sp], x0
|
||||
st1 {v0.2d, v1.2d, v2.2d, v3.2d}, [sp], x0
|
||||
|
||||
# ALL: Iterations: 100
|
||||
# ALL-NEXT: Instructions: 3000
|
||||
|
||||
# M3-NEXT: Total Cycles: 10203
|
||||
# M3-NEXT: Total uOps: 8400
|
||||
|
||||
# M4-NEXT: Total Cycles: 6603
|
||||
# M4-NEXT: Total uOps: 8600
|
||||
|
||||
# M5-NEXT: Total Cycles: 6603
|
||||
# M5-NEXT: Total uOps: 8600
|
||||
|
||||
# ALL: Dispatch Width: 6
|
||||
|
||||
# M3-NEXT: uOps Per Cycle: 0.82
|
||||
# M3-NEXT: IPC: 0.29
|
||||
# M3-NEXT: Block RThroughput: 72.0
|
||||
|
||||
# M4-NEXT: uOps Per Cycle: 1.30
|
||||
# M4-NEXT: IPC: 0.45
|
||||
# M4-NEXT: Block RThroughput: 33.0
|
||||
|
||||
# M5-NEXT: uOps Per Cycle: 1.30
|
||||
# M5-NEXT: IPC: 0.45
|
||||
# M5-NEXT: Block RThroughput: 33.0
|
||||
|
||||
# ALL: Instruction Info:
|
||||
# ALL-NEXT: [1]: #uOps
|
||||
# ALL-NEXT: [2]: Latency
|
||||
# ALL-NEXT: [3]: RThroughput
|
||||
# ALL-NEXT: [4]: MayLoad
|
||||
# ALL-NEXT: [5]: MayStore
|
||||
# ALL-NEXT: [6]: HasSideEffects (U)
|
||||
|
||||
# ALL: [1] [2] [3] [4] [5] [6] Instructions:
|
||||
|
||||
# M3-NEXT: 4 7 3.00 * st1 { v0.s }[0], [sp]
|
||||
# M3-NEXT: 1 1 1.00 * st1 { v0.2s }, [sp]
|
||||
# M3-NEXT: 2 2 2.00 * st1 { v0.2s, v1.2s }, [sp]
|
||||
# M3-NEXT: 3 3 3.00 * st1 { v0.2s, v1.2s, v2.2s }, [sp]
|
||||
# M3-NEXT: 4 4 4.00 * st1 { v0.2s, v1.2s, v2.2s, v3.2s }, [sp]
|
||||
# M3-NEXT: 4 7 3.00 * st1 { v0.d }[0], [sp]
|
||||
# M3-NEXT: 1 1 1.00 * st1 { v0.2d }, [sp]
|
||||
# M3-NEXT: 2 2 2.00 * st1 { v0.2d, v1.2d }, [sp]
|
||||
# M3-NEXT: 3 3 3.00 * st1 { v0.2d, v1.2d, v2.2d }, [sp]
|
||||
# M3-NEXT: 4 4 4.00 * st1 { v0.2d, v1.2d, v2.2d, v3.2d }, [sp]
|
||||
# M3-NEXT: 4 7 3.00 * st1 { v0.s }[0], [sp], #4
|
||||
# M3-NEXT: 1 1 1.00 * st1 { v0.2s }, [sp], #8
|
||||
# M3-NEXT: 2 2 2.00 * st1 { v0.2s, v1.2s }, [sp], #16
|
||||
# M3-NEXT: 3 3 3.00 * st1 { v0.2s, v1.2s, v2.2s }, [sp], #24
|
||||
# M3-NEXT: 4 4 4.00 * st1 { v0.2s, v1.2s, v2.2s, v3.2s }, [sp], #32
|
||||
# M3-NEXT: 4 7 3.00 * st1 { v0.d }[0], [sp], #8
|
||||
# M3-NEXT: 1 1 1.00 * st1 { v0.2d }, [sp], #16
|
||||
# M3-NEXT: 2 2 2.00 * st1 { v0.2d, v1.2d }, [sp], #32
|
||||
# M3-NEXT: 3 3 3.00 * st1 { v0.2d, v1.2d, v2.2d }, [sp], #48
|
||||
# M3-NEXT: 4 4 4.00 * st1 { v0.2d, v1.2d, v2.2d, v3.2d }, [sp], #64
|
||||
# M3-NEXT: 4 7 3.00 * st1 { v0.s }[0], [sp], x0
|
||||
# M3-NEXT: 1 1 1.00 * st1 { v0.2s }, [sp], x0
|
||||
# M3-NEXT: 2 2 2.00 * st1 { v0.2s, v1.2s }, [sp], x0
|
||||
# M3-NEXT: 3 3 3.00 * st1 { v0.2s, v1.2s, v2.2s }, [sp], x0
|
||||
# M3-NEXT: 4 4 4.00 * st1 { v0.2s, v1.2s, v2.2s, v3.2s }, [sp], x0
|
||||
# M3-NEXT: 4 7 3.00 * st1 { v0.d }[0], [sp], x0
|
||||
# M3-NEXT: 1 1 1.00 * st1 { v0.2d }, [sp], x0
|
||||
# M3-NEXT: 2 2 2.00 * st1 { v0.2d, v1.2d }, [sp], x0
|
||||
# M3-NEXT: 3 3 3.00 * st1 { v0.2d, v1.2d, v2.2d }, [sp], x0
|
||||
# M3-NEXT: 4 4 4.00 * st1 { v0.2d, v1.2d, v2.2d, v3.2d }, [sp], x0
|
||||
|
||||
# M4-NEXT: 1 1 0.50 * st1 { v0.s }[0], [sp]
|
||||
# M4-NEXT: 1 1 0.50 * st1 { v0.2s }, [sp]
|
||||
# M4-NEXT: 2 2 1.00 * st1 { v0.2s, v1.2s }, [sp]
|
||||
# M4-NEXT: 3 3 1.50 * st1 { v0.2s, v1.2s, v2.2s }, [sp]
|
||||
# M4-NEXT: 4 4 2.00 * st1 { v0.2s, v1.2s, v2.2s, v3.2s }, [sp]
|
||||
# M4-NEXT: 1 1 0.50 * st1 { v0.d }[0], [sp]
|
||||
# M4-NEXT: 1 1 0.50 * st1 { v0.2d }, [sp]
|
||||
# M4-NEXT: 2 2 1.00 * st1 { v0.2d, v1.2d }, [sp]
|
||||
# M4-NEXT: 3 3 1.50 * st1 { v0.2d, v1.2d, v2.2d }, [sp]
|
||||
# M4-NEXT: 4 4 2.00 * st1 { v0.2d, v1.2d, v2.2d, v3.2d }, [sp]
|
||||
# M4-NEXT: 2 1 0.50 * st1 { v0.s }[0], [sp], #4
|
||||
# M4-NEXT: 2 1 0.50 * st1 { v0.2s }, [sp], #8
|
||||
# M4-NEXT: 3 2 1.00 * st1 { v0.2s, v1.2s }, [sp], #16
|
||||
# M4-NEXT: 4 3 1.50 * st1 { v0.2s, v1.2s, v2.2s }, [sp], #24
|
||||
# M4-NEXT: 5 4 2.00 * st1 { v0.2s, v1.2s, v2.2s, v3.2s }, [sp], #32
|
||||
# M4-NEXT: 2 1 0.50 * st1 { v0.d }[0], [sp], #8
|
||||
# M4-NEXT: 2 1 0.50 * st1 { v0.2d }, [sp], #16
|
||||
# M4-NEXT: 3 2 1.00 * st1 { v0.2d, v1.2d }, [sp], #32
|
||||
# M4-NEXT: 4 3 1.50 * st1 { v0.2d, v1.2d, v2.2d }, [sp], #48
|
||||
# M4-NEXT: 5 4 2.00 * st1 { v0.2d, v1.2d, v2.2d, v3.2d }, [sp], #64
|
||||
# M4-NEXT: 2 1 0.50 * st1 { v0.s }[0], [sp], x0
|
||||
# M4-NEXT: 2 1 0.50 * st1 { v0.2s }, [sp], x0
|
||||
# M4-NEXT: 3 2 1.00 * st1 { v0.2s, v1.2s }, [sp], x0
|
||||
# M4-NEXT: 4 3 1.50 * st1 { v0.2s, v1.2s, v2.2s }, [sp], x0
|
||||
# M4-NEXT: 5 4 2.00 * st1 { v0.2s, v1.2s, v2.2s, v3.2s }, [sp], x0
|
||||
# M4-NEXT: 2 1 0.50 * st1 { v0.d }[0], [sp], x0
|
||||
# M4-NEXT: 2 1 0.50 * st1 { v0.2d }, [sp], x0
|
||||
# M4-NEXT: 3 2 1.00 * st1 { v0.2d, v1.2d }, [sp], x0
|
||||
# M4-NEXT: 4 3 1.50 * st1 { v0.2d, v1.2d, v2.2d }, [sp], x0
|
||||
# M4-NEXT: 5 4 2.00 * st1 { v0.2d, v1.2d, v2.2d, v3.2d }, [sp], x0
|
||||
|
||||
# M5-NEXT: 1 1 0.50 * st1 { v0.s }[0], [sp]
|
||||
# M5-NEXT: 1 1 0.50 * st1 { v0.2s }, [sp]
|
||||
# M5-NEXT: 2 2 1.00 * st1 { v0.2s, v1.2s }, [sp]
|
||||
# M5-NEXT: 3 3 1.50 * st1 { v0.2s, v1.2s, v2.2s }, [sp]
|
||||
# M5-NEXT: 4 4 2.00 * st1 { v0.2s, v1.2s, v2.2s, v3.2s }, [sp]
|
||||
# M5-NEXT: 1 1 0.50 * st1 { v0.d }[0], [sp]
|
||||
# M5-NEXT: 1 1 0.50 * st1 { v0.2d }, [sp]
|
||||
# M5-NEXT: 2 2 1.00 * st1 { v0.2d, v1.2d }, [sp]
|
||||
# M5-NEXT: 3 3 1.50 * st1 { v0.2d, v1.2d, v2.2d }, [sp]
|
||||
# M5-NEXT: 4 4 2.00 * st1 { v0.2d, v1.2d, v2.2d, v3.2d }, [sp]
|
||||
# M5-NEXT: 2 1 0.50 * st1 { v0.s }[0], [sp], #4
|
||||
# M5-NEXT: 2 1 0.50 * st1 { v0.2s }, [sp], #8
|
||||
# M5-NEXT: 3 2 1.00 * st1 { v0.2s, v1.2s }, [sp], #16
|
||||
# M5-NEXT: 4 3 1.50 * st1 { v0.2s, v1.2s, v2.2s }, [sp], #24
|
||||
# M5-NEXT: 5 4 2.00 * st1 { v0.2s, v1.2s, v2.2s, v3.2s }, [sp], #32
|
||||
# M5-NEXT: 2 1 0.50 * st1 { v0.d }[0], [sp], #8
|
||||
# M5-NEXT: 2 1 0.50 * st1 { v0.2d }, [sp], #16
|
||||
# M5-NEXT: 3 2 1.00 * st1 { v0.2d, v1.2d }, [sp], #32
|
||||
# M5-NEXT: 4 3 1.50 * st1 { v0.2d, v1.2d, v2.2d }, [sp], #48
|
||||
# M5-NEXT: 5 4 2.00 * st1 { v0.2d, v1.2d, v2.2d, v3.2d }, [sp], #64
|
||||
# M5-NEXT: 2 1 0.50 * st1 { v0.s }[0], [sp], x0
|
||||
# M5-NEXT: 2 1 0.50 * st1 { v0.2s }, [sp], x0
|
||||
# M5-NEXT: 3 2 1.00 * st1 { v0.2s, v1.2s }, [sp], x0
|
||||
# M5-NEXT: 4 3 1.50 * st1 { v0.2s, v1.2s, v2.2s }, [sp], x0
|
||||
# M5-NEXT: 5 4 2.00 * st1 { v0.2s, v1.2s, v2.2s, v3.2s }, [sp], x0
|
||||
# M5-NEXT: 2 1 0.50 * st1 { v0.d }[0], [sp], x0
|
||||
# M5-NEXT: 2 1 0.50 * st1 { v0.2d }, [sp], x0
|
||||
# M5-NEXT: 3 2 1.00 * st1 { v0.2d, v1.2d }, [sp], x0
|
||||
# M5-NEXT: 4 3 1.50 * st1 { v0.2d, v1.2d, v2.2d }, [sp], x0
|
||||
# M5-NEXT: 5 4 2.00 * st1 { v0.2d, v1.2d, v2.2d, v3.2d }, [sp], x0
|
|
@ -0,0 +1,97 @@
|
|||
# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
|
||||
# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m3 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,M3
|
||||
# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m4 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,M4
|
||||
# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m5 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,M5
|
||||
|
||||
st2 {v0.s, v1.s}[0], [sp]
|
||||
st2 {v0.2s, v1.2s}, [sp]
|
||||
|
||||
st2 {v0.d, v1.d}[0], [sp]
|
||||
st2 {v0.2d, v1.2d}, [sp]
|
||||
|
||||
st2 {v0.s, v1.s}[0], [sp], #8
|
||||
st2 {v0.2s, v1.2s}, [sp], #16
|
||||
|
||||
st2 {v0.d, v1.d}[0], [sp], #16
|
||||
st2 {v0.2d, v1.2d}, [sp], #32
|
||||
|
||||
st2 {v0.s, v1.s}[0], [sp], x0
|
||||
st2 {v0.2s, v1.2s}, [sp], x0
|
||||
|
||||
st2 {v0.d, v1.d}[0], [sp], x0
|
||||
st2 {v0.2d, v1.2d}, [sp], x0
|
||||
|
||||
# ALL: Iterations: 100
|
||||
# ALL-NEXT: Instructions: 1200
|
||||
|
||||
# M3-NEXT: Total Cycles: 8703
|
||||
# M3-NEXT: Total uOps: 5400
|
||||
|
||||
# M4-NEXT: Total Cycles: 2403
|
||||
# M4-NEXT: Total uOps: 2300
|
||||
|
||||
# M5-NEXT: Total Cycles: 2403
|
||||
# M5-NEXT: Total uOps: 2000
|
||||
|
||||
# ALL: Dispatch Width: 6
|
||||
|
||||
# M3-NEXT: uOps Per Cycle: 0.62
|
||||
# M3-NEXT: IPC: 0.14
|
||||
# M3-NEXT: Block RThroughput: 40.5
|
||||
|
||||
# M4-NEXT: uOps Per Cycle: 0.96
|
||||
# M4-NEXT: IPC: 0.50
|
||||
# M4-NEXT: Block RThroughput: 7.5
|
||||
|
||||
# M5-NEXT: uOps Per Cycle: 0.83
|
||||
# M5-NEXT: IPC: 0.50
|
||||
# M5-NEXT: Block RThroughput: 7.5
|
||||
|
||||
# ALL: Instruction Info:
|
||||
# ALL-NEXT: [1]: #uOps
|
||||
# ALL-NEXT: [2]: Latency
|
||||
# ALL-NEXT: [3]: RThroughput
|
||||
# ALL-NEXT: [4]: MayLoad
|
||||
# ALL-NEXT: [5]: MayStore
|
||||
# ALL-NEXT: [6]: HasSideEffects (U)
|
||||
|
||||
# ALL: [1] [2] [3] [4] [5] [6] Instructions:
|
||||
|
||||
# M3-NEXT: 4 7 3.00 * st2 { v0.s, v1.s }[0], [sp]
|
||||
# M3-NEXT: 4 7 3.00 * st2 { v0.2s, v1.2s }, [sp]
|
||||
# M3-NEXT: 4 7 3.00 * st2 { v0.d, v1.d }[0], [sp]
|
||||
# M3-NEXT: 6 8 4.50 * st2 { v0.2d, v1.2d }, [sp]
|
||||
# M3-NEXT: 4 7 3.00 * st2 { v0.s, v1.s }[0], [sp], #8
|
||||
# M3-NEXT: 4 7 3.00 * st2 { v0.2s, v1.2s }, [sp], #16
|
||||
# M3-NEXT: 4 7 3.00 * st2 { v0.d, v1.d }[0], [sp], #16
|
||||
# M3-NEXT: 6 8 4.50 * st2 { v0.2d, v1.2d }, [sp], #32
|
||||
# M3-NEXT: 4 7 3.00 * st2 { v0.s, v1.s }[0], [sp], x0
|
||||
# M3-NEXT: 4 7 3.00 * st2 { v0.2s, v1.2s }, [sp], x0
|
||||
# M3-NEXT: 4 7 3.00 * st2 { v0.d, v1.d }[0], [sp], x0
|
||||
# M3-NEXT: 6 8 4.50 * st2 { v0.2d, v1.2d }, [sp], x0
|
||||
|
||||
# M4-NEXT: 1 2 0.50 * st2 { v0.s, v1.s }[0], [sp]
|
||||
# M4-NEXT: 1 2 0.50 * st2 { v0.2s, v1.2s }, [sp]
|
||||
# M4-NEXT: 1 2 0.50 * st2 { v0.d, v1.d }[0], [sp]
|
||||
# M4-NEXT: 2 2 1.00 * st2 { v0.2d, v1.2d }, [sp]
|
||||
# M4-NEXT: 2 2 0.50 * st2 { v0.s, v1.s }[0], [sp], #8
|
||||
# M4-NEXT: 2 2 0.50 * st2 { v0.2s, v1.2s }, [sp], #16
|
||||
# M4-NEXT: 2 2 0.50 * st2 { v0.d, v1.d }[0], [sp], #16
|
||||
# M4-NEXT: 3 2 1.00 * st2 { v0.2d, v1.2d }, [sp], #32
|
||||
# M4-NEXT: 2 2 0.50 * st2 { v0.s, v1.s }[0], [sp], x0
|
||||
# M4-NEXT: 2 2 0.50 * st2 { v0.2s, v1.2s }, [sp], x0
|
||||
# M4-NEXT: 2 2 0.50 * st2 { v0.d, v1.d }[0], [sp], x0
|
||||
# M4-NEXT: 3 2 1.00 * st2 { v0.2d, v1.2d }, [sp], x0
|
||||
|
||||
# M5-NEXT: 1 2 0.50 * st2 { v0.s, v1.s }[0], [sp]
|
||||
# M5-NEXT: 1 2 0.50 * st2 { v0.2s, v1.2s }, [sp]
|
||||
# M5-NEXT: 1 2 0.50 * st2 { v0.d, v1.d }[0], [sp]
|
||||
# M5-NEXT: 1 2 1.00 * st2 { v0.2d, v1.2d }, [sp]
|
||||
# M5-NEXT: 2 2 0.50 * st2 { v0.s, v1.s }[0], [sp], #8
|
||||
# M5-NEXT: 2 2 0.50 * st2 { v0.2s, v1.2s }, [sp], #16
|
||||
# M5-NEXT: 2 2 0.50 * st2 { v0.d, v1.d }[0], [sp], #16
|
||||
# M5-NEXT: 2 2 1.00 * st2 { v0.2d, v1.2d }, [sp], #32
|
||||
# M5-NEXT: 2 2 0.50 * st2 { v0.s, v1.s }[0], [sp], x0
|
||||
# M5-NEXT: 2 2 0.50 * st2 { v0.2s, v1.2s }, [sp], x0
|
||||
# M5-NEXT: 2 2 0.50 * st2 { v0.d, v1.d }[0], [sp], x0
|
||||
# M5-NEXT: 2 2 1.00 * st2 { v0.2d, v1.2d }, [sp], x0
|
|
@ -0,0 +1,97 @@
|
|||
# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
|
||||
# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m3 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,M3
|
||||
# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m4 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,M4
|
||||
# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m5 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,M5
|
||||
|
||||
st3 {v0.s, v1.s, v2.s}[0], [sp]
|
||||
st3 {v0.2s, v1.2s, v2.2s}, [sp]
|
||||
|
||||
st3 {v0.d, v1.d, v2.d}[0], [sp]
|
||||
st3 {v0.2d, v1.2d, v2.2d}, [sp]
|
||||
|
||||
st3 {v0.s, v1.s, v2.s}[0], [sp], #12
|
||||
st3 {v0.2s, v1.2s, v2.2s}, [sp], #24
|
||||
|
||||
st3 {v0.d, v1.d, v2.d}[0], [sp], #24
|
||||
st3 {v0.2d, v1.2d, v2.2d}, [sp], #48
|
||||
|
||||
st3 {v0.s, v1.s, v2.s}[0], [sp], x0
|
||||
st3 {v0.2s, v1.2s, v2.2s}, [sp], x0
|
||||
|
||||
st3 {v0.d, v1.d, v2.d}[0], [sp], x0
|
||||
st3 {v0.2d, v1.2d, v2.2d}, [sp], x0
|
||||
|
||||
# ALL: Iterations: 100
|
||||
# ALL-NEXT: Instructions: 1200
|
||||
|
||||
# M3-NEXT: Total Cycles: 18003
|
||||
# M3-NEXT: Total uOps: 8400
|
||||
|
||||
# M4-NEXT: Total Cycles: 3903
|
||||
# M4-NEXT: Total uOps: 5000
|
||||
|
||||
# M5-NEXT: Total Cycles: 3603
|
||||
# M5-NEXT: Total uOps: 4400
|
||||
|
||||
# ALL: Dispatch Width: 6
|
||||
|
||||
# M3-NEXT: uOps Per Cycle: 0.47
|
||||
# M3-NEXT: IPC: 0.07
|
||||
# M3-NEXT: Block RThroughput: 72.0
|
||||
|
||||
# M4-NEXT: uOps Per Cycle: 1.28
|
||||
# M4-NEXT: IPC: 0.31
|
||||
# M4-NEXT: Block RThroughput: 21.0
|
||||
|
||||
# M5-NEXT: uOps Per Cycle: 1.22
|
||||
# M5-NEXT: IPC: 0.33
|
||||
# M5-NEXT: Block RThroughput: 10.5
|
||||
|
||||
# ALL: Instruction Info:
|
||||
# ALL-NEXT: [1]: #uOps
|
||||
# ALL-NEXT: [2]: Latency
|
||||
# ALL-NEXT: [3]: RThroughput
|
||||
# ALL-NEXT: [4]: MayLoad
|
||||
# ALL-NEXT: [5]: MayStore
|
||||
# ALL-NEXT: [6]: HasSideEffects (U)
|
||||
|
||||
# ALL: [1] [2] [3] [4] [5] [6] Instructions:
|
||||
|
||||
# M3-NEXT: 5 14 4.50 * st3 { v0.s, v1.s, v2.s }[0], [sp]
|
||||
# M3-NEXT: 7 15 6.00 * st3 { v0.2s, v1.2s, v2.2s }, [sp]
|
||||
# M3-NEXT: 7 15 6.00 * st3 { v0.d, v1.d, v2.d }[0], [sp]
|
||||
# M3-NEXT: 9 16 7.50 * st3 { v0.2d, v1.2d, v2.2d }, [sp]
|
||||
# M3-NEXT: 5 14 4.50 * st3 { v0.s, v1.s, v2.s }[0], [sp], #12
|
||||
# M3-NEXT: 7 15 6.00 * st3 { v0.2s, v1.2s, v2.2s }, [sp], #24
|
||||
# M3-NEXT: 7 15 6.00 * st3 { v0.d, v1.d, v2.d }[0], [sp], #24
|
||||
# M3-NEXT: 9 16 7.50 * st3 { v0.2d, v1.2d, v2.2d }, [sp], #48
|
||||
# M3-NEXT: 5 14 4.50 * st3 { v0.s, v1.s, v2.s }[0], [sp], x0
|
||||
# M3-NEXT: 7 15 6.00 * st3 { v0.2s, v1.2s, v2.2s }, [sp], x0
|
||||
# M3-NEXT: 7 15 6.00 * st3 { v0.d, v1.d, v2.d }[0], [sp], x0
|
||||
# M3-NEXT: 9 16 7.50 * st3 { v0.2d, v1.2d, v2.2d }, [sp], x0
|
||||
|
||||
# M4-NEXT: 2 2 1.00 * st3 { v0.s, v1.s, v2.s }[0], [sp]
|
||||
# M4-NEXT: 4 4 2.00 * st3 { v0.2s, v1.2s, v2.2s }, [sp]
|
||||
# M4-NEXT: 2 2 1.00 * st3 { v0.d, v1.d, v2.d }[0], [sp]
|
||||
# M4-NEXT: 6 5 3.00 * st3 { v0.2d, v1.2d, v2.2d }, [sp]
|
||||
# M4-NEXT: 3 2 1.00 * st3 { v0.s, v1.s, v2.s }[0], [sp], #12
|
||||
# M4-NEXT: 5 4 2.00 * st3 { v0.2s, v1.2s, v2.2s }, [sp], #24
|
||||
# M4-NEXT: 3 2 1.00 * st3 { v0.d, v1.d, v2.d }[0], [sp], #24
|
||||
# M4-NEXT: 7 5 3.00 * st3 { v0.2d, v1.2d, v2.2d }, [sp], #48
|
||||
# M4-NEXT: 3 2 1.00 * st3 { v0.s, v1.s, v2.s }[0], [sp], x0
|
||||
# M4-NEXT: 5 4 2.00 * st3 { v0.2s, v1.2s, v2.2s }, [sp], x0
|
||||
# M4-NEXT: 3 2 1.00 * st3 { v0.d, v1.d, v2.d }[0], [sp], x0
|
||||
# M4-NEXT: 7 5 3.00 * st3 { v0.2d, v1.2d, v2.2d }, [sp], x0
|
||||
|
||||
# M5-NEXT: 2 2 1.00 * st3 { v0.s, v1.s, v2.s }[0], [sp]
|
||||
# M5-NEXT: 3 4 1.00 * st3 { v0.2s, v1.2s, v2.2s }, [sp]
|
||||
# M5-NEXT: 2 2 1.00 * st3 { v0.d, v1.d, v2.d }[0], [sp]
|
||||
# M5-NEXT: 5 4 1.50 * st3 { v0.2d, v1.2d, v2.2d }, [sp]
|
||||
# M5-NEXT: 3 2 1.00 * st3 { v0.s, v1.s, v2.s }[0], [sp], #12
|
||||
# M5-NEXT: 4 4 1.00 * st3 { v0.2s, v1.2s, v2.2s }, [sp], #24
|
||||
# M5-NEXT: 3 2 1.00 * st3 { v0.d, v1.d, v2.d }[0], [sp], #24
|
||||
# M5-NEXT: 6 4 1.50 * st3 { v0.2d, v1.2d, v2.2d }, [sp], #48
|
||||
# M5-NEXT: 3 2 1.00 * st3 { v0.s, v1.s, v2.s }[0], [sp], x0
|
||||
# M5-NEXT: 4 4 1.00 * st3 { v0.2s, v1.2s, v2.2s }, [sp], x0
|
||||
# M5-NEXT: 3 2 1.00 * st3 { v0.d, v1.d, v2.d }[0], [sp], x0
|
||||
# M5-NEXT: 6 4 1.50 * st3 { v0.2d, v1.2d, v2.2d }, [sp], x0
|
|
@ -0,0 +1,97 @@
|
|||
# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
|
||||
# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m3 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,M3
|
||||
# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m4 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,M4
|
||||
# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m5 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,M5
|
||||
|
||||
st4 {v0.s, v1.s, v2.s, v3.s}[0], [sp]
|
||||
st4 {v0.2s, v1.2s, v2.2s, v3.2s}, [sp]
|
||||
|
||||
st4 {v0.d, v1.d, v2.d, v3.d}[0], [sp]
|
||||
st4 {v0.2d, v1.2d, v2.2d, v3.2d}, [sp]
|
||||
|
||||
st4 {v0.s, v1.s, v2.s, v3.s}[0], [sp], #16
|
||||
st4 {v0.2s, v1.2s, v2.2s, v3.2s}, [sp], #32
|
||||
|
||||
st4 {v0.d, v1.d, v2.d, v3.d}[0], [sp], #32
|
||||
st4 {v0.2d, v1.2d, v2.2d, v3.2d}, [sp], #64
|
||||
|
||||
st4 {v0.s, v1.s, v2.s, v3.s}[0], [sp], x0
|
||||
st4 {v0.2s, v1.2s, v2.2s, v3.2s}, [sp], x0
|
||||
|
||||
st4 {v0.d, v1.d, v2.d, v3.d}[0], [sp], x0
|
||||
st4 {v0.2d, v1.2d, v2.2d, v3.2d}, [sp], x0
|
||||
|
||||
# ALL: Iterations: 100
|
||||
# ALL-NEXT: Instructions: 1200
|
||||
|
||||
# M3-NEXT: Total Cycles: 18603
|
||||
# M3-NEXT: Total uOps: 9000
|
||||
|
||||
# M4-NEXT: Total Cycles: 4803
|
||||
# M4-NEXT: Total uOps: 4700
|
||||
|
||||
# M5-NEXT: Total Cycles: 4803
|
||||
# M5-NEXT: Total uOps: 4700
|
||||
|
||||
# ALL: Dispatch Width: 6
|
||||
|
||||
# M3-NEXT: uOps Per Cycle: 0.48
|
||||
# M3-NEXT: IPC: 0.06
|
||||
# M3-NEXT: Block RThroughput: 76.5
|
||||
|
||||
# M4-NEXT: uOps Per Cycle: 0.98
|
||||
# M4-NEXT: IPC: 0.25
|
||||
# M4-NEXT: Block RThroughput: 24.0
|
||||
|
||||
# M5-NEXT: uOps Per Cycle: 0.98
|
||||
# M5-NEXT: IPC: 0.25
|
||||
# M5-NEXT: Block RThroughput: 24.0
|
||||
|
||||
# ALL: Instruction Info:
|
||||
# ALL-NEXT: [1]: #uOps
|
||||
# ALL-NEXT: [2]: Latency
|
||||
# ALL-NEXT: [3]: RThroughput
|
||||
# ALL-NEXT: [4]: MayLoad
|
||||
# ALL-NEXT: [5]: MayStore
|
||||
# ALL-NEXT: [6]: HasSideEffects (U)
|
||||
|
||||
# ALL: [1] [2] [3] [4] [5] [6] Instructions:
|
||||
|
||||
# M3-NEXT: 7 15 6.00 * st4 { v0.s, v1.s, v2.s, v3.s }[0], [sp]
|
||||
# M3-NEXT: 7 15 6.00 * st4 { v0.2s, v1.2s, v2.2s, v3.2s }, [sp]
|
||||
# M3-NEXT: 7 15 6.00 * st4 { v0.d, v1.d, v2.d, v3.d }[0], [sp]
|
||||
# M3-NEXT: 9 17 7.50 * st4 { v0.2d, v1.2d, v2.2d, v3.2d }, [sp]
|
||||
# M3-NEXT: 7 15 6.00 * st4 { v0.s, v1.s, v2.s, v3.s }[0], [sp], #16
|
||||
# M3-NEXT: 7 15 6.00 * st4 { v0.2s, v1.2s, v2.2s, v3.2s }, [sp], #32
|
||||
# M3-NEXT: 7 15 6.00 * st4 { v0.d, v1.d, v2.d, v3.d }[0], [sp], #32
|
||||
# M3-NEXT: 9 17 7.50 * st4 { v0.2d, v1.2d, v2.2d, v3.2d }, [sp], #64
|
||||
# M3-NEXT: 7 15 6.00 * st4 { v0.s, v1.s, v2.s, v3.s }[0], [sp], x0
|
||||
# M3-NEXT: 7 15 6.00 * st4 { v0.2s, v1.2s, v2.2s, v3.2s }, [sp], x0
|
||||
# M3-NEXT: 7 15 6.00 * st4 { v0.d, v1.d, v2.d, v3.d }[0], [sp], x0
|
||||
# M3-NEXT: 9 17 7.50 * st4 { v0.2d, v1.2d, v2.2d, v3.2d }, [sp], x0
|
||||
|
||||
# M4-NEXT: 2 2 1.00 * st4 { v0.s, v1.s, v2.s, v3.s }[0], [sp]
|
||||
# M4-NEXT: 4 4 2.00 * st4 { v0.2s, v1.2s, v2.2s, v3.2s }, [sp]
|
||||
# M4-NEXT: 2 2 1.00 * st4 { v0.d, v1.d, v2.d, v3.d }[0], [sp]
|
||||
# M4-NEXT: 5 8 4.00 * st4 { v0.2d, v1.2d, v2.2d, v3.2d }, [sp]
|
||||
# M4-NEXT: 3 2 1.00 * st4 { v0.s, v1.s, v2.s, v3.s }[0], [sp], #16
|
||||
# M4-NEXT: 5 4 2.00 * st4 { v0.2s, v1.2s, v2.2s, v3.2s }, [sp], #32
|
||||
# M4-NEXT: 3 2 1.00 * st4 { v0.d, v1.d, v2.d, v3.d }[0], [sp], #32
|
||||
# M4-NEXT: 6 8 4.00 * st4 { v0.2d, v1.2d, v2.2d, v3.2d }, [sp], #64
|
||||
# M4-NEXT: 3 2 1.00 * st4 { v0.s, v1.s, v2.s, v3.s }[0], [sp], x0
|
||||
# M4-NEXT: 5 4 2.00 * st4 { v0.2s, v1.2s, v2.2s, v3.2s }, [sp], x0
|
||||
# M4-NEXT: 3 2 1.00 * st4 { v0.d, v1.d, v2.d, v3.d }[0], [sp], x0
|
||||
# M4-NEXT: 6 8 4.00 * st4 { v0.2d, v1.2d, v2.2d, v3.2d }, [sp], x0
|
||||
|
||||
# M5-NEXT: 2 2 1.00 * st4 { v0.s, v1.s, v2.s, v3.s }[0], [sp]
|
||||
# M5-NEXT: 4 4 2.00 * st4 { v0.2s, v1.2s, v2.2s, v3.2s }, [sp]
|
||||
# M5-NEXT: 2 2 1.00 * st4 { v0.d, v1.d, v2.d, v3.d }[0], [sp]
|
||||
# M5-NEXT: 5 8 4.00 * st4 { v0.2d, v1.2d, v2.2d, v3.2d }, [sp]
|
||||
# M5-NEXT: 3 2 1.00 * st4 { v0.s, v1.s, v2.s, v3.s }[0], [sp], #16
|
||||
# M5-NEXT: 5 4 2.00 * st4 { v0.2s, v1.2s, v2.2s, v3.2s }, [sp], #32
|
||||
# M5-NEXT: 3 2 1.00 * st4 { v0.d, v1.d, v2.d, v3.d }[0], [sp], #32
|
||||
# M5-NEXT: 6 8 4.00 * st4 { v0.2d, v1.2d, v2.2d, v3.2d }, [sp], #64
|
||||
# M5-NEXT: 3 2 1.00 * st4 { v0.s, v1.s, v2.s, v3.s }[0], [sp], x0
|
||||
# M5-NEXT: 5 4 2.00 * st4 { v0.2s, v1.2s, v2.2s, v3.2s }, [sp], x0
|
||||
# M5-NEXT: 3 2 1.00 * st4 { v0.d, v1.d, v2.d, v3.d }[0], [sp], x0
|
||||
# M5-NEXT: 6 8 4.00 * st4 { v0.2d, v1.2d, v2.2d, v3.2d }, [sp], x0
|
|
@ -0,0 +1,58 @@
|
|||
# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
|
||||
# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m3 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,M3
|
||||
# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m4 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,M4
|
||||
# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m5 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,M5
|
||||
|
||||
crc32w w0, w1, w2
|
||||
crc32w w0, w0, w3
|
||||
|
||||
crc32cx w0, w1, x2
|
||||
crc32cx w0, w0, x3
|
||||
|
||||
# ALL: Iterations: 100
|
||||
# ALL-NEXT: Instructions: 400
|
||||
|
||||
# M3-NEXT: Total Cycles: 204
|
||||
# M4-NEXT: Total Cycles: 404
|
||||
# M5-NEXT: Total Cycles: 204
|
||||
|
||||
# ALL-NEXT: Total uOps: 400
|
||||
|
||||
# ALL: Dispatch Width: 6
|
||||
|
||||
# M3-NEXT: uOps Per Cycle: 1.96
|
||||
# M3-NEXT: IPC: 1.96
|
||||
# M3-NEXT: Block RThroughput: 2.0
|
||||
|
||||
# M4-NEXT: uOps Per Cycle: 0.99
|
||||
# M4-NEXT: IPC: 0.99
|
||||
# M4-NEXT: Block RThroughput: 4.0
|
||||
|
||||
# M5-NEXT: uOps Per Cycle: 1.96
|
||||
# M5-NEXT: IPC: 1.96
|
||||
# M5-NEXT: Block RThroughput: 2.0
|
||||
|
||||
# ALL: Instruction Info:
|
||||
# ALL-NEXT: [1]: #uOps
|
||||
# ALL-NEXT: [2]: Latency
|
||||
# ALL-NEXT: [3]: RThroughput
|
||||
# ALL-NEXT: [4]: MayLoad
|
||||
# ALL-NEXT: [5]: MayStore
|
||||
# ALL-NEXT: [6]: HasSideEffects (U)
|
||||
|
||||
# ALL: [1] [2] [3] [4] [5] [6] Instructions:
|
||||
|
||||
# M3-NEXT: 1 2 0.50 crc32w w0, w1, w2
|
||||
# M3-NEXT: 1 2 0.50 crc32w w0, w0, w3
|
||||
# M3-NEXT: 1 2 0.50 crc32cx w0, w1, x2
|
||||
# M3-NEXT: 1 2 0.50 crc32cx w0, w0, x3
|
||||
|
||||
# M4-NEXT: 1 2 1.00 crc32w w0, w1, w2
|
||||
# M4-NEXT: 1 2 1.00 crc32w w0, w0, w3
|
||||
# M4-NEXT: 1 2 1.00 crc32cx w0, w1, x2
|
||||
# M4-NEXT: 1 2 1.00 crc32cx w0, w0, x3
|
||||
|
||||
# M5-NEXT: 1 2 0.50 crc32w w0, w1, w2
|
||||
# M5-NEXT: 1 2 0.50 crc32w w0, w0, w3
|
||||
# M5-NEXT: 1 2 0.50 crc32cx w0, w1, x2
|
||||
# M5-NEXT: 1 2 0.50 crc32cx w0, w0, x3
|
|
@ -1,6 +1,7 @@
|
|||
# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
|
||||
# RUN: llvm-mca -mtriple=aarch64-linux-gnu -mcpu=exynos-m3 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,M3
|
||||
# RUN: llvm-mca -mtriple=aarch64-linux-gnu -mcpu=exynos-m4 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,M4
|
||||
# RUN: llvm-mca -mtriple=aarch64-linux-gnu -mcpu=exynos-m5 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,M5
|
||||
|
||||
b main
|
||||
|
||||
|
@ -9,6 +10,7 @@
|
|||
|
||||
# M3-NEXT: Total Cycles: 18
|
||||
# M4-NEXT: Total Cycles: 18
|
||||
# M5-NEXT: Total Cycles: 18
|
||||
|
||||
# ALL-NEXT: Total uOps: 100
|
||||
|
||||
|
@ -22,6 +24,11 @@
|
|||
# M4-NEXT: IPC: 5.56
|
||||
# M4-NEXT: Block RThroughput: 0.2
|
||||
|
||||
# M5: Dispatch Width: 6
|
||||
# M5-NEXT: uOps Per Cycle: 5.56
|
||||
# M5-NEXT: IPC: 5.56
|
||||
# M5-NEXT: Block RThroughput: 0.2
|
||||
|
||||
# ALL: Instruction Info:
|
||||
# ALL-NEXT: [1]: #uOps
|
||||
# ALL-NEXT: [2]: Latency
|
||||
|
@ -34,3 +41,4 @@
|
|||
|
||||
# M3-NEXT: 1 0 0.17 b main
|
||||
# M4-NEXT: 1 0 0.17 b main
|
||||
# M5-NEXT: 1 0 0.17 b main
|
||||
|
|
|
@ -0,0 +1,67 @@
|
|||
# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
|
||||
# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m3 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,EM3
|
||||
# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m4 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,EM4
|
||||
# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m5 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,EM5
|
||||
|
||||
sdiv w0, w1, w2
|
||||
udiv x1, x2, x3
|
||||
|
||||
mul w2, w3, w4
|
||||
msub x3, x4, x5, x6
|
||||
|
||||
smull x4, w5, w6
|
||||
umulh x5, x6, x7
|
||||
|
||||
# ALL: Iterations: 100
|
||||
# ALL-NEXT: Instructions: 600
|
||||
|
||||
# EM3-NEXT: Total Cycles: 3305
|
||||
# EM4-NEXT: Total Cycles: 3303
|
||||
# EM5-NEXT: Total Cycles: 2603
|
||||
|
||||
# ALL-NEXT: Total uOps: 600
|
||||
|
||||
# ALL: Dispatch Width: 6
|
||||
|
||||
# EM3-NEXT: uOps Per Cycle: 0.18
|
||||
# EM3-NEXT: IPC: 0.18
|
||||
# EM3-NEXT: Block RThroughput: 33.0
|
||||
|
||||
# EM4-NEXT: uOps Per Cycle: 0.18
|
||||
# EM4-NEXT: IPC: 0.18
|
||||
# EM4-NEXT: Block RThroughput: 33.0
|
||||
|
||||
# EM5-NEXT: uOps Per Cycle: 0.23
|
||||
# EM5-NEXT: IPC: 0.23
|
||||
# EM5-NEXT: Block RThroughput: 26.0
|
||||
|
||||
# ALL: Instruction Info:
|
||||
# ALL-NEXT: [1]: #uOps
|
||||
# ALL-NEXT: [2]: Latency
|
||||
# ALL-NEXT: [3]: RThroughput
|
||||
# ALL-NEXT: [4]: MayLoad
|
||||
# ALL-NEXT: [5]: MayStore
|
||||
# ALL-NEXT: [6]: HasSideEffects (U)
|
||||
|
||||
# ALL: [1] [2] [3] [4] [5] [6] Instructions:
|
||||
|
||||
# EM3-NEXT: 1 12 12.00 sdiv w0, w1, w2
|
||||
# EM3-NEXT: 1 21 21.00 udiv x1, x2, x3
|
||||
# EM3-NEXT: 1 3 0.50 mul w2, w3, w4
|
||||
# EM3-NEXT: 1 4 1.00 msub x3, x4, x5, x6
|
||||
# EM3-NEXT: 1 3 0.50 smull x4, w5, w6
|
||||
# EM3-NEXT: 1 4 1.00 umulh x5, x6, x7
|
||||
|
||||
# EM4-NEXT: 1 12 12.00 sdiv w0, w1, w2
|
||||
# EM4-NEXT: 1 21 21.00 udiv x1, x2, x3
|
||||
# EM4-NEXT: 1 3 0.50 mul w2, w3, w4
|
||||
# EM4-NEXT: 1 4 1.00 msub x3, x4, x5, x6
|
||||
# EM4-NEXT: 1 3 0.50 smull x4, w5, w6
|
||||
# EM4-NEXT: 1 4 1.00 umulh x5, x6, x7
|
||||
|
||||
# EM5-NEXT: 1 10 10.00 sdiv w0, w1, w2
|
||||
# EM5-NEXT: 1 16 16.00 udiv x1, x2, x3
|
||||
# EM5-NEXT: 1 2 0.50 mul w2, w3, w4
|
||||
# EM5-NEXT: 1 3 1.00 msub x3, x4, x5, x6
|
||||
# EM5-NEXT: 1 2 0.50 smull x4, w5, w6
|
||||
# EM5-NEXT: 1 3 1.00 umulh x5, x6, x7
|
|
@ -0,0 +1,66 @@
|
|||
# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
|
||||
# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m3 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,M3
|
||||
# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m4 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,M4
|
||||
# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m5 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,M5
|
||||
|
||||
fmov d31, #1.00000000
|
||||
fdiv d30, d31, d30
|
||||
|
||||
# Newton series for 1 / x.
|
||||
frecpe d1, d0
|
||||
frecps d2, d0, d1
|
||||
fmul d1, d1, d2
|
||||
frecps d2, d0, d1
|
||||
fmul d1, d1, d2
|
||||
frecps d0, d0, d1
|
||||
fmul d0, d1, d0
|
||||
|
||||
# ALL: Iterations: 100
|
||||
# ALL-NEXT: Instructions: 900
|
||||
|
||||
# M3-NEXT: Total Cycles: 2503
|
||||
# M4-NEXT: Total Cycles: 2403
|
||||
# M5-NEXT: Total Cycles: 2403
|
||||
|
||||
# ALL-NEXT: Total uOps: 900
|
||||
|
||||
# ALL: Dispatch Width: 6
|
||||
|
||||
# M3-NEXT: uOps Per Cycle: 0.36
|
||||
# M3-NEXT: IPC: 0.36
|
||||
# M3-NEXT: Block RThroughput: 3.3
|
||||
|
||||
# M4-NEXT: uOps Per Cycle: 0.37
|
||||
# M4-NEXT: IPC: 0.37
|
||||
# M4-NEXT: Block RThroughput: 2.3
|
||||
|
||||
# M5-NEXT: uOps Per Cycle: 0.37
|
||||
# M5-NEXT: IPC: 0.37
|
||||
# M5-NEXT: Block RThroughput: 2.3
|
||||
|
||||
# ALL: Instruction Info:
|
||||
# ALL-NEXT: [1]: #uOps
|
||||
# ALL-NEXT: [2]: Latency
|
||||
# ALL-NEXT: [3]: RThroughput
|
||||
# ALL-NEXT: [4]: MayLoad
|
||||
# ALL-NEXT: [5]: MayStore
|
||||
# ALL-NEXT: [6]: HasSideEffects (U)
|
||||
|
||||
# ALL: [1] [2] [3] [4] [5] [6] Instructions:
|
||||
# ALL-NEXT: 1 1 0.33 fmov d31, #1.00000000
|
||||
|
||||
# M3-NEXT: 1 12 3.25 fdiv d30, d31, d30
|
||||
# M3-NEXT: 1 4 0.50 frecpe d1, d0
|
||||
|
||||
# M4-NEXT: 1 12 2.25 fdiv d30, d31, d30
|
||||
# M4-NEXT: 1 3 0.50 frecpe d1, d0
|
||||
|
||||
# M5-NEXT: 1 12 2.25 fdiv d30, d31, d30
|
||||
# M5-NEXT: 1 3 0.50 frecpe d1, d0
|
||||
|
||||
# ALL-NEXT: 1 4 0.33 frecps d2, d0, d1
|
||||
# ALL-NEXT: 1 3 0.33 fmul d1, d1, d2
|
||||
# ALL-NEXT: 1 4 0.33 frecps d2, d0, d1
|
||||
# ALL-NEXT: 1 3 0.33 fmul d1, d1, d2
|
||||
# ALL-NEXT: 1 4 0.33 frecps d0, d0, d1
|
||||
# ALL-NEXT: 1 3 0.33 fmul d0, d1, d0
|
|
@ -0,0 +1,78 @@
|
|||
# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
|
||||
# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m3 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,M3
|
||||
# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m4 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,M4
|
||||
# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m5 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,M5
|
||||
|
||||
fsqrt d30, d30
|
||||
fmov d31, #1.00000000
|
||||
fdiv d30, d31, d30
|
||||
|
||||
# Newton series for 1 / sqrt().
|
||||
frsqrte d1, d0
|
||||
fmul d2, d1, d1
|
||||
frsqrts d2, d0, d2
|
||||
fmul d1, d1, d2
|
||||
fmul d2, d1, d1
|
||||
frsqrts d2, d0, d2
|
||||
fmul d1, d1, d2
|
||||
fmul d2, d1, d1
|
||||
frsqrts d0, d0, d2
|
||||
fmul d0, d1, d0
|
||||
|
||||
# ALL: Iterations: 100
|
||||
# ALL-NEXT: Instructions: 1300
|
||||
|
||||
# M3-NEXT: Total Cycles: 3703
|
||||
# M4-NEXT: Total Cycles: 3303
|
||||
# M5-NEXT: Total Cycles: 3303
|
||||
|
||||
# ALL-NEXT: Total uOps: 1300
|
||||
|
||||
# ALL: Dispatch Width: 6
|
||||
|
||||
# M3-NEXT: uOps Per Cycle: 0.35
|
||||
# M3-NEXT: IPC: 0.35
|
||||
# M3-NEXT: Block RThroughput: 26.0
|
||||
|
||||
# M4-NEXT: uOps Per Cycle: 0.39
|
||||
# M4-NEXT: IPC: 0.39
|
||||
# M4-NEXT: Block RThroughput: 3.0
|
||||
|
||||
# M5-NEXT: uOps Per Cycle: 0.39
|
||||
# M5-NEXT: IPC: 0.39
|
||||
# M5-NEXT: Block RThroughput: 3.0
|
||||
|
||||
# ALL: Instruction Info:
|
||||
# ALL-NEXT: [1]: #uOps
|
||||
# ALL-NEXT: [2]: Latency
|
||||
# ALL-NEXT: [3]: RThroughput
|
||||
# ALL-NEXT: [4]: MayLoad
|
||||
# ALL-NEXT: [5]: MayStore
|
||||
# ALL-NEXT: [6]: HasSideEffects (U)
|
||||
|
||||
# ALL: [1] [2] [3] [4] [5] [6] Instructions:
|
||||
|
||||
# M3-NEXT: 1 25 26.00 fsqrt d30, d30
|
||||
# M4-NEXT: 1 12 2.25 fsqrt d30, d30
|
||||
# M5-NEXT: 1 12 2.25 fsqrt d30, d30
|
||||
|
||||
# ALL-NEXT: 1 1 0.33 fmov d31, #1.00000000
|
||||
|
||||
# M3-NEXT: 1 12 3.25 fdiv d30, d31, d30
|
||||
# M3-NEXT: 1 4 0.50 frsqrte d1, d0
|
||||
|
||||
# M4-NEXT: 1 12 2.25 fdiv d30, d31, d30
|
||||
# M4-NEXT: 1 3 0.50 frsqrte d1, d0
|
||||
|
||||
# M5-NEXT: 1 12 2.25 fdiv d30, d31, d30
|
||||
# M5-NEXT: 1 3 0.50 frsqrte d1, d0
|
||||
|
||||
# ALL-NEXT: 1 3 0.33 fmul d2, d1, d1
|
||||
# ALL-NEXT: 1 4 0.33 frsqrts d2, d0, d2
|
||||
# ALL-NEXT: 1 3 0.33 fmul d1, d1, d2
|
||||
# ALL-NEXT: 1 3 0.33 fmul d2, d1, d1
|
||||
# ALL-NEXT: 1 4 0.33 frsqrts d2, d0, d2
|
||||
# ALL-NEXT: 1 3 0.33 fmul d1, d1, d2
|
||||
# ALL-NEXT: 1 3 0.33 fmul d2, d1, d1
|
||||
# ALL-NEXT: 1 4 0.33 frsqrts d0, d0, d2
|
||||
# ALL-NEXT: 1 3 0.33 fmul d0, d1, d0
|
|
@ -0,0 +1,79 @@
|
|||
# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
|
||||
# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m3 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,M3
|
||||
# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m4 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,M4
|
||||
# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m5 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,M5
|
||||
|
||||
fsqrt d31, d31
|
||||
|
||||
# Newton series for sqrt().
|
||||
frsqrte d1, d0
|
||||
fmul d2, d1, d1
|
||||
frsqrts d2, d0, d2
|
||||
fmul d1, d1, d2
|
||||
fmul d2, d1, d1
|
||||
frsqrts d2, d0, d2
|
||||
fmul d1, d1, d2
|
||||
fmul d2, d1, d1
|
||||
frsqrts d2, d0, d2
|
||||
fmul d2, d2, d0
|
||||
fmul d1, d1, d2
|
||||
fcmp d0, #0.0
|
||||
fcsel d0, d0, d1, eq
|
||||
|
||||
# ALL: Iterations: 100
|
||||
# ALL-NEXT: Instructions: 1400
|
||||
|
||||
# M3-NEXT: Total Cycles: 4203
|
||||
# M4-NEXT: Total Cycles: 4103
|
||||
# M5-NEXT: Total Cycles: 3803
|
||||
|
||||
# ALL-NEXT: Total uOps: 1500
|
||||
|
||||
# ALL: Dispatch Width: 6
|
||||
|
||||
# M3-NEXT: uOps Per Cycle: 0.36
|
||||
# M3-NEXT: IPC: 0.33
|
||||
# M3-NEXT: Block RThroughput: 27.0
|
||||
|
||||
# M4-NEXT: uOps Per Cycle: 0.37
|
||||
# M4-NEXT: IPC: 0.34
|
||||
# M4-NEXT: Block RThroughput: 3.3
|
||||
|
||||
# M5-NEXT: uOps Per Cycle: 0.39
|
||||
# M5-NEXT: IPC: 0.37
|
||||
# M5-NEXT: Block RThroughput: 3.3
|
||||
|
||||
# ALL: Instruction Info:
|
||||
# ALL-NEXT: [1]: #uOps
|
||||
# ALL-NEXT: [2]: Latency
|
||||
# ALL-NEXT: [3]: RThroughput
|
||||
# ALL-NEXT: [4]: MayLoad
|
||||
# ALL-NEXT: [5]: MayStore
|
||||
# ALL-NEXT: [6]: HasSideEffects (U)
|
||||
|
||||
# ALL: [1] [2] [3] [4] [5] [6] Instructions:
|
||||
|
||||
# M3-NEXT: 1 25 26.00 fsqrt d31, d31
|
||||
# M3-NEXT: 1 4 0.50 frsqrte d1, d0
|
||||
|
||||
# M4-NEXT: 1 12 2.25 fsqrt d31, d31
|
||||
# M4-NEXT: 1 3 0.50 frsqrte d1, d0
|
||||
|
||||
# M5-NEXT: 1 12 2.25 fsqrt d31, d31
|
||||
# M5-NEXT: 1 3 0.50 frsqrte d1, d0
|
||||
|
||||
# ALL-NEXT: 1 3 0.33 fmul d2, d1, d1
|
||||
# ALL-NEXT: 1 4 0.33 frsqrts d2, d0, d2
|
||||
# ALL-NEXT: 1 3 0.33 fmul d1, d1, d2
|
||||
# ALL-NEXT: 1 3 0.33 fmul d2, d1, d1
|
||||
# ALL-NEXT: 1 4 0.33 frsqrts d2, d0, d2
|
||||
# ALL-NEXT: 1 3 0.33 fmul d1, d1, d2
|
||||
# ALL-NEXT: 1 3 0.33 fmul d2, d1, d1
|
||||
# ALL-NEXT: 1 4 0.33 frsqrts d2, d0, d2
|
||||
# ALL-NEXT: 1 3 0.33 fmul d2, d2, d0
|
||||
# ALL-NEXT: 1 3 0.33 fmul d1, d1, d2
|
||||
# ALL-NEXT: 1 2 1.00 fcmp d0, #0.0
|
||||
|
||||
# M3-NEXT: 2 5 1.00 fcsel d0, d0, d1, eq
|
||||
# M4-NEXT: 2 5 1.00 fcsel d0, d0, d1, eq
|
||||
# M5-NEXT: 2 2 1.00 fcsel d0, d0, d1, eq
|
|
@ -1,6 +1,7 @@
|
|||
# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
|
||||
# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m3 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,EM3
|
||||
# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m4 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,EM4
|
||||
# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m5 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,EM5
|
||||
|
||||
sub w0, w1, w2, sxtb #0
|
||||
add x3, x4, w5, sxth #1
|
||||
|
@ -16,6 +17,7 @@
|
|||
|
||||
# EM3-NEXT: Total Cycles: 304
|
||||
# EM4-NEXT: Total Cycles: 304
|
||||
# EM5-NEXT: Total Cycles: 254
|
||||
|
||||
# ALL-NEXT: Total uOps: 800
|
||||
|
||||
|
@ -29,6 +31,11 @@
|
|||
# EM4-NEXT: IPC: 2.63
|
||||
# EM4-NEXT: Block RThroughput: 3.0
|
||||
|
||||
# EM5: Dispatch Width: 6
|
||||
# EM5-NEXT: uOps Per Cycle: 3.15
|
||||
# EM5-NEXT: IPC: 3.15
|
||||
# EM5-NEXT: Block RThroughput: 2.5
|
||||
|
||||
# ALL: Instruction Info:
|
||||
# ALL-NEXT: [1]: #uOps
|
||||
# ALL-NEXT: [2]: Latency
|
||||
|
@ -56,3 +63,12 @@
|
|||
# EM4-NEXT: 1 2 0.50 add x15, x16, w17, uxth #1
|
||||
# EM4-NEXT: 1 2 0.50 subs x18, x19, w20, sxtw #2
|
||||
# EM4-NEXT: 1 2 0.50 adds x21, x22, x23, sxtx #3
|
||||
|
||||
# EM5-NEXT: 1 1 0.17 sub w0, w1, w2, sxtb
|
||||
# EM5-NEXT: 1 2 0.50 add x3, x4, w5, sxth #1
|
||||
# EM5-NEXT: 1 1 0.25 subs x6, x7, w8, uxtw #2
|
||||
# EM5-NEXT: 1 1 0.25 adds x9, x10, x11, uxtx #3
|
||||
# EM5-NEXT: 1 1 0.17 sub w12, w13, w14, uxtb
|
||||
# EM5-NEXT: 1 2 0.50 add x15, x16, w17, uxth #1
|
||||
# EM5-NEXT: 1 2 0.50 subs x18, x19, w20, sxtw #2
|
||||
# EM5-NEXT: 1 2 0.50 adds x21, x22, x23, sxtx #3
|
||||
|
|
|
@ -0,0 +1,94 @@
|
|||
# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
|
||||
# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m3 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,EM3
|
||||
# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m4 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,EM4
|
||||
# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m5 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,EM5
|
||||
|
||||
fdiv h0, h1, h2
|
||||
fdiv s1, s2, s3
|
||||
fdiv d2, d3, d4
|
||||
|
||||
fmul h3, h4, h5
|
||||
fmul s4, s5, s6
|
||||
fmul d5, d6, d7
|
||||
|
||||
fmadd h6, h7, h8, h9
|
||||
fmadd s7, s8, s9, s10
|
||||
fmadd d8, d9, d10, d11
|
||||
|
||||
fsqrt h9, h10
|
||||
fsqrt s10, s11
|
||||
fsqrt d11, d12
|
||||
|
||||
# ALL: Iterations: 100
|
||||
|
||||
# EM3-NEXT: Instructions: 800
|
||||
# EM3-NEXT: Total Cycles: 4503
|
||||
# EM3-NEXT: Total uOps: 800
|
||||
|
||||
# EM4-NEXT: Instructions: 1200
|
||||
# EM4-NEXT: Total Cycles: 575
|
||||
# EM4-NEXT: Total uOps: 1200
|
||||
|
||||
# EM5-NEXT: Instructions: 1200
|
||||
# EM5-NEXT: Total Cycles: 433
|
||||
# EM5-NEXT: Total uOps: 1200
|
||||
|
||||
# ALL: Dispatch Width: 6
|
||||
|
||||
# EM3-NEXT: uOps Per Cycle: 0.18
|
||||
# EM3-NEXT: IPC: 0.18
|
||||
# EM3-NEXT: Block RThroughput: 45.0
|
||||
|
||||
# EM4-NEXT: uOps Per Cycle: 2.09
|
||||
# EM4-NEXT: IPC: 2.09
|
||||
# EM4-NEXT: Block RThroughput: 4.0
|
||||
|
||||
# EM5-NEXT: uOps Per Cycle: 2.77
|
||||
# EM5-NEXT: IPC: 2.77
|
||||
# EM5-NEXT: Block RThroughput: 4.0
|
||||
|
||||
# ALL: Instruction Info:
|
||||
# ALL-NEXT: [1]: #uOps
|
||||
# ALL-NEXT: [2]: Latency
|
||||
# ALL-NEXT: [3]: RThroughput
|
||||
# ALL-NEXT: [4]: MayLoad
|
||||
# ALL-NEXT: [5]: MayStore
|
||||
# ALL-NEXT: [6]: HasSideEffects (U)
|
||||
|
||||
# EM3: [1] [2] [3] [4] [5] [6] Instructions:
|
||||
# EM3-NEXT: 1 7 2.00 fdiv s1, s2, s3
|
||||
# EM3-NEXT: 1 12 3.25 fdiv d2, d3, d4
|
||||
# EM3-NEXT: 1 3 0.33 fmul s4, s5, s6
|
||||
# EM3-NEXT: 1 3 0.33 fmul d5, d6, d7
|
||||
# EM3-NEXT: 1 4 0.33 fmadd s7, s8, s9, s10
|
||||
# EM3-NEXT: 1 4 0.33 fmadd d8, d9, d10, d11
|
||||
# EM3-NEXT: 1 18 19.00 fsqrt s10, s11
|
||||
# EM3-NEXT: 1 25 26.00 fsqrt d11, d12
|
||||
|
||||
# EM4: [1] [2] [3] [4] [5] [6] Instructions:
|
||||
# EM4-NEXT: 1 7 3.00 fdiv h0, h1, h2
|
||||
# EM4-NEXT: 1 7 1.50 fdiv s1, s2, s3
|
||||
# EM4-NEXT: 1 12 2.25 fdiv d2, d3, d4
|
||||
# EM4-NEXT: 1 3 0.50 fmul h3, h4, h5
|
||||
# EM4-NEXT: 1 3 0.33 fmul s4, s5, s6
|
||||
# EM4-NEXT: 1 3 0.33 fmul d5, d6, d7
|
||||
# EM4-NEXT: 1 4 0.50 fmadd h6, h7, h8, h9
|
||||
# EM4-NEXT: 1 4 0.33 fmadd s7, s8, s9, s10
|
||||
# EM4-NEXT: 1 4 0.33 fmadd d8, d9, d10, d11
|
||||
# EM4-NEXT: 1 7 3.00 fsqrt h9, h10
|
||||
# EM4-NEXT: 1 8 1.75 fsqrt s10, s11
|
||||
# EM4-NEXT: 1 12 2.25 fsqrt d11, d12
|
||||
|
||||
# EM5: [1] [2] [3] [4] [5] [6] Instructions:
|
||||
# EM5-NEXT: 1 5 0.50 fdiv h0, h1, h2
|
||||
# EM5-NEXT: 1 7 1.00 fdiv s1, s2, s3
|
||||
# EM5-NEXT: 1 12 2.25 fdiv d2, d3, d4
|
||||
# EM5-NEXT: 1 3 0.33 fmul h3, h4, h5
|
||||
# EM5-NEXT: 1 3 0.33 fmul s4, s5, s6
|
||||
# EM5-NEXT: 1 3 0.33 fmul d5, d6, d7
|
||||
# EM5-NEXT: 1 4 0.33 fmadd h6, h7, h8, h9
|
||||
# EM5-NEXT: 1 4 0.33 fmadd s7, s8, s9, s10
|
||||
# EM5-NEXT: 1 4 0.33 fmadd d8, d9, d10, d11
|
||||
# EM5-NEXT: 1 5 0.50 fsqrt h9, h10
|
||||
# EM5-NEXT: 1 8 1.25 fsqrt s10, s11
|
||||
# EM5-NEXT: 1 12 2.25 fsqrt d11, d12
|
|
@ -0,0 +1,114 @@
|
|||
# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
|
||||
# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m3 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,EM3
|
||||
# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m4 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,EM4
|
||||
# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m5 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,EM5
|
||||
|
||||
scvtf h0, w0
|
||||
scvtf s1, w1
|
||||
scvtf d2, x2
|
||||
|
||||
fcvtzs w3, h3
|
||||
fcvtzs w4, s4
|
||||
fcvtzs x5, d5
|
||||
|
||||
fmov h6, #2.0
|
||||
fmov s7, #4.0
|
||||
fmov d8, #8.0
|
||||
|
||||
fmov h9, w9
|
||||
fmov s10, w10
|
||||
fmov d11, x11
|
||||
fmov v12.d[1], x12
|
||||
|
||||
fmov w13, h13
|
||||
fmov w14, s14
|
||||
fmov x15, d15
|
||||
fmov x16, v16.d[1]
|
||||
|
||||
# ALL: Iterations: 100
|
||||
|
||||
# EM3-NEXT: Instructions: 1200
|
||||
# EM3-NEXT: Total Cycles: 405
|
||||
# EM3-NEXT: Total uOps: 1400
|
||||
|
||||
# EM4-NEXT: Instructions: 1700
|
||||
# EM4-NEXT: Total Cycles: 1108
|
||||
# EM4-NEXT: Total uOps: 1900
|
||||
|
||||
# EM5-NEXT: Instructions: 1700
|
||||
# EM5-NEXT: Total Cycles: 1407
|
||||
# EM5-NEXT: Total uOps: 1900
|
||||
|
||||
# ALL: Dispatch Width: 6
|
||||
|
||||
# EM3-NEXT: uOps Per Cycle: 3.46
|
||||
# EM3-NEXT: IPC: 2.96
|
||||
# EM3-NEXT: Block RThroughput: 4.0
|
||||
|
||||
# EM4-NEXT: uOps Per Cycle: 1.71
|
||||
# EM4-NEXT: IPC: 1.53
|
||||
# EM4-NEXT: Block RThroughput: 11.0
|
||||
|
||||
# EM5-NEXT: uOps Per Cycle: 1.35
|
||||
# EM5-NEXT: IPC: 1.21
|
||||
# EM5-NEXT: Block RThroughput: 14.0
|
||||
|
||||
# ALL: Instruction Info:
|
||||
# ALL-NEXT: [1]: #uOps
|
||||
# ALL-NEXT: [2]: Latency
|
||||
# ALL-NEXT: [3]: RThroughput
|
||||
# ALL-NEXT: [4]: MayLoad
|
||||
# ALL-NEXT: [5]: MayStore
|
||||
# ALL-NEXT: [6]: HasSideEffects (U)
|
||||
|
||||
# EM3: [1] [2] [3] [4] [5] [6] Instructions:
|
||||
# EM3-NEXT: 1 4 1.00 scvtf s1, w1
|
||||
# EM3-NEXT: 1 4 1.00 scvtf d2, x2
|
||||
# EM3-NEXT: 1 3 1.00 fcvtzs w4, s4
|
||||
# EM3-NEXT: 1 3 1.00 fcvtzs x5, d5
|
||||
# EM3-NEXT: 1 1 0.33 fmov s7, #4.00000000
|
||||
# EM3-NEXT: 1 1 0.33 fmov d8, #8.00000000
|
||||
# EM3-NEXT: 1 1 0.33 fmov s10, w10
|
||||
# EM3-NEXT: 1 1 0.33 fmov d11, x11
|
||||
# EM3-NEXT: 2 5 1.00 fmov v12.d[1], x12
|
||||
# EM3-NEXT: 1 1 0.33 fmov w14, s14
|
||||
# EM3-NEXT: 1 1 0.33 fmov x15, d15
|
||||
# EM3-NEXT: 2 5 1.00 fmov x16, v16.d[1]
|
||||
|
||||
# EM4: [1] [2] [3] [4] [5] [6] Instructions:
|
||||
# EM4-NEXT: 1 6 1.00 scvtf h0, w0
|
||||
# EM4-NEXT: 1 6 1.00 scvtf s1, w1
|
||||
# EM4-NEXT: 1 6 1.00 scvtf d2, x2
|
||||
# EM4-NEXT: 1 4 1.00 fcvtzs w3, h3
|
||||
# EM4-NEXT: 1 4 1.00 fcvtzs w4, s4
|
||||
# EM4-NEXT: 1 4 1.00 fcvtzs x5, d5
|
||||
# EM4-NEXT: 1 1 0.33 fmov h6, #2.00000000
|
||||
# EM4-NEXT: 1 1 0.33 fmov s7, #4.00000000
|
||||
# EM4-NEXT: 1 1 0.33 fmov d8, #8.00000000
|
||||
# EM4-NEXT: 1 3 1.00 fmov h9, w9
|
||||
# EM4-NEXT: 1 3 1.00 fmov s10, w10
|
||||
# EM4-NEXT: 1 3 1.00 fmov d11, x11
|
||||
# EM4-NEXT: 2 2 1.00 fmov v12.d[1], x12
|
||||
# EM4-NEXT: 1 4 1.00 fmov w13, h13
|
||||
# EM4-NEXT: 1 4 1.00 fmov w14, s14
|
||||
# EM4-NEXT: 1 4 1.00 fmov x15, d15
|
||||
# EM4-NEXT: 2 5 1.00 fmov x16, v16.d[1]
|
||||
|
||||
# EM5: [1] [2] [3] [4] [5] [6] Instructions:
|
||||
# EM5-NEXT: 1 6 1.00 scvtf h0, w0
|
||||
# EM5-NEXT: 1 6 1.00 scvtf s1, w1
|
||||
# EM5-NEXT: 1 6 1.00 scvtf d2, x2
|
||||
# EM5-NEXT: 1 4 1.00 fcvtzs w3, h3
|
||||
# EM5-NEXT: 1 4 1.00 fcvtzs w4, s4
|
||||
# EM5-NEXT: 1 4 1.00 fcvtzs x5, d5
|
||||
# EM5-NEXT: 1 1 0.33 fmov h6, #2.00000000
|
||||
# EM5-NEXT: 1 1 0.33 fmov s7, #4.00000000
|
||||
# EM5-NEXT: 1 1 0.33 fmov d8, #8.00000000
|
||||
# EM5-NEXT: 1 4 1.00 fmov h9, w9
|
||||
# EM5-NEXT: 1 4 1.00 fmov s10, w10
|
||||
# EM5-NEXT: 1 4 1.00 fmov d11, x11
|
||||
# EM5-NEXT: 2 6 1.00 fmov v12.d[1], x12
|
||||
# EM5-NEXT: 1 3 1.00 fmov w13, h13
|
||||
# EM5-NEXT: 1 3 1.00 fmov w14, s14
|
||||
# EM5-NEXT: 1 3 1.00 fmov x15, d15
|
||||
# EM5-NEXT: 2 5 1.00 fmov x16, v16.d[1]
|
|
@ -0,0 +1,153 @@
|
|||
# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
|
||||
# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m3 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,M3
|
||||
# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m4 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,M4
|
||||
# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m5 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,M5
|
||||
|
||||
ldr s0, 1f
|
||||
ldr q0, 1f
|
||||
|
||||
ldur d0, [sp, #2]
|
||||
ldur q0, [sp, #16]
|
||||
|
||||
ldr b0, [sp], #1
|
||||
ldr q0, [sp], #16
|
||||
|
||||
ldr h0, [sp, #2]!
|
||||
ldr q0, [sp, #16]!
|
||||
|
||||
ldr s0, [sp, #4]
|
||||
ldr q0, [sp, #16]
|
||||
|
||||
ldr d0, [sp, x0, lsl #3]
|
||||
ldr q0, [sp, x0, lsl #4]
|
||||
|
||||
ldr b0, [sp, x0]
|
||||
ldr q0, [sp, x0]
|
||||
|
||||
ldr h0, [sp, w0, sxtw #1]
|
||||
ldr q0, [sp, w0, uxtw #4]
|
||||
|
||||
ldr s0, [sp, w0, sxtw]
|
||||
ldr q0, [sp, w0, uxtw]
|
||||
|
||||
ldp d0, d1, [sp], #16
|
||||
ldp q0, q1, [sp], #32
|
||||
|
||||
ldp s0, s1, [sp, #8]!
|
||||
ldp q0, q1, [sp, #32]!
|
||||
|
||||
ldp d0, d1, [sp, #16]
|
||||
ldp q0, q1, [sp, #32]
|
||||
|
||||
1:
|
||||
|
||||
# ALL: Iterations: 100
|
||||
# ALL-NEXT: Instructions: 2400
|
||||
|
||||
# M3-NEXT: Total Cycles: 4708
|
||||
# M3-NEXT: Total uOps: 3200
|
||||
|
||||
# M4-NEXT: Total Cycles: 4708
|
||||
# M4-NEXT: Total uOps: 3200
|
||||
|
||||
# M5-NEXT: Total Cycles: 5509
|
||||
# M5-NEXT: Total uOps: 3300
|
||||
|
||||
# ALL: Dispatch Width: 6
|
||||
|
||||
# M3-NEXT: uOps Per Cycle: 0.68
|
||||
# M3-NEXT: IPC: 0.51
|
||||
# M3-NEXT: Block RThroughput: 13.5
|
||||
|
||||
# M4-NEXT: uOps Per Cycle: 0.68
|
||||
# M4-NEXT: IPC: 0.51
|
||||
# M4-NEXT: Block RThroughput: 13.0
|
||||
|
||||
# M5-NEXT: uOps Per Cycle: 0.60
|
||||
# M5-NEXT: IPC: 0.44
|
||||
# M5-NEXT: Block RThroughput: 13.5
|
||||
|
||||
# ALL: Instruction Info:
|
||||
# ALL-NEXT: [1]: #uOps
|
||||
# ALL-NEXT: [2]: Latency
|
||||
# ALL-NEXT: [3]: RThroughput
|
||||
# ALL-NEXT: [4]: MayLoad
|
||||
# ALL-NEXT: [5]: MayStore
|
||||
# ALL-NEXT: [6]: HasSideEffects (U)
|
||||
|
||||
# ALL: [1] [2] [3] [4] [5] [6] Instructions:
|
||||
|
||||
# M3-NEXT: 1 5 0.50 * ldr s0, {{\.?}}Ltmp0
|
||||
# M3-NEXT: 1 5 0.50 * ldr q0, {{\.?}}Ltmp0
|
||||
# M3-NEXT: 1 5 0.50 * ldur d0, [sp, #2]
|
||||
# M3-NEXT: 1 5 0.50 * ldur q0, [sp, #16]
|
||||
# M3-NEXT: 1 5 0.50 * ldr b0, [sp], #1
|
||||
# M3-NEXT: 1 5 0.50 * ldr q0, [sp], #16
|
||||
# M3-NEXT: 1 5 0.50 * ldr h0, [sp, #2]!
|
||||
# M3-NEXT: 1 5 0.50 * ldr q0, [sp, #16]!
|
||||
# M3-NEXT: 1 5 0.50 * ldr s0, [sp, #4]
|
||||
# M3-NEXT: 1 5 0.50 * ldr q0, [sp, #16]
|
||||
# M3-NEXT: 1 5 0.50 * ldr d0, [sp, x0, lsl #3]
|
||||
# M3-NEXT: 2 6 0.50 * ldr q0, [sp, x0, lsl #4]
|
||||
# M3-NEXT: 1 5 0.50 * ldr b0, [sp, x0]
|
||||
# M3-NEXT: 1 5 0.50 * ldr q0, [sp, x0]
|
||||
# M3-NEXT: 2 6 0.50 * ldr h0, [sp, w0, sxtw #1]
|
||||
# M3-NEXT: 2 6 0.50 * ldr q0, [sp, w0, uxtw #4]
|
||||
# M3-NEXT: 2 6 0.50 * ldr s0, [sp, w0, sxtw]
|
||||
# M3-NEXT: 1 5 0.50 * ldr q0, [sp, w0, uxtw]
|
||||
# M3-NEXT: 2 5 0.50 * ldp d0, d1, [sp], #16
|
||||
# M3-NEXT: 2 5 1.00 * ldp q0, q1, [sp], #32
|
||||
# M3-NEXT: 2 5 0.50 * ldp s0, s1, [sp, #8]!
|
||||
# M3-NEXT: 2 5 1.00 * ldp q0, q1, [sp, #32]!
|
||||
# M3-NEXT: 1 5 0.50 * ldp d0, d1, [sp, #16]
|
||||
# M3-NEXT: 1 5 1.00 * ldp q0, q1, [sp, #32]
|
||||
|
||||
# M4-NEXT: 1 5 0.50 * ldr s0, {{\.?}}Ltmp0
|
||||
# M4-NEXT: 1 5 0.50 * ldr q0, {{\.?}}Ltmp0
|
||||
# M4-NEXT: 1 5 0.50 * ldur d0, [sp, #2]
|
||||
# M4-NEXT: 1 5 0.50 * ldur q0, [sp, #16]
|
||||
# M4-NEXT: 1 5 0.50 * ldr b0, [sp], #1
|
||||
# M4-NEXT: 1 5 0.50 * ldr q0, [sp], #16
|
||||
# M4-NEXT: 1 5 0.50 * ldr h0, [sp, #2]!
|
||||
# M4-NEXT: 1 5 0.50 * ldr q0, [sp, #16]!
|
||||
# M4-NEXT: 1 5 0.50 * ldr s0, [sp, #4]
|
||||
# M4-NEXT: 1 5 0.50 * ldr q0, [sp, #16]
|
||||
# M4-NEXT: 1 5 0.50 * ldr d0, [sp, x0, lsl #3]
|
||||
# M4-NEXT: 2 6 0.50 * ldr q0, [sp, x0, lsl #4]
|
||||
# M4-NEXT: 1 5 0.50 * ldr b0, [sp, x0]
|
||||
# M4-NEXT: 1 5 0.50 * ldr q0, [sp, x0]
|
||||
# M4-NEXT: 2 6 0.50 * ldr h0, [sp, w0, sxtw #1]
|
||||
# M4-NEXT: 2 6 0.50 * ldr q0, [sp, w0, uxtw #4]
|
||||
# M4-NEXT: 2 6 0.50 * ldr s0, [sp, w0, sxtw]
|
||||
# M4-NEXT: 2 6 0.50 * ldr q0, [sp, w0, uxtw]
|
||||
# M4-NEXT: 1 5 0.50 * ldp d0, d1, [sp], #16
|
||||
# M4-NEXT: 2 5 0.50 * ldp q0, q1, [sp], #32
|
||||
# M4-NEXT: 2 5 0.50 * ldp s0, s1, [sp, #8]!
|
||||
# M4-NEXT: 2 5 1.00 * ldp q0, q1, [sp, #32]!
|
||||
# M4-NEXT: 1 5 0.50 * ldp d0, d1, [sp, #16]
|
||||
# M4-NEXT: 1 5 1.00 * ldp q0, q1, [sp, #32]
|
||||
|
||||
# M5-NEXT: 1 6 0.50 * ldr s0, {{\.?}}Ltmp0
|
||||
# M5-NEXT: 1 6 0.50 * ldr q0, {{\.?}}Ltmp0
|
||||
# M5-NEXT: 1 6 0.50 * ldur d0, [sp, #2]
|
||||
# M5-NEXT: 1 6 0.50 * ldur q0, [sp, #16]
|
||||
# M5-NEXT: 1 6 0.50 * ldr b0, [sp], #1
|
||||
# M5-NEXT: 1 6 0.50 * ldr q0, [sp], #16
|
||||
# M5-NEXT: 1 6 0.50 * ldr h0, [sp, #2]!
|
||||
# M5-NEXT: 1 6 0.50 * ldr q0, [sp, #16]!
|
||||
# M5-NEXT: 1 6 0.50 * ldr s0, [sp, #4]
|
||||
# M5-NEXT: 1 6 0.50 * ldr q0, [sp, #16]
|
||||
# M5-NEXT: 1 6 0.50 * ldr d0, [sp, x0, lsl #3]
|
||||
# M5-NEXT: 2 7 0.50 * ldr q0, [sp, x0, lsl #4]
|
||||
# M5-NEXT: 1 6 0.50 * ldr b0, [sp, x0]
|
||||
# M5-NEXT: 1 6 0.50 * ldr q0, [sp, x0]
|
||||
# M5-NEXT: 2 7 0.50 * ldr h0, [sp, w0, sxtw #1]
|
||||
# M5-NEXT: 2 7 0.50 * ldr q0, [sp, w0, uxtw #4]
|
||||
# M5-NEXT: 2 7 0.50 * ldr s0, [sp, w0, sxtw]
|
||||
# M5-NEXT: 2 7 0.50 * ldr q0, [sp, w0, uxtw]
|
||||
# M5-NEXT: 2 6 0.50 * ldp d0, d1, [sp], #16
|
||||
# M5-NEXT: 2 6 1.00 * ldp q0, q1, [sp], #32
|
||||
# M5-NEXT: 2 6 0.50 * ldp s0, s1, [sp, #8]!
|
||||
# M5-NEXT: 2 6 1.00 * ldp q0, q1, [sp, #32]!
|
||||
# M5-NEXT: 1 6 0.50 * ldp d0, d1, [sp, #16]
|
||||
# M5-NEXT: 1 6 1.00 * ldp q0, q1, [sp, #32]
|
|
@ -0,0 +1,62 @@
|
|||
# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
|
||||
# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m3 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,M3
|
||||
# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m4 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,M4
|
||||
# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m5 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,M5
|
||||
|
||||
fmov s31, #1.00000000
|
||||
fdiv s30, s31, s30
|
||||
|
||||
# Newton series for 1 / x.
|
||||
frecpe s1, s0
|
||||
frecps s2, s0, s1
|
||||
fmul s1, s1, s2
|
||||
frecps s0, s0, s1
|
||||
fmul s0, s1, s0
|
||||
|
||||
# ALL: Iterations: 100
|
||||
# ALL-NEXT: Instructions: 700
|
||||
|
||||
# M3-NEXT: Total Cycles: 1803
|
||||
# M4-NEXT: Total Cycles: 1703
|
||||
# M5-NEXT: Total Cycles: 1703
|
||||
|
||||
# ALL-NEXT: Total uOps: 700
|
||||
|
||||
# ALL: Dispatch Width: 6
|
||||
|
||||
# M3-NEXT: uOps Per Cycle: 0.39
|
||||
# M3-NEXT: IPC: 0.39
|
||||
# M3-NEXT: Block RThroughput: 2.0
|
||||
|
||||
# M4-NEXT: uOps Per Cycle: 0.41
|
||||
# M4-NEXT: IPC: 0.41
|
||||
# M4-NEXT: Block RThroughput: 1.5
|
||||
|
||||
# M5-NEXT: uOps Per Cycle: 0.41
|
||||
# M5-NEXT: IPC: 0.41
|
||||
# M5-NEXT: Block RThroughput: 1.3
|
||||
|
||||
# ALL: Instruction Info:
|
||||
# ALL-NEXT: [1]: #uOps
|
||||
# ALL-NEXT: [2]: Latency
|
||||
# ALL-NEXT: [3]: RThroughput
|
||||
# ALL-NEXT: [4]: MayLoad
|
||||
# ALL-NEXT: [5]: MayStore
|
||||
# ALL-NEXT: [6]: HasSideEffects (U)
|
||||
|
||||
# ALL: [1] [2] [3] [4] [5] [6] Instructions:
|
||||
# ALL-NEXT: 1 1 0.33 fmov s31, #1.00000000
|
||||
|
||||
# M3-NEXT: 1 7 2.00 fdiv s30, s31, s30
|
||||
# M3-NEXT: 1 4 0.50 frecpe s1, s0
|
||||
|
||||
# M4-NEXT: 1 7 1.50 fdiv s30, s31, s30
|
||||
# M4-NEXT: 1 3 0.50 frecpe s1, s0
|
||||
|
||||
# M5-NEXT: 1 7 1.00 fdiv s30, s31, s30
|
||||
# M5-NEXT: 1 3 0.50 frecpe s1, s0
|
||||
|
||||
# ALL-NEXT: 1 4 0.33 frecps s2, s0, s1
|
||||
# ALL-NEXT: 1 3 0.33 fmul s1, s1, s2
|
||||
# ALL-NEXT: 1 4 0.33 frecps s0, s0, s1
|
||||
# ALL-NEXT: 1 3 0.33 fmul s0, s1, s0
|
|
@ -0,0 +1,72 @@
|
|||
# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
|
||||
# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m3 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,M3
|
||||
# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m4 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,M4
|
||||
# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m5 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,M5
|
||||
|
||||
fsqrt s30, s30
|
||||
fmov s31, #1.00000000
|
||||
fdiv s30, s31, s30
|
||||
|
||||
# Newton series for 1 / sqrtf().
|
||||
frsqrte s1, s0
|
||||
fmul s2, s1, s1
|
||||
frsqrts s2, s0, s2
|
||||
fmul s1, s1, s2
|
||||
fmul s2, s1, s1
|
||||
frsqrts s0, s0, s2
|
||||
fmul s0, s1, s0
|
||||
|
||||
# ALL: Iterations: 100
|
||||
# ALL-NEXT: Instructions: 1000
|
||||
|
||||
# M3-NEXT: Total Cycles: 2503
|
||||
# M4-NEXT: Total Cycles: 2303
|
||||
# M5-NEXT: Total Cycles: 2303
|
||||
|
||||
# ALL-NEXT: Total uOps: 1000
|
||||
|
||||
# ALL: Dispatch Width: 6
|
||||
|
||||
# M3-NEXT: uOps Per Cycle: 0.40
|
||||
# M3-NEXT: IPC: 0.40
|
||||
# M3-NEXT: Block RThroughput: 19.0
|
||||
|
||||
# M4-NEXT: uOps Per Cycle: 0.43
|
||||
# M4-NEXT: IPC: 0.43
|
||||
# M4-NEXT: Block RThroughput: 2.0
|
||||
|
||||
# M5-NEXT: uOps Per Cycle: 0.43
|
||||
# M5-NEXT: IPC: 0.43
|
||||
# M5-NEXT: Block RThroughput: 2.0
|
||||
|
||||
# ALL: Instruction Info:
|
||||
# ALL-NEXT: [1]: #uOps
|
||||
# ALL-NEXT: [2]: Latency
|
||||
# ALL-NEXT: [3]: RThroughput
|
||||
# ALL-NEXT: [4]: MayLoad
|
||||
# ALL-NEXT: [5]: MayStore
|
||||
# ALL-NEXT: [6]: HasSideEffects (U)
|
||||
|
||||
# ALL: [1] [2] [3] [4] [5] [6] Instructions:
|
||||
|
||||
# M3-NEXT: 1 18 19.00 fsqrt s30, s30
|
||||
# M4-NEXT: 1 8 1.75 fsqrt s30, s30
|
||||
# M5-NEXT: 1 8 1.25 fsqrt s30, s30
|
||||
|
||||
# ALL-NEXT: 1 1 0.33 fmov s31, #1.00000000
|
||||
|
||||
# M3-NEXT: 1 7 2.00 fdiv s30, s31, s30
|
||||
# M3-NEXT: 1 4 0.50 frsqrte s1, s0
|
||||
|
||||
# M4-NEXT: 1 7 1.50 fdiv s30, s31, s30
|
||||
# M4-NEXT: 1 3 0.50 frsqrte s1, s0
|
||||
|
||||
# M5-NEXT: 1 7 1.00 fdiv s30, s31, s30
|
||||
# M5-NEXT: 1 3 0.50 frsqrte s1, s0
|
||||
|
||||
# ALL-NEXT: 1 3 0.33 fmul s2, s1, s1
|
||||
# ALL-NEXT: 1 4 0.33 frsqrts s2, s0, s2
|
||||
# ALL-NEXT: 1 3 0.33 fmul s1, s1, s2
|
||||
# ALL-NEXT: 1 3 0.33 fmul s2, s1, s1
|
||||
# ALL-NEXT: 1 4 0.33 frsqrts s0, s0, s2
|
||||
# ALL-NEXT: 1 3 0.33 fmul s0, s1, s0
|
|
@ -0,0 +1,73 @@
|
|||
# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
|
||||
# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m3 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,M3
|
||||
# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m4 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,M4
|
||||
# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m5 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,M5
|
||||
|
||||
fsqrt s31, s31
|
||||
|
||||
# Newton series for sqrtf().
|
||||
frsqrte s1, s0
|
||||
fmul s2, s1, s1
|
||||
frsqrts s2, s0, s2
|
||||
fmul s1, s1, s2
|
||||
fmul s2, s1, s1
|
||||
frsqrts s2, s0, s2
|
||||
fmul s2, s2, s0
|
||||
fmul s1, s1, s2
|
||||
fcmp s0, #0.0
|
||||
fcsel s0, s0, s1, eq
|
||||
|
||||
# ALL: Iterations: 100
|
||||
# ALL-NEXT: Instructions: 1100
|
||||
|
||||
# M3-NEXT: Total Cycles: 3203
|
||||
# M4-NEXT: Total Cycles: 3103
|
||||
# M5-NEXT: Total Cycles: 2803
|
||||
|
||||
# ALL-NEXT: Total uOps: 1200
|
||||
|
||||
# ALL: Dispatch Width: 6
|
||||
|
||||
# M3-NEXT: uOps Per Cycle: 0.37
|
||||
# M3-NEXT: IPC: 0.34
|
||||
# M3-NEXT: Block RThroughput: 20.0
|
||||
|
||||
# M4-NEXT: uOps Per Cycle: 0.39
|
||||
# M4-NEXT: IPC: 0.35
|
||||
# M4-NEXT: Block RThroughput: 2.3
|
||||
|
||||
# M5-NEXT: uOps Per Cycle: 0.43
|
||||
# M5-NEXT: IPC: 0.39
|
||||
# M5-NEXT: Block RThroughput: 2.3
|
||||
|
||||
# ALL: Instruction Info:
|
||||
# ALL-NEXT: [1]: #uOps
|
||||
# ALL-NEXT: [2]: Latency
|
||||
# ALL-NEXT: [3]: RThroughput
|
||||
# ALL-NEXT: [4]: MayLoad
|
||||
# ALL-NEXT: [5]: MayStore
|
||||
# ALL-NEXT: [6]: HasSideEffects (U)
|
||||
|
||||
# ALL: [1] [2] [3] [4] [5] [6] Instructions:
|
||||
|
||||
# M3-NEXT: 1 18 19.00 fsqrt s31, s31
|
||||
# M3-NEXT: 1 4 0.50 frsqrte s1, s0
|
||||
|
||||
# M4-NEXT: 1 8 1.75 fsqrt s31, s31
|
||||
# M4-NEXT: 1 3 0.50 frsqrte s1, s0
|
||||
|
||||
# M5-NEXT: 1 8 1.25 fsqrt s31, s31
|
||||
# M5-NEXT: 1 3 0.50 frsqrte s1, s0
|
||||
|
||||
# ALL-NEXT: 1 3 0.33 fmul s2, s1, s1
|
||||
# ALL-NEXT: 1 4 0.33 frsqrts s2, s0, s2
|
||||
# ALL-NEXT: 1 3 0.33 fmul s1, s1, s2
|
||||
# ALL-NEXT: 1 3 0.33 fmul s2, s1, s1
|
||||
# ALL-NEXT: 1 4 0.33 frsqrts s2, s0, s2
|
||||
# ALL-NEXT: 1 3 0.33 fmul s2, s2, s0
|
||||
# ALL-NEXT: 1 3 0.33 fmul s1, s1, s2
|
||||
# ALL-NEXT: 1 2 1.00 fcmp s0, #0.0
|
||||
|
||||
# M3-NEXT: 2 5 1.00 fcsel s0, s0, s1, eq
|
||||
# M4-NEXT: 2 5 1.00 fcsel s0, s0, s1, eq
|
||||
# M5-NEXT: 2 2 1.00 fcsel s0, s0, s1, eq
|
|
@ -0,0 +1,142 @@
|
|||
# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
|
||||
# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m3 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,M3
|
||||
# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m4 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,M4
|
||||
# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m5 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,M5
|
||||
|
||||
stur d0, [sp, #2]
|
||||
stur q0, [sp, #16]
|
||||
|
||||
str b0, [sp], #1
|
||||
str q0, [sp], #16
|
||||
|
||||
str h0, [sp, #2]!
|
||||
str q0, [sp, #16]!
|
||||
|
||||
str s0, [sp, #4]
|
||||
str q0, [sp, #16]
|
||||
|
||||
str d0, [sp, x0, lsl #3]
|
||||
str q0, [sp, x0, lsl #4]
|
||||
|
||||
str b0, [sp, x0]
|
||||
str q0, [sp, x0]
|
||||
|
||||
str h0, [sp, w0, sxtw #1]
|
||||
str q0, [sp, w0, uxtw #4]
|
||||
|
||||
str s0, [sp, w0, sxtw]
|
||||
str q0, [sp, w0, uxtw]
|
||||
|
||||
stp d0, d1, [sp], #16
|
||||
stp q0, q1, [sp], #32
|
||||
|
||||
stp s0, s1, [sp, #8]!
|
||||
stp q0, q1, [sp, #32]!
|
||||
|
||||
stp d0, d1, [sp, #16]
|
||||
stp q0, q1, [sp, #32]
|
||||
|
||||
# ALL: Iterations: 100
|
||||
# ALL-NEXT: Instructions: 2200
|
||||
|
||||
# M3-NEXT: Total Cycles: 3203
|
||||
# M3-NEXT: Total uOps: 2900
|
||||
|
||||
# M4-NEXT: Total Cycles: 3203
|
||||
# M4-NEXT: Total uOps: 3000
|
||||
|
||||
# M5-NEXT: Total Cycles: 2803
|
||||
# M5-NEXT: Total uOps: 2500
|
||||
|
||||
# ALL: Dispatch Width: 6
|
||||
|
||||
# M3-NEXT: uOps Per Cycle: 0.91
|
||||
# M3-NEXT: IPC: 0.69
|
||||
# M3-NEXT: Block RThroughput: 22.0
|
||||
|
||||
# M4-NEXT: uOps Per Cycle: 0.94
|
||||
# M4-NEXT: IPC: 0.69
|
||||
# M4-NEXT: Block RThroughput: 12.5
|
||||
|
||||
# M5-NEXT: uOps Per Cycle: 0.89
|
||||
# M5-NEXT: IPC: 0.78
|
||||
# M5-NEXT: Block RThroughput: 11.0
|
||||
|
||||
# ALL: Instruction Info:
|
||||
# ALL-NEXT: [1]: #uOps
|
||||
# ALL-NEXT: [2]: Latency
|
||||
# ALL-NEXT: [3]: RThroughput
|
||||
# ALL-NEXT: [4]: MayLoad
|
||||
# ALL-NEXT: [5]: MayStore
|
||||
# ALL-NEXT: [6]: HasSideEffects (U)
|
||||
|
||||
# ALL: [1] [2] [3] [4] [5] [6] Instructions:
|
||||
|
||||
# M3-NEXT: 1 1 1.00 * stur d0, [sp, #2]
|
||||
# M3-NEXT: 1 1 1.00 * stur q0, [sp, #16]
|
||||
# M3-NEXT: 1 1 1.00 * str b0, [sp], #1
|
||||
# M3-NEXT: 1 1 1.00 * str q0, [sp], #16
|
||||
# M3-NEXT: 1 1 1.00 * str h0, [sp, #2]!
|
||||
# M3-NEXT: 1 1 1.00 * str q0, [sp, #16]!
|
||||
# M3-NEXT: 1 1 1.00 * str s0, [sp, #4]
|
||||
# M3-NEXT: 1 1 1.00 * str q0, [sp, #16]
|
||||
# M3-NEXT: 1 1 1.00 * str d0, [sp, x0, lsl #3]
|
||||
# M3-NEXT: 2 3 1.00 * str q0, [sp, x0, lsl #4]
|
||||
# M3-NEXT: 1 1 1.00 * str b0, [sp, x0]
|
||||
# M3-NEXT: 1 1 1.00 * str q0, [sp, x0]
|
||||
# M3-NEXT: 2 3 1.00 * str h0, [sp, w0, sxtw #1]
|
||||
# M3-NEXT: 2 3 1.00 * str q0, [sp, w0, uxtw #4]
|
||||
# M3-NEXT: 2 3 1.00 * str s0, [sp, w0, sxtw]
|
||||
# M3-NEXT: 2 3 1.00 * str q0, [sp, w0, uxtw]
|
||||
# M3-NEXT: 1 1 1.00 * stp d0, d1, [sp], #16
|
||||
# M3-NEXT: 2 1 1.00 * stp q0, q1, [sp], #32
|
||||
# M3-NEXT: 1 1 1.00 * stp s0, s1, [sp, #8]!
|
||||
# M3-NEXT: 2 1 1.00 * stp q0, q1, [sp, #32]!
|
||||
# M3-NEXT: 1 1 1.00 * stp d0, d1, [sp, #16]
|
||||
# M3-NEXT: 1 1 1.00 * stp q0, q1, [sp, #32]
|
||||
|
||||
# M4-NEXT: 1 1 0.50 * stur d0, [sp, #2]
|
||||
# M4-NEXT: 1 1 0.50 * stur q0, [sp, #16]
|
||||
# M4-NEXT: 1 1 0.50 * str b0, [sp], #1
|
||||
# M4-NEXT: 1 1 0.50 * str q0, [sp], #16
|
||||
# M4-NEXT: 1 1 0.50 * str h0, [sp, #2]!
|
||||
# M4-NEXT: 1 1 0.50 * str q0, [sp, #16]!
|
||||
# M4-NEXT: 1 1 0.50 * str s0, [sp, #4]
|
||||
# M4-NEXT: 1 1 0.50 * str q0, [sp, #16]
|
||||
# M4-NEXT: 1 1 0.50 * str d0, [sp, x0, lsl #3]
|
||||
# M4-NEXT: 2 3 0.50 * str q0, [sp, x0, lsl #4]
|
||||
# M4-NEXT: 1 1 0.50 * str b0, [sp, x0]
|
||||
# M4-NEXT: 1 1 0.50 * str q0, [sp, x0]
|
||||
# M4-NEXT: 2 3 0.50 * str h0, [sp, w0, sxtw #1]
|
||||
# M4-NEXT: 2 3 0.50 * str q0, [sp, w0, uxtw #4]
|
||||
# M4-NEXT: 2 3 0.50 * str s0, [sp, w0, sxtw]
|
||||
# M4-NEXT: 2 3 0.50 * str q0, [sp, w0, uxtw]
|
||||
# M4-NEXT: 1 1 0.50 * stp d0, d1, [sp], #16
|
||||
# M4-NEXT: 2 1 1.00 * stp q0, q1, [sp], #32
|
||||
# M4-NEXT: 1 1 0.50 * stp s0, s1, [sp, #8]!
|
||||
# M4-NEXT: 2 1 1.00 * stp q0, q1, [sp, #32]!
|
||||
# M4-NEXT: 1 1 0.50 * stp d0, d1, [sp, #16]
|
||||
# M4-NEXT: 2 1 1.00 * stp q0, q1, [sp, #32]
|
||||
|
||||
# M5-NEXT: 1 1 0.50 * stur d0, [sp, #2]
|
||||
# M5-NEXT: 1 1 0.50 * stur q0, [sp, #16]
|
||||
# M5-NEXT: 1 1 0.50 * str b0, [sp], #1
|
||||
# M5-NEXT: 1 1 0.50 * str q0, [sp], #16
|
||||
# M5-NEXT: 1 1 0.50 * str h0, [sp, #2]!
|
||||
# M5-NEXT: 1 1 0.50 * str q0, [sp, #16]!
|
||||
# M5-NEXT: 1 1 0.50 * str s0, [sp, #4]
|
||||
# M5-NEXT: 1 1 0.50 * str q0, [sp, #16]
|
||||
# M5-NEXT: 1 1 0.50 * str d0, [sp, x0, lsl #3]
|
||||
# M5-NEXT: 2 3 0.50 * str q0, [sp, x0, lsl #4]
|
||||
# M5-NEXT: 1 1 0.50 * str b0, [sp, x0]
|
||||
# M5-NEXT: 1 1 0.50 * str q0, [sp, x0]
|
||||
# M5-NEXT: 1 1 0.50 * str h0, [sp, w0, sxtw #1]
|
||||
# M5-NEXT: 2 3 0.50 * str q0, [sp, w0, uxtw #4]
|
||||
# M5-NEXT: 1 1 0.50 * str s0, [sp, w0, sxtw]
|
||||
# M5-NEXT: 2 3 0.50 * str q0, [sp, w0, uxtw]
|
||||
# M5-NEXT: 1 1 0.50 * stp d0, d1, [sp], #16
|
||||
# M5-NEXT: 1 1 1.00 * stp q0, q1, [sp], #32
|
||||
# M5-NEXT: 1 1 0.50 * stp s0, s1, [sp, #8]!
|
||||
# M5-NEXT: 1 1 1.00 * stp q0, q1, [sp, #32]!
|
||||
# M5-NEXT: 1 1 0.50 * stp d0, d1, [sp, #16]
|
||||
# M5-NEXT: 1 1 1.00 * stp q0, q1, [sp, #32]
|
|
@ -0,0 +1,66 @@
|
|||
# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
|
||||
# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m3 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,M3
|
||||
# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m4 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,M4
|
||||
# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m5 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,M5
|
||||
|
||||
ldr w0, 1f
|
||||
ldur x0, [sp, #8]
|
||||
ldrb w0, [sp], #1
|
||||
ldrsh w0, [sp, #2]!
|
||||
ldr x0, [sp, #8]
|
||||
ldrb w0, [sp, x31]
|
||||
ldrsh w0, [sp, x31, lsl #1]
|
||||
ldr w0, [sp, w31, sxtw]
|
||||
ldr x0, [sp, w31, uxtw #3]
|
||||
ldnp w0, w1, [sp, #8]
|
||||
ldp x0, x1, [sp], #16
|
||||
ldpsw x0, x1, [sp, #8]!
|
||||
|
||||
1:
|
||||
|
||||
# ALL: Iterations: 100
|
||||
# ALL-NEXT: Instructions: 1200
|
||||
# ALL-NEXT: Total Cycles: 1904
|
||||
|
||||
# M3-NEXT: Total uOps: 1600
|
||||
# M4-NEXT: Total uOps: 1400
|
||||
# M5-NEXT: Total uOps: 1400
|
||||
|
||||
# ALL: Dispatch Width: 6
|
||||
|
||||
# M3-NEXT: uOps Per Cycle: 0.84
|
||||
# M4-NEXT: uOps Per Cycle: 0.74
|
||||
# M5-NEXT: uOps Per Cycle: 0.74
|
||||
|
||||
# ALL-NEXT: IPC: 0.63
|
||||
# ALL-NEXT: Block RThroughput: 6.0
|
||||
|
||||
# ALL: Instruction Info:
|
||||
# ALL-NEXT: [1]: #uOps
|
||||
# ALL-NEXT: [2]: Latency
|
||||
# ALL-NEXT: [3]: RThroughput
|
||||
# ALL-NEXT: [4]: MayLoad
|
||||
# ALL-NEXT: [5]: MayStore
|
||||
# ALL-NEXT: [6]: HasSideEffects (U)
|
||||
|
||||
# ALL: [1] [2] [3] [4] [5] [6] Instructions:
|
||||
# ALL-NEXT: 1 4 0.50 * ldr w0, {{\.?}}Ltmp0
|
||||
# ALL-NEXT: 1 4 0.50 * ldur x0, [sp, #8]
|
||||
# ALL-NEXT: 1 4 0.50 * ldrb w0, [sp], #1
|
||||
# ALL-NEXT: 1 4 0.50 * ldrsh w0, [sp, #2]!
|
||||
# ALL-NEXT: 1 4 0.50 * ldr x0, [sp, #8]
|
||||
# ALL-NEXT: 1 4 0.50 * ldrb w0, [sp, xzr]
|
||||
# ALL-NEXT: 1 5 0.50 * ldrsh w0, [sp, xzr, lsl #1]
|
||||
|
||||
# M3-NEXT: 2 5 0.50 * ldr w0, [sp, wzr, sxtw]
|
||||
# M3-NEXT: 2 5 0.50 * ldr x0, [sp, wzr, uxtw #3]
|
||||
|
||||
# M4-NEXT: 1 5 0.50 * ldr w0, [sp, wzr, sxtw]
|
||||
# M4-NEXT: 1 5 0.50 * ldr x0, [sp, wzr, uxtw #3]
|
||||
|
||||
# M5-NEXT: 1 5 0.50 * ldr w0, [sp, wzr, sxtw]
|
||||
# M5-NEXT: 1 5 0.50 * ldr x0, [sp, wzr, uxtw #3]
|
||||
|
||||
# ALL-NEXT: 1 4 0.50 * ldnp w0, w1, [sp, #8]
|
||||
# ALL-NEXT: 2 4 0.50 * ldp x0, x1, [sp], #16
|
||||
# ALL-NEXT: 2 4 0.50 * ldpsw x0, x1, [sp, #8]!
|
|
@ -1,6 +1,7 @@
|
|||
# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
|
||||
# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m3 -iterations=1 -scheduler-stats -resource-pressure=false -instruction-info=false < %s | FileCheck %s -check-prefixes=ALL,M3
|
||||
# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m4 -iterations=1 -scheduler-stats -resource-pressure=false -instruction-info=false < %s | FileCheck %s -check-prefixes=ALL,M4
|
||||
# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m5 -iterations=1 -scheduler-stats -resource-pressure=false -instruction-info=false < %s | FileCheck %s -check-prefixes=ALL,M5
|
||||
|
||||
b main
|
||||
|
||||
|
@ -19,6 +20,11 @@
|
|||
# M4-NEXT: IPC: 0.50
|
||||
# M4-NEXT: Block RThroughput: 0.2
|
||||
|
||||
# M5: Dispatch Width: 6
|
||||
# M5-NEXT: uOps Per Cycle: 0.50
|
||||
# M5-NEXT: IPC: 0.50
|
||||
# M5-NEXT: Block RThroughput: 0.2
|
||||
|
||||
# ALL: Schedulers - number of cycles where we saw N micro opcodes issued:
|
||||
# ALL-NEXT: [# issued], [# cycles]
|
||||
# ALL-NEXT: 0, 1 (50.0%)
|
||||
|
|
|
@ -1,6 +1,7 @@
|
|||
# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
|
||||
# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m3 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,EM3
|
||||
# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m4 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,EM4
|
||||
# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m5 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,EM5
|
||||
|
||||
adds w0, w1, w2, lsl #0
|
||||
sub x3, x4, x5, lsr #1
|
||||
|
@ -9,13 +10,14 @@
|
|||
adds w12, w13, w14, lsl #4
|
||||
sub x15, x16, x17, lsr #6
|
||||
ands x18, x19, x20, lsl #8
|
||||
orr w21, w22, w23, asr #10
|
||||
eor w21, w22, w23, asr #10
|
||||
|
||||
# ALL: Iterations: 100
|
||||
# ALL-NEXT: Instructions: 800
|
||||
|
||||
# EM3-NEXT: Total Cycles: 354
|
||||
# EM4-NEXT: Total Cycles: 329
|
||||
# EM5-NEXT: Total Cycles: 220
|
||||
|
||||
# ALL-NEXT: Total uOps: 800
|
||||
|
||||
|
@ -29,6 +31,11 @@
|
|||
# EM4-NEXT: IPC: 2.43
|
||||
# EM4-NEXT: Block RThroughput: 3.3
|
||||
|
||||
# EM5: Dispatch Width: 6
|
||||
# EM5-NEXT: uOps Per Cycle: 3.64
|
||||
# EM5-NEXT: IPC: 3.64
|
||||
# EM5-NEXT: Block RThroughput: 1.5
|
||||
|
||||
# ALL: Instruction Info:
|
||||
# ALL-NEXT: [1]: #uOps
|
||||
# ALL-NEXT: [2]: Latency
|
||||
|
@ -46,7 +53,7 @@
|
|||
# EM3-NEXT: 1 2 0.50 adds w12, w13, w14, lsl #4
|
||||
# EM3-NEXT: 1 2 0.50 sub x15, x16, x17, lsr #6
|
||||
# EM3-NEXT: 1 2 0.50 ands x18, x19, x20, lsl #8
|
||||
# EM3-NEXT: 1 2 0.50 orr w21, w22, w23, asr #10
|
||||
# EM3-NEXT: 1 2 0.50 eor w21, w22, w23, asr #10
|
||||
|
||||
# EM4-NEXT: 1 1 0.25 adds w0, w1, w2
|
||||
# EM4-NEXT: 1 2 0.50 sub x3, x4, x5, lsr #1
|
||||
|
@ -55,4 +62,13 @@
|
|||
# EM4-NEXT: 1 2 0.50 adds w12, w13, w14, lsl #4
|
||||
# EM4-NEXT: 1 2 0.50 sub x15, x16, x17, lsr #6
|
||||
# EM4-NEXT: 1 1 0.25 ands x18, x19, x20, lsl #8
|
||||
# EM4-NEXT: 1 2 0.50 orr w21, w22, w23, asr #10
|
||||
# EM4-NEXT: 1 2 0.50 eor w21, w22, w23, asr #10
|
||||
|
||||
# EM5-NEXT: 1 1 0.17 adds w0, w1, w2
|
||||
# EM5-NEXT: 1 2 0.50 sub x3, x4, x5, lsr #1
|
||||
# EM5-NEXT: 1 1 0.25 ands x6, x7, x8, lsl #2
|
||||
# EM5-NEXT: 1 2 0.33 orr w9, w10, w11, asr #3
|
||||
# EM5-NEXT: 1 2 0.33 adds w12, w13, w14, lsl #4
|
||||
# EM5-NEXT: 1 2 0.50 sub x15, x16, x17, lsr #6
|
||||
# EM5-NEXT: 1 1 0.25 ands x18, x19, x20, lsl #8
|
||||
# EM5-NEXT: 1 2 0.33 eor w21, w22, w23, asr #10
|
||||
|
|
|
@ -0,0 +1,82 @@
|
|||
# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
|
||||
# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m3 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,M3
|
||||
# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m4 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,M4
|
||||
# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m5 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,M5
|
||||
|
||||
stur x0, [sp, #8]
|
||||
strb w0, [sp], #1
|
||||
strh w0, [sp, #2]!
|
||||
str x0, [sp, #8]
|
||||
strb w0, [sp, x31]
|
||||
strh w0, [sp, x31, lsl #1]
|
||||
str w0, [sp, w31, sxtw]
|
||||
str x0, [sp, w31, uxtw #3]
|
||||
stnp w0, w1, [sp, #8]
|
||||
stp x0, x1, [sp], #16
|
||||
stp w0, w1, [sp, #8]!
|
||||
|
||||
# ALL: Iterations: 100
|
||||
# ALL-NEXT: Instructions: 1100
|
||||
# ALL-NEXT: Total Cycles: 1303
|
||||
|
||||
# M3-NEXT: Total uOps: 1300
|
||||
# M4-NEXT: Total uOps: 1100
|
||||
# M5-NEXT: Total uOps: 1100
|
||||
|
||||
# ALL: Dispatch Width: 6
|
||||
|
||||
# M3-NEXT: uOps Per Cycle: 1.00
|
||||
# M4-NEXT: uOps Per Cycle: 0.84
|
||||
# M5-NEXT: uOps Per Cycle: 0.84
|
||||
|
||||
# ALL-NEXT: IPC: 0.84
|
||||
|
||||
# M3-NEXT: Block RThroughput: 11.0
|
||||
# M4-NEXT: Block RThroughput: 5.5
|
||||
# M5-NEXT: Block RThroughput: 5.5
|
||||
|
||||
# ALL: Instruction Info:
|
||||
# ALL-NEXT: [1]: #uOps
|
||||
# ALL-NEXT: [2]: Latency
|
||||
# ALL-NEXT: [3]: RThroughput
|
||||
# ALL-NEXT: [4]: MayLoad
|
||||
# ALL-NEXT: [5]: MayStore
|
||||
# ALL-NEXT: [6]: HasSideEffects (U)
|
||||
|
||||
# ALL: [1] [2] [3] [4] [5] [6] Instructions:
|
||||
|
||||
# M3-NEXT: 1 1 1.00 * stur x0, [sp, #8]
|
||||
# M3-NEXT: 1 1 1.00 * strb w0, [sp], #1
|
||||
# M3-NEXT: 1 1 1.00 * strh w0, [sp, #2]!
|
||||
# M3-NEXT: 1 1 1.00 * str x0, [sp, #8]
|
||||
# M3-NEXT: 1 1 1.00 * strb w0, [sp, xzr]
|
||||
# M3-NEXT: 1 1 1.00 * strh w0, [sp, xzr, lsl #1]
|
||||
# M3-NEXT: 2 2 1.00 * str w0, [sp, wzr, sxtw]
|
||||
# M3-NEXT: 2 2 1.00 * str x0, [sp, wzr, uxtw #3]
|
||||
# M3-NEXT: 1 1 1.00 * stnp w0, w1, [sp, #8]
|
||||
# M3-NEXT: 1 1 1.00 * stp x0, x1, [sp], #16
|
||||
# M3-NEXT: 1 1 1.00 * stp w0, w1, [sp, #8]!
|
||||
|
||||
# M4-NEXT: 1 1 0.50 * stur x0, [sp, #8]
|
||||
# M4-NEXT: 1 1 0.50 * strb w0, [sp], #1
|
||||
# M4-NEXT: 1 1 0.50 * strh w0, [sp, #2]!
|
||||
# M4-NEXT: 1 1 0.50 * str x0, [sp, #8]
|
||||
# M4-NEXT: 1 1 0.50 * strb w0, [sp, xzr]
|
||||
# M4-NEXT: 1 1 0.50 * strh w0, [sp, xzr, lsl #1]
|
||||
# M4-NEXT: 1 2 0.50 * str w0, [sp, wzr, sxtw]
|
||||
# M4-NEXT: 1 2 0.50 * str x0, [sp, wzr, uxtw #3]
|
||||
# M4-NEXT: 1 1 0.50 * stnp w0, w1, [sp, #8]
|
||||
# M4-NEXT: 1 1 0.50 * stp x0, x1, [sp], #16
|
||||
# M4-NEXT: 1 1 0.50 * stp w0, w1, [sp, #8]!
|
||||
|
||||
# M5-NEXT: 1 1 0.50 * stur x0, [sp, #8]
|
||||
# M5-NEXT: 1 1 0.50 * strb w0, [sp], #1
|
||||
# M5-NEXT: 1 1 0.50 * strh w0, [sp, #2]!
|
||||
# M5-NEXT: 1 1 0.50 * str x0, [sp, #8]
|
||||
# M5-NEXT: 1 1 0.50 * strb w0, [sp, xzr]
|
||||
# M5-NEXT: 1 1 0.50 * strh w0, [sp, xzr, lsl #1]
|
||||
# M5-NEXT: 1 2 0.50 * str w0, [sp, wzr, sxtw]
|
||||
# M5-NEXT: 1 2 0.50 * str x0, [sp, wzr, uxtw #3]
|
||||
# M5-NEXT: 1 1 0.50 * stnp w0, w1, [sp, #8]
|
||||
# M5-NEXT: 1 1 0.50 * stp x0, x1, [sp], #16
|
||||
# M5-NEXT: 1 1 0.50 * stp w0, w1, [sp, #8]!
|
|
@ -1,6 +1,7 @@
|
|||
# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
|
||||
# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m3 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,M3
|
||||
# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m4 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,M4
|
||||
# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m5 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,M5
|
||||
|
||||
mov x0, x1
|
||||
mov sp, x0
|
||||
|
@ -22,21 +23,13 @@
|
|||
|
||||
# ALL: Iterations: 100
|
||||
# ALL-NEXT: Instructions: 1000
|
||||
|
||||
# M3-NEXT: Total Cycles: 172
|
||||
# M4-NEXT: Total Cycles: 172
|
||||
|
||||
# ALL-NEXT: Total Cycles: 172
|
||||
# ALL-NEXT: Total uOps: 1000
|
||||
|
||||
# M3: Dispatch Width: 6
|
||||
# M3-NEXT: uOps Per Cycle: 5.81
|
||||
# M3-NEXT: IPC: 5.81
|
||||
# M3-NEXT: Block RThroughput: 1.7
|
||||
|
||||
# M4: Dispatch Width: 6
|
||||
# M4-NEXT: uOps Per Cycle: 5.81
|
||||
# M4-NEXT: IPC: 5.81
|
||||
# M4-NEXT: Block RThroughput: 1.7
|
||||
# ALL: Dispatch Width: 6
|
||||
# ALL-NEXT: uOps Per Cycle: 5.81
|
||||
# ALL-NEXT: IPC: 5.81
|
||||
# ALL-NEXT: Block RThroughput: 1.7
|
||||
|
||||
# ALL: Instruction Info:
|
||||
# ALL-NEXT: [1]: #uOps
|
||||
|
@ -47,25 +40,21 @@
|
|||
# ALL-NEXT: [6]: HasSideEffects (U)
|
||||
|
||||
# ALL: [1] [2] [3] [4] [5] [6] Instructions:
|
||||
# ALL-NEXT: 1 0 0.17 mov x0, x1
|
||||
# ALL-NEXT: 1 0 0.17 mov sp, x0
|
||||
# ALL-NEXT: 1 0 0.17 mov w0, #12816
|
||||
|
||||
# M3-NEXT: 1 0 0.17 mov x0, x1
|
||||
# M3-NEXT: 1 0 0.17 mov sp, x0
|
||||
# M3-NEXT: 1 0 0.17 mov w0, #12816
|
||||
# M3-NEXT: 1 1 0.25 add w0, w1, #0
|
||||
# M3-NEXT: 1 0 0.17 adr x0, {{\.?}}Ltmp0
|
||||
# M3-NEXT: 1 4 0.50 * ldr x0, [x0]
|
||||
# M3-NEXT: 1 0 0.17 adrp x0, {{\.?}}Ltmp0
|
||||
# M3-NEXT: 1 1 0.25 add x0, x0, :lo12:{{\.?}}Ltmp0
|
||||
# M3-NEXT: 1 1 0.33 fmov s0, s1
|
||||
# M3-NEXT: 1 0 0.17 movi d0, #0000000000000000
|
||||
|
||||
# M4-NEXT: 1 0 0.17 mov x0, x1
|
||||
# M4-NEXT: 1 0 0.17 mov sp, x0
|
||||
# M4-NEXT: 1 0 0.17 mov w0, #12816
|
||||
# M4-NEXT: 1 1 0.25 add w0, w1, #0
|
||||
# M4-NEXT: 1 0 0.17 adr x0, {{\.?}}Ltmp0
|
||||
# M4-NEXT: 1 4 0.50 * ldr x0, [x0]
|
||||
# M4-NEXT: 1 0 0.17 adrp x0, {{\.?}}Ltmp0
|
||||
# M4-NEXT: 1 1 0.25 add x0, x0, :lo12:{{\.?}}Ltmp0
|
||||
# M5-NEXT: 1 1 0.17 add w0, w1, #0
|
||||
|
||||
# ALL-NEXT: 1 0 0.17 adr x0, {{\.?}}Ltmp0
|
||||
# ALL-NEXT: 1 4 0.50 * ldr x0, [x0]
|
||||
# ALL-NEXT: 1 0 0.17 adrp x0, {{\.?}}Ltmp0
|
||||
# ALL-NEXT: 1 1 0.25 add x0, x0, :lo12:{{\.?}}Ltmp0
|
||||
|
||||
# M3-NEXT: 1 1 0.33 fmov s0, s1
|
||||
# M4-NEXT: 1 1 0.33 fmov s0, s1
|
||||
# M4-NEXT: 1 0 0.17 movi d0, #0000000000000000
|
||||
# M5-NEXT: 1 2 0.33 fmov s0, s1
|
||||
|
||||
# ALL-NEXT: 1 0 0.17 movi d0, #0000000000000000
|
||||
|
|
Loading…
Reference in New Issue