[AArch64] Add the pipeline model for Exynos M5

Add the scheduling and cost models for Exynos M5.
This commit is contained in:
Evandro Menezes 2019-09-23 12:59:25 -05:00
parent 25f33d8318
commit 9bdfee2a3b
30 changed files with 3349 additions and 35 deletions

View File

@ -450,6 +450,7 @@ include "AArch64SchedFalkor.td"
include "AArch64SchedKryo.td" include "AArch64SchedKryo.td"
include "AArch64SchedExynosM3.td" include "AArch64SchedExynosM3.td"
include "AArch64SchedExynosM4.td" include "AArch64SchedExynosM4.td"
include "AArch64SchedExynosM5.td"
include "AArch64SchedThunderX.td" include "AArch64SchedThunderX.td"
include "AArch64SchedThunderX2T99.td" include "AArch64SchedThunderX2T99.td"
@ -790,7 +791,7 @@ def : ProcessorModel<"neoverse-n1", CortexA57Model, [ProcNeoverseN1]>;
def : ProcessorModel<"cyclone", CycloneModel, [ProcCyclone]>; def : ProcessorModel<"cyclone", CycloneModel, [ProcCyclone]>;
def : ProcessorModel<"exynos-m3", ExynosM3Model, [ProcExynosM3]>; def : ProcessorModel<"exynos-m3", ExynosM3Model, [ProcExynosM3]>;
def : ProcessorModel<"exynos-m4", ExynosM4Model, [ProcExynosM4]>; def : ProcessorModel<"exynos-m4", ExynosM4Model, [ProcExynosM4]>;
def : ProcessorModel<"exynos-m5", ExynosM4Model, [ProcExynosM4]>; def : ProcessorModel<"exynos-m5", ExynosM5Model, [ProcExynosM4]>;
def : ProcessorModel<"falkor", FalkorModel, [ProcFalkor]>; def : ProcessorModel<"falkor", FalkorModel, [ProcFalkor]>;
def : ProcessorModel<"saphira", FalkorModel, [ProcSaphira]>; def : ProcessorModel<"saphira", FalkorModel, [ProcSaphira]>;
def : ProcessorModel<"kryo", KryoModel, [ProcKryo]>; def : ProcessorModel<"kryo", KryoModel, [ProcKryo]>;

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,57 @@
# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m3 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,M3
# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m4 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,M4
# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m5 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,M5
#
# Purpose: feed two AES pairs (aese + aesmc, aesd + aesimc) through llvm-mca
# for exynos-m3, exynos-m4 and exynos-m5. Output that is identical on every
# CPU is matched with the shared prefix; output that differs is matched with
# the per-CPU prefix selected on each command line above.
#
# In each pair the second instruction names v0 for both operands, i.e. the
# same register the first instruction of the pair names as its first operand.
# NOTE(review): presumably this is meant to exercise the dependent
# AES round + mix-columns sequence -- confirm against the scheduling model.
#
# The expected output below is generated; regenerate it with the script named
# on the first line instead of editing it by hand.
aese v0.16b, v1.16b
aesmc v0.16b, v0.16b
aesd v0.16b, v1.16b
aesimc v0.16b, v0.16b
# ALL: Iterations: 100
# ALL-NEXT: Instructions: 400
# M3-NEXT: Total Cycles: 203
# M4-NEXT: Total Cycles: 203
# M5-NEXT: Total Cycles: 403
# ALL-NEXT: Total uOps: 400
# ALL: Dispatch Width: 6
# M3-NEXT: uOps Per Cycle: 1.97
# M3-NEXT: IPC: 1.97
# M4-NEXT: uOps Per Cycle: 1.97
# M4-NEXT: IPC: 1.97
# M5-NEXT: uOps Per Cycle: 0.99
# M5-NEXT: IPC: 0.99
# ALL-NEXT: Block RThroughput: 2.0
# ALL: Instruction Info:
# ALL-NEXT: [1]: #uOps
# ALL-NEXT: [2]: Latency
# ALL-NEXT: [3]: RThroughput
# ALL-NEXT: [4]: MayLoad
# ALL-NEXT: [5]: MayStore
# ALL-NEXT: [6]: HasSideEffects (U)
# ALL: [1] [2] [3] [4] [5] [6] Instructions:
# M3-NEXT: 1 1 0.50 aese v0.16b, v1.16b
# M3-NEXT: 1 1 0.50 aesmc v0.16b, v0.16b
# M3-NEXT: 1 1 0.50 aesd v0.16b, v1.16b
# M3-NEXT: 1 1 0.50 aesimc v0.16b, v0.16b
# M4-NEXT: 1 1 0.50 aese v0.16b, v1.16b
# M4-NEXT: 1 1 0.50 aesmc v0.16b, v0.16b
# M4-NEXT: 1 1 0.50 aesd v0.16b, v1.16b
# M4-NEXT: 1 1 0.50 aesimc v0.16b, v0.16b
# M5-NEXT: 1 2 0.50 aese v0.16b, v1.16b
# M5-NEXT: 1 2 0.50 aesmc v0.16b, v0.16b
# M5-NEXT: 1 2 0.50 aesd v0.16b, v1.16b
# M5-NEXT: 1 2 0.50 aesimc v0.16b, v0.16b

View File

@ -0,0 +1,189 @@
# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m3 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,M3
# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m4 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,M4
# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m5 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,M5
#
# Purpose: run the ld1 / ld1r vector loads through llvm-mca for exynos-m3,
# exynos-m4 and exynos-m5 and compare the reported figures per CPU.
#
# The input is three groups of twelve instructions, all using sp as the base:
#   1. no writeback:                 [sp]
#   2. post-index by an immediate:   [sp], #imm
#   3. post-index by a register:     [sp], x0
# Each group lists, first for 32-bit elements (.s / .2s) and then for 64-bit
# elements (.d / .2d): a single-lane load into lane 0, an ld1r, and the
# one-, two-, three- and four-register list forms.
#
# The expected output below is generated; regenerate it with the script named
# on the first line instead of editing it by hand.

# Group 1: no writeback.
ld1 {v0.s}[0], [sp]
ld1r {v0.2s}, [sp]
ld1 {v0.2s}, [sp]
ld1 {v0.2s, v1.2s}, [sp]
ld1 {v0.2s, v1.2s, v2.2s}, [sp]
ld1 {v0.2s, v1.2s, v2.2s, v3.2s}, [sp]
ld1 {v0.d}[0], [sp]
ld1r {v0.2d}, [sp]
ld1 {v0.2d}, [sp]
ld1 {v0.2d, v1.2d}, [sp]
ld1 {v0.2d, v1.2d, v2.2d}, [sp]
ld1 {v0.2d, v1.2d, v2.2d, v3.2d}, [sp]
# Group 2: post-index by an immediate.
ld1 {v0.s}[0], [sp], #4
ld1r {v0.2s}, [sp], #4
ld1 {v0.2s}, [sp], #8
ld1 {v0.2s, v1.2s}, [sp], #16
ld1 {v0.2s, v1.2s, v2.2s}, [sp], #24
ld1 {v0.2s, v1.2s, v2.2s, v3.2s}, [sp], #32
ld1 {v0.d}[0], [sp], #8
ld1r {v0.2d}, [sp], #8
ld1 {v0.2d}, [sp], #16
ld1 {v0.2d, v1.2d}, [sp], #32
ld1 {v0.2d, v1.2d, v2.2d}, [sp], #48
ld1 {v0.2d, v1.2d, v2.2d, v3.2d}, [sp], #64
# Group 3: post-index by register x0.
ld1 {v0.s}[0], [sp], x0
ld1r {v0.2s}, [sp], x0
ld1 {v0.2s}, [sp], x0
ld1 {v0.2s, v1.2s}, [sp], x0
ld1 {v0.2s, v1.2s, v2.2s}, [sp], x0
ld1 {v0.2s, v1.2s, v2.2s, v3.2s}, [sp], x0
ld1 {v0.d}[0], [sp], x0
ld1r {v0.2d}, [sp], x0
ld1 {v0.2d}, [sp], x0
ld1 {v0.2d, v1.2d}, [sp], x0
ld1 {v0.2d, v1.2d, v2.2d}, [sp], x0
ld1 {v0.2d, v1.2d, v2.2d, v3.2d}, [sp], x0
# ALL: Iterations: 100
# ALL-NEXT: Instructions: 3600
# M3-NEXT: Total Cycles: 14903
# M4-NEXT: Total Cycles: 14703
# M5-NEXT: Total Cycles: 17203
# ALL-NEXT: Total uOps: 10200
# ALL: Dispatch Width: 6
# M3-NEXT: uOps Per Cycle: 0.68
# M3-NEXT: IPC: 0.24
# M4-NEXT: uOps Per Cycle: 0.69
# M4-NEXT: IPC: 0.24
# M5-NEXT: uOps Per Cycle: 0.59
# M5-NEXT: IPC: 0.21
# ALL-NEXT: Block RThroughput: 39.0
# ALL: Instruction Info:
# ALL-NEXT: [1]: #uOps
# ALL-NEXT: [2]: Latency
# ALL-NEXT: [3]: RThroughput
# ALL-NEXT: [4]: MayLoad
# ALL-NEXT: [5]: MayStore
# ALL-NEXT: [6]: HasSideEffects (U)
# ALL: [1] [2] [3] [4] [5] [6] Instructions:
# M3-NEXT: 2 7 1.00 * ld1 { v0.s }[0], [sp]
# M3-NEXT: 1 5 0.50 * ld1r { v0.2s }, [sp]
# M3-NEXT: 1 5 0.50 * ld1 { v0.2s }, [sp]
# M3-NEXT: 2 5 1.00 * ld1 { v0.2s, v1.2s }, [sp]
# M3-NEXT: 3 6 1.50 * ld1 { v0.2s, v1.2s, v2.2s }, [sp]
# M3-NEXT: 4 6 2.00 * ld1 { v0.2s, v1.2s, v2.2s, v3.2s }, [sp]
# M3-NEXT: 2 6 1.00 * ld1 { v0.d }[0], [sp]
# M3-NEXT: 1 5 0.50 * ld1r { v0.2d }, [sp]
# M3-NEXT: 1 5 0.50 * ld1 { v0.2d }, [sp]
# M3-NEXT: 2 5 1.00 * ld1 { v0.2d, v1.2d }, [sp]
# M3-NEXT: 3 6 1.50 * ld1 { v0.2d, v1.2d, v2.2d }, [sp]
# M3-NEXT: 4 6 2.00 * ld1 { v0.2d, v1.2d, v2.2d, v3.2d }, [sp]
# M3-NEXT: 3 7 1.00 * ld1 { v0.s }[0], [sp], #4
# M3-NEXT: 2 5 0.50 * ld1r { v0.2s }, [sp], #4
# M3-NEXT: 2 5 0.50 * ld1 { v0.2s }, [sp], #8
# M3-NEXT: 3 5 1.00 * ld1 { v0.2s, v1.2s }, [sp], #16
# M3-NEXT: 4 6 1.50 * ld1 { v0.2s, v1.2s, v2.2s }, [sp], #24
# M3-NEXT: 5 6 2.00 * ld1 { v0.2s, v1.2s, v2.2s, v3.2s }, [sp], #32
# M3-NEXT: 3 6 1.00 * ld1 { v0.d }[0], [sp], #8
# M3-NEXT: 2 5 0.50 * ld1r { v0.2d }, [sp], #8
# M3-NEXT: 2 5 0.50 * ld1 { v0.2d }, [sp], #16
# M3-NEXT: 3 5 1.00 * ld1 { v0.2d, v1.2d }, [sp], #32
# M3-NEXT: 4 6 1.50 * ld1 { v0.2d, v1.2d, v2.2d }, [sp], #48
# M3-NEXT: 5 6 2.00 * ld1 { v0.2d, v1.2d, v2.2d, v3.2d }, [sp], #64
# M3-NEXT: 3 7 1.00 * ld1 { v0.s }[0], [sp], x0
# M3-NEXT: 2 5 0.50 * ld1r { v0.2s }, [sp], x0
# M3-NEXT: 2 5 0.50 * ld1 { v0.2s }, [sp], x0
# M3-NEXT: 3 5 1.00 * ld1 { v0.2s, v1.2s }, [sp], x0
# M3-NEXT: 4 6 1.50 * ld1 { v0.2s, v1.2s, v2.2s }, [sp], x0
# M3-NEXT: 5 6 2.00 * ld1 { v0.2s, v1.2s, v2.2s, v3.2s }, [sp], x0
# M3-NEXT: 3 6 1.00 * ld1 { v0.d }[0], [sp], x0
# M3-NEXT: 2 5 0.50 * ld1r { v0.2d }, [sp], x0
# M3-NEXT: 2 5 0.50 * ld1 { v0.2d }, [sp], x0
# M3-NEXT: 3 5 1.00 * ld1 { v0.2d, v1.2d }, [sp], x0
# M3-NEXT: 4 6 1.50 * ld1 { v0.2d, v1.2d, v2.2d }, [sp], x0
# M3-NEXT: 5 6 2.00 * ld1 { v0.2d, v1.2d, v2.2d, v3.2d }, [sp], x0
# M4-NEXT: 2 6 1.00 * ld1 { v0.s }[0], [sp]
# M4-NEXT: 1 5 0.50 * ld1r { v0.2s }, [sp]
# M4-NEXT: 1 5 0.50 * ld1 { v0.2s }, [sp]
# M4-NEXT: 2 5 1.00 * ld1 { v0.2s, v1.2s }, [sp]
# M4-NEXT: 3 6 1.50 * ld1 { v0.2s, v1.2s, v2.2s }, [sp]
# M4-NEXT: 4 6 2.00 * ld1 { v0.2s, v1.2s, v2.2s, v3.2s }, [sp]
# M4-NEXT: 2 6 1.00 * ld1 { v0.d }[0], [sp]
# M4-NEXT: 1 5 0.50 * ld1r { v0.2d }, [sp]
# M4-NEXT: 1 5 0.50 * ld1 { v0.2d }, [sp]
# M4-NEXT: 2 5 1.00 * ld1 { v0.2d, v1.2d }, [sp]
# M4-NEXT: 3 6 1.50 * ld1 { v0.2d, v1.2d, v2.2d }, [sp]
# M4-NEXT: 4 6 2.00 * ld1 { v0.2d, v1.2d, v2.2d, v3.2d }, [sp]
# M4-NEXT: 3 6 1.00 * ld1 { v0.s }[0], [sp], #4
# M4-NEXT: 2 5 0.50 * ld1r { v0.2s }, [sp], #4
# M4-NEXT: 2 5 0.50 * ld1 { v0.2s }, [sp], #8
# M4-NEXT: 3 5 1.00 * ld1 { v0.2s, v1.2s }, [sp], #16
# M4-NEXT: 4 6 1.50 * ld1 { v0.2s, v1.2s, v2.2s }, [sp], #24
# M4-NEXT: 5 6 2.00 * ld1 { v0.2s, v1.2s, v2.2s, v3.2s }, [sp], #32
# M4-NEXT: 3 6 1.00 * ld1 { v0.d }[0], [sp], #8
# M4-NEXT: 2 5 0.50 * ld1r { v0.2d }, [sp], #8
# M4-NEXT: 2 5 0.50 * ld1 { v0.2d }, [sp], #16
# M4-NEXT: 3 5 1.00 * ld1 { v0.2d, v1.2d }, [sp], #32
# M4-NEXT: 4 6 1.50 * ld1 { v0.2d, v1.2d, v2.2d }, [sp], #48
# M4-NEXT: 5 6 2.00 * ld1 { v0.2d, v1.2d, v2.2d, v3.2d }, [sp], #64
# M4-NEXT: 3 6 1.00 * ld1 { v0.s }[0], [sp], x0
# M4-NEXT: 2 5 0.50 * ld1r { v0.2s }, [sp], x0
# M4-NEXT: 2 5 0.50 * ld1 { v0.2s }, [sp], x0
# M4-NEXT: 3 5 1.00 * ld1 { v0.2s, v1.2s }, [sp], x0
# M4-NEXT: 4 6 1.50 * ld1 { v0.2s, v1.2s, v2.2s }, [sp], x0
# M4-NEXT: 5 6 2.00 * ld1 { v0.2s, v1.2s, v2.2s, v3.2s }, [sp], x0
# M4-NEXT: 3 6 1.00 * ld1 { v0.d }[0], [sp], x0
# M4-NEXT: 2 5 0.50 * ld1r { v0.2d }, [sp], x0
# M4-NEXT: 2 5 0.50 * ld1 { v0.2d }, [sp], x0
# M4-NEXT: 3 5 1.00 * ld1 { v0.2d, v1.2d }, [sp], x0
# M4-NEXT: 4 6 1.50 * ld1 { v0.2d, v1.2d, v2.2d }, [sp], x0
# M4-NEXT: 5 6 2.00 * ld1 { v0.2d, v1.2d, v2.2d, v3.2d }, [sp], x0
# M5-NEXT: 2 7 1.00 * ld1 { v0.s }[0], [sp]
# M5-NEXT: 1 6 0.50 * ld1r { v0.2s }, [sp]
# M5-NEXT: 1 6 0.50 * ld1 { v0.2s }, [sp]
# M5-NEXT: 2 6 1.00 * ld1 { v0.2s, v1.2s }, [sp]
# M5-NEXT: 3 7 1.50 * ld1 { v0.2s, v1.2s, v2.2s }, [sp]
# M5-NEXT: 4 7 2.00 * ld1 { v0.2s, v1.2s, v2.2s, v3.2s }, [sp]
# M5-NEXT: 2 7 1.00 * ld1 { v0.d }[0], [sp]
# M5-NEXT: 1 6 0.50 * ld1r { v0.2d }, [sp]
# M5-NEXT: 1 6 0.50 * ld1 { v0.2d }, [sp]
# M5-NEXT: 2 6 1.00 * ld1 { v0.2d, v1.2d }, [sp]
# M5-NEXT: 3 7 1.50 * ld1 { v0.2d, v1.2d, v2.2d }, [sp]
# M5-NEXT: 4 7 2.00 * ld1 { v0.2d, v1.2d, v2.2d, v3.2d }, [sp]
# M5-NEXT: 3 7 1.00 * ld1 { v0.s }[0], [sp], #4
# M5-NEXT: 2 6 0.50 * ld1r { v0.2s }, [sp], #4
# M5-NEXT: 2 6 0.50 * ld1 { v0.2s }, [sp], #8
# M5-NEXT: 3 6 1.00 * ld1 { v0.2s, v1.2s }, [sp], #16
# M5-NEXT: 4 7 1.50 * ld1 { v0.2s, v1.2s, v2.2s }, [sp], #24
# M5-NEXT: 5 7 2.00 * ld1 { v0.2s, v1.2s, v2.2s, v3.2s }, [sp], #32
# M5-NEXT: 3 7 1.00 * ld1 { v0.d }[0], [sp], #8
# M5-NEXT: 2 6 0.50 * ld1r { v0.2d }, [sp], #8
# M5-NEXT: 2 6 0.50 * ld1 { v0.2d }, [sp], #16
# M5-NEXT: 3 6 1.00 * ld1 { v0.2d, v1.2d }, [sp], #32
# M5-NEXT: 4 7 1.50 * ld1 { v0.2d, v1.2d, v2.2d }, [sp], #48
# M5-NEXT: 5 7 2.00 * ld1 { v0.2d, v1.2d, v2.2d, v3.2d }, [sp], #64
# M5-NEXT: 3 7 1.00 * ld1 { v0.s }[0], [sp], x0
# M5-NEXT: 2 6 0.50 * ld1r { v0.2s }, [sp], x0
# M5-NEXT: 2 6 0.50 * ld1 { v0.2s }, [sp], x0
# M5-NEXT: 3 6 1.00 * ld1 { v0.2s, v1.2s }, [sp], x0
# M5-NEXT: 4 7 1.50 * ld1 { v0.2s, v1.2s, v2.2s }, [sp], x0
# M5-NEXT: 5 7 2.00 * ld1 { v0.2s, v1.2s, v2.2s, v3.2s }, [sp], x0
# M5-NEXT: 3 7 1.00 * ld1 { v0.d }[0], [sp], x0
# M5-NEXT: 2 6 0.50 * ld1r { v0.2d }, [sp], x0
# M5-NEXT: 2 6 0.50 * ld1 { v0.2d }, [sp], x0
# M5-NEXT: 3 6 1.00 * ld1 { v0.2d, v1.2d }, [sp], x0
# M5-NEXT: 4 7 1.50 * ld1 { v0.2d, v1.2d, v2.2d }, [sp], x0
# M5-NEXT: 5 7 2.00 * ld1 { v0.2d, v1.2d, v2.2d, v3.2d }, [sp], x0

View File

@ -0,0 +1,118 @@
# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m3 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,M3
# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m4 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,M4
# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m5 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,M5
#
# Purpose: run the two-register ld2 / ld2r vector loads through llvm-mca for
# exynos-m3, exynos-m4 and exynos-m5 and compare the reported figures per CPU.
#
# The input is three groups of six instructions, all using sp as the base:
# no writeback, post-index by an immediate, and post-index by register x0.
# Each group lists a lane-0 load, an ld2r and a whole-register ld2, first for
# 32-bit elements (.s / .2s) and then for 64-bit elements (.d / .2d).
#
# The expected output below is generated; regenerate it with the script named
# on the first line instead of editing it by hand.

# Group 1: no writeback.
ld2 {v0.s, v1.s}[0], [sp]
ld2r {v0.2s, v1.2s}, [sp]
ld2 {v0.2s, v1.2s}, [sp]
ld2 {v0.d, v1.d}[0], [sp]
ld2r {v0.2d, v1.2d}, [sp]
ld2 {v0.2d, v1.2d}, [sp]
# Group 2: post-index by an immediate.
ld2 {v0.s, v1.s}[0], [sp], #8
ld2r {v0.2s, v1.2s}, [sp], #8
ld2 {v0.2s, v1.2s}, [sp], #16
ld2 {v0.d, v1.d}[0], [sp], #16
ld2r {v0.2d, v1.2d}, [sp], #16
ld2 {v0.2d, v1.2d}, [sp], #32
# Group 3: post-index by register x0.
ld2 {v0.s, v1.s}[0], [sp], x0
ld2r {v0.2s, v1.2s}, [sp], x0
ld2 {v0.2s, v1.2s}, [sp], x0
ld2 {v0.d, v1.d}[0], [sp], x0
ld2r {v0.2d, v1.2d}, [sp], x0
ld2 {v0.2d, v1.2d}, [sp], x0
# ALL: Iterations: 100
# ALL-NEXT: Instructions: 1800
# M3-NEXT: Total Cycles: 10003
# M4-NEXT: Total Cycles: 9803
# M5-NEXT: Total Cycles: 11103
# ALL-NEXT: Total uOps: 5400
# ALL: Dispatch Width: 6
# M3-NEXT: uOps Per Cycle: 0.54
# M3-NEXT: IPC: 0.18
# M3-NEXT: Block RThroughput: 42.0
# M4-NEXT: uOps Per Cycle: 0.55
# M4-NEXT: IPC: 0.18
# M4-NEXT: Block RThroughput: 30.0
# M5-NEXT: uOps Per Cycle: 0.49
# M5-NEXT: IPC: 0.16
# M5-NEXT: Block RThroughput: 45.0
# ALL: Instruction Info:
# ALL-NEXT: [1]: #uOps
# ALL-NEXT: [2]: Latency
# ALL-NEXT: [3]: RThroughput
# ALL-NEXT: [4]: MayLoad
# ALL-NEXT: [5]: MayStore
# ALL-NEXT: [6]: HasSideEffects (U)
# ALL: [1] [2] [3] [4] [5] [6] Instructions:
# M3-NEXT: 3 7 1.00 * ld2 { v0.s, v1.s }[0], [sp]
# M3-NEXT: 2 5 1.00 * ld2r { v0.2s, v1.2s }, [sp]
# M3-NEXT: 2 10 5.00 * ld2 { v0.2s, v1.2s }, [sp]
# M3-NEXT: 3 6 1.00 * ld2 { v0.d, v1.d }[0], [sp]
# M3-NEXT: 2 5 1.00 * ld2r { v0.2d, v1.2d }, [sp]
# M3-NEXT: 2 10 5.00 * ld2 { v0.2d, v1.2d }, [sp]
# M3-NEXT: 4 7 1.00 * ld2 { v0.s, v1.s }[0], [sp], #8
# M3-NEXT: 3 5 1.00 * ld2r { v0.2s, v1.2s }, [sp], #8
# M3-NEXT: 3 10 5.00 * ld2 { v0.2s, v1.2s }, [sp], #16
# M3-NEXT: 4 6 1.00 * ld2 { v0.d, v1.d }[0], [sp], #16
# M3-NEXT: 3 5 1.00 * ld2r { v0.2d, v1.2d }, [sp], #16
# M3-NEXT: 3 10 5.00 * ld2 { v0.2d, v1.2d }, [sp], #32
# M3-NEXT: 4 7 1.00 * ld2 { v0.s, v1.s }[0], [sp], x0
# M3-NEXT: 3 5 1.00 * ld2r { v0.2s, v1.2s }, [sp], x0
# M3-NEXT: 3 10 5.00 * ld2 { v0.2s, v1.2s }, [sp], x0
# M3-NEXT: 4 6 1.00 * ld2 { v0.d, v1.d }[0], [sp], x0
# M3-NEXT: 3 5 1.00 * ld2r { v0.2d, v1.2d }, [sp], x0
# M3-NEXT: 3 10 5.00 * ld2 { v0.2d, v1.2d }, [sp], x0
# M4-NEXT: 3 6 1.00 * ld2 { v0.s, v1.s }[0], [sp]
# M4-NEXT: 2 5 1.00 * ld2r { v0.2s, v1.2s }, [sp]
# M4-NEXT: 2 10 3.00 * ld2 { v0.2s, v1.2s }, [sp]
# M4-NEXT: 3 6 1.00 * ld2 { v0.d, v1.d }[0], [sp]
# M4-NEXT: 2 5 1.00 * ld2r { v0.2d, v1.2d }, [sp]
# M4-NEXT: 2 10 3.00 * ld2 { v0.2d, v1.2d }, [sp]
# M4-NEXT: 4 6 1.00 * ld2 { v0.s, v1.s }[0], [sp], #8
# M4-NEXT: 3 5 1.00 * ld2r { v0.2s, v1.2s }, [sp], #8
# M4-NEXT: 3 10 3.00 * ld2 { v0.2s, v1.2s }, [sp], #16
# M4-NEXT: 4 6 1.00 * ld2 { v0.d, v1.d }[0], [sp], #16
# M4-NEXT: 3 5 1.00 * ld2r { v0.2d, v1.2d }, [sp], #16
# M4-NEXT: 3 10 3.00 * ld2 { v0.2d, v1.2d }, [sp], #32
# M4-NEXT: 4 6 1.00 * ld2 { v0.s, v1.s }[0], [sp], x0
# M4-NEXT: 3 5 1.00 * ld2r { v0.2s, v1.2s }, [sp], x0
# M4-NEXT: 3 10 3.00 * ld2 { v0.2s, v1.2s }, [sp], x0
# M4-NEXT: 4 6 1.00 * ld2 { v0.d, v1.d }[0], [sp], x0
# M4-NEXT: 3 5 1.00 * ld2r { v0.2d, v1.2d }, [sp], x0
# M4-NEXT: 3 10 3.00 * ld2 { v0.2d, v1.2d }, [sp], x0
# M5-NEXT: 3 7 1.00 * ld2 { v0.s, v1.s }[0], [sp]
# M5-NEXT: 2 6 1.00 * ld2r { v0.2s, v1.2s }, [sp]
# M5-NEXT: 2 11 5.50 * ld2 { v0.2s, v1.2s }, [sp]
# M5-NEXT: 3 7 1.00 * ld2 { v0.d, v1.d }[0], [sp]
# M5-NEXT: 2 6 1.00 * ld2r { v0.2d, v1.2d }, [sp]
# M5-NEXT: 2 11 5.50 * ld2 { v0.2d, v1.2d }, [sp]
# M5-NEXT: 4 7 1.00 * ld2 { v0.s, v1.s }[0], [sp], #8
# M5-NEXT: 3 6 1.00 * ld2r { v0.2s, v1.2s }, [sp], #8
# M5-NEXT: 3 11 5.50 * ld2 { v0.2s, v1.2s }, [sp], #16
# M5-NEXT: 4 7 1.00 * ld2 { v0.d, v1.d }[0], [sp], #16
# M5-NEXT: 3 6 1.00 * ld2r { v0.2d, v1.2d }, [sp], #16
# M5-NEXT: 3 11 5.50 * ld2 { v0.2d, v1.2d }, [sp], #32
# M5-NEXT: 4 7 1.00 * ld2 { v0.s, v1.s }[0], [sp], x0
# M5-NEXT: 3 6 1.00 * ld2r { v0.2s, v1.2s }, [sp], x0
# M5-NEXT: 3 11 5.50 * ld2 { v0.2s, v1.2s }, [sp], x0
# M5-NEXT: 4 7 1.00 * ld2 { v0.d, v1.d }[0], [sp], x0
# M5-NEXT: 3 6 1.00 * ld2r { v0.2d, v1.2d }, [sp], x0
# M5-NEXT: 3 11 5.50 * ld2 { v0.2d, v1.2d }, [sp], x0

View File

@ -0,0 +1,118 @@
# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m3 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,M3
# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m4 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,M4
# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m5 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,M5
#
# Purpose: run the three-register ld3 / ld3r vector loads through llvm-mca for
# exynos-m3, exynos-m4 and exynos-m5 and compare the reported figures per CPU.
#
# The input is three groups of six instructions, all using sp as the base:
# no writeback, post-index by an immediate, and post-index by register x0.
# Each group lists a lane-0 load, an ld3r and a whole-register ld3, first for
# 32-bit elements (.s / .2s) and then for 64-bit elements (.d / .2d).
#
# The expected output below is generated; regenerate it with the script named
# on the first line instead of editing it by hand.

# Group 1: no writeback.
ld3 {v0.s, v1.s, v2.s}[0], [sp]
ld3r {v0.2s, v1.2s, v2.2s}, [sp]
ld3 {v0.2s, v1.2s, v2.2s}, [sp]
ld3 {v0.d, v1.d, v2.d}[0], [sp]
ld3r {v0.2d, v1.2d, v2.2d}, [sp]
ld3 {v0.2d, v1.2d, v2.2d}, [sp]
# Group 2: post-index by an immediate.
ld3 {v0.s, v1.s, v2.s}[0], [sp], #12
ld3r {v0.2s, v1.2s, v2.2s}, [sp], #12
ld3 {v0.2s, v1.2s, v2.2s}, [sp], #24
ld3 {v0.d, v1.d, v2.d}[0], [sp], #24
ld3r {v0.2d, v1.2d, v2.2d}, [sp], #24
ld3 {v0.2d, v1.2d, v2.2d}, [sp], #48
# Group 3: post-index by register x0.
ld3 {v0.s, v1.s, v2.s}[0], [sp], x0
ld3r {v0.2s, v1.2s, v2.2s}, [sp], x0
ld3 {v0.2s, v1.2s, v2.2s}, [sp], x0
ld3 {v0.d, v1.d, v2.d}[0], [sp], x0
ld3r {v0.2d, v1.2d, v2.2d}, [sp], x0
ld3 {v0.2d, v1.2d, v2.2d}, [sp], x0
# ALL: Iterations: 100
# ALL-NEXT: Instructions: 1800
# M3-NEXT: Total Cycles: 12501
# M4-NEXT: Total Cycles: 11804
# M5-NEXT: Total Cycles: 12903
# ALL-NEXT: Total uOps: 7500
# ALL: Dispatch Width: 6
# M3-NEXT: uOps Per Cycle: 0.60
# M3-NEXT: IPC: 0.14
# M3-NEXT: Block RThroughput: 84.0
# M4-NEXT: uOps Per Cycle: 0.64
# M4-NEXT: IPC: 0.15
# M4-NEXT: Block RThroughput: 54.0
# M5-NEXT: uOps Per Cycle: 0.58
# M5-NEXT: IPC: 0.14
# M5-NEXT: Block RThroughput: 22.5
# ALL: Instruction Info:
# ALL-NEXT: [1]: #uOps
# ALL-NEXT: [2]: Latency
# ALL-NEXT: [3]: RThroughput
# ALL-NEXT: [4]: MayLoad
# ALL-NEXT: [5]: MayStore
# ALL-NEXT: [6]: HasSideEffects (U)
# ALL: [1] [2] [3] [4] [5] [6] Instructions:
# M3-NEXT: 4 7 1.00 * ld3 { v0.s, v1.s, v2.s }[0], [sp]
# M3-NEXT: 3 6 1.50 * ld3r { v0.2s, v1.2s, v2.2s }, [sp]
# M3-NEXT: 3 12 9.00 * ld3 { v0.2s, v1.2s, v2.2s }, [sp]
# M3-NEXT: 5 6 6.00 * ld3 { v0.d, v1.d, v2.d }[0], [sp]
# M3-NEXT: 3 6 1.50 * ld3r { v0.2d, v1.2d, v2.2d }, [sp]
# M3-NEXT: 3 12 9.00 * ld3 { v0.2d, v1.2d, v2.2d }, [sp]
# M3-NEXT: 5 7 1.00 * ld3 { v0.s, v1.s, v2.s }[0], [sp], #12
# M3-NEXT: 4 6 1.50 * ld3r { v0.2s, v1.2s, v2.2s }, [sp], #12
# M3-NEXT: 4 12 9.00 * ld3 { v0.2s, v1.2s, v2.2s }, [sp], #24
# M3-NEXT: 6 6 6.00 * ld3 { v0.d, v1.d, v2.d }[0], [sp], #24
# M3-NEXT: 4 6 1.50 * ld3r { v0.2d, v1.2d, v2.2d }, [sp], #24
# M3-NEXT: 4 12 9.00 * ld3 { v0.2d, v1.2d, v2.2d }, [sp], #48
# M3-NEXT: 5 7 1.00 * ld3 { v0.s, v1.s, v2.s }[0], [sp], x0
# M3-NEXT: 4 6 1.50 * ld3r { v0.2s, v1.2s, v2.2s }, [sp], x0
# M3-NEXT: 4 12 9.00 * ld3 { v0.2s, v1.2s, v2.2s }, [sp], x0
# M3-NEXT: 6 6 6.00 * ld3 { v0.d, v1.d, v2.d }[0], [sp], x0
# M3-NEXT: 4 6 1.50 * ld3r { v0.2d, v1.2d, v2.2d }, [sp], x0
# M3-NEXT: 4 12 9.00 * ld3 { v0.2d, v1.2d, v2.2d }, [sp], x0
# M4-NEXT: 4 7 1.50 * ld3 { v0.s, v1.s, v2.s }[0], [sp]
# M4-NEXT: 3 6 1.50 * ld3r { v0.2s, v1.2s, v2.2s }, [sp]
# M4-NEXT: 3 12 4.50 * ld3 { v0.2s, v1.2s, v2.2s }, [sp]
# M4-NEXT: 5 7 4.50 * ld3 { v0.d, v1.d, v2.d }[0], [sp]
# M4-NEXT: 3 6 1.50 * ld3r { v0.2d, v1.2d, v2.2d }, [sp]
# M4-NEXT: 3 12 4.50 * ld3 { v0.2d, v1.2d, v2.2d }, [sp]
# M4-NEXT: 5 7 1.50 * ld3 { v0.s, v1.s, v2.s }[0], [sp], #12
# M4-NEXT: 4 6 1.50 * ld3r { v0.2s, v1.2s, v2.2s }, [sp], #12
# M4-NEXT: 4 12 4.50 * ld3 { v0.2s, v1.2s, v2.2s }, [sp], #24
# M4-NEXT: 6 7 4.50 * ld3 { v0.d, v1.d, v2.d }[0], [sp], #24
# M4-NEXT: 4 6 1.50 * ld3r { v0.2d, v1.2d, v2.2d }, [sp], #24
# M4-NEXT: 4 12 4.50 * ld3 { v0.2d, v1.2d, v2.2d }, [sp], #48
# M4-NEXT: 5 7 1.50 * ld3 { v0.s, v1.s, v2.s }[0], [sp], x0
# M4-NEXT: 4 6 1.50 * ld3r { v0.2s, v1.2s, v2.2s }, [sp], x0
# M4-NEXT: 4 12 4.50 * ld3 { v0.2s, v1.2s, v2.2s }, [sp], x0
# M4-NEXT: 6 7 4.50 * ld3 { v0.d, v1.d, v2.d }[0], [sp], x0
# M4-NEXT: 4 6 1.50 * ld3r { v0.2d, v1.2d, v2.2d }, [sp], x0
# M4-NEXT: 4 12 4.50 * ld3 { v0.2d, v1.2d, v2.2d }, [sp], x0
# M5-NEXT: 4 8 1.50 * ld3 { v0.s, v1.s, v2.s }[0], [sp]
# M5-NEXT: 3 7 1.50 * ld3r { v0.2s, v1.2s, v2.2s }, [sp]
# M5-NEXT: 3 13 1.50 * ld3 { v0.2s, v1.2s, v2.2s }, [sp]
# M5-NEXT: 5 8 1.50 * ld3 { v0.d, v1.d, v2.d }[0], [sp]
# M5-NEXT: 3 7 1.50 * ld3r { v0.2d, v1.2d, v2.2d }, [sp]
# M5-NEXT: 3 13 1.50 * ld3 { v0.2d, v1.2d, v2.2d }, [sp]
# M5-NEXT: 5 8 1.50 * ld3 { v0.s, v1.s, v2.s }[0], [sp], #12
# M5-NEXT: 4 7 1.50 * ld3r { v0.2s, v1.2s, v2.2s }, [sp], #12
# M5-NEXT: 4 13 1.50 * ld3 { v0.2s, v1.2s, v2.2s }, [sp], #24
# M5-NEXT: 6 8 1.50 * ld3 { v0.d, v1.d, v2.d }[0], [sp], #24
# M5-NEXT: 4 7 1.50 * ld3r { v0.2d, v1.2d, v2.2d }, [sp], #24
# M5-NEXT: 4 13 1.50 * ld3 { v0.2d, v1.2d, v2.2d }, [sp], #48
# M5-NEXT: 5 8 1.50 * ld3 { v0.s, v1.s, v2.s }[0], [sp], x0
# M5-NEXT: 4 7 1.50 * ld3r { v0.2s, v1.2s, v2.2s }, [sp], x0
# M5-NEXT: 4 13 1.50 * ld3 { v0.2s, v1.2s, v2.2s }, [sp], x0
# M5-NEXT: 6 8 1.50 * ld3 { v0.d, v1.d, v2.d }[0], [sp], x0
# M5-NEXT: 4 7 1.50 * ld3r { v0.2d, v1.2d, v2.2d }, [sp], x0
# M5-NEXT: 4 13 1.50 * ld3 { v0.2d, v1.2d, v2.2d }, [sp], x0

View File

@ -0,0 +1,118 @@
# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m3 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,M3
# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m4 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,M4
# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m5 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,M5
#
# Purpose: run the four-register ld4 / ld4r vector loads through llvm-mca for
# exynos-m3, exynos-m4 and exynos-m5 and compare the reported figures per CPU.
#
# The input is three groups of six instructions, all using sp as the base:
# no writeback, post-index by an immediate, and post-index by register x0.
# Each group lists a lane-0 load, an ld4r and a whole-register ld4, first for
# 32-bit elements (.s / .2s) and then for 64-bit elements (.d / .2d).
#
# The expected output below is generated; regenerate it with the script named
# on the first line instead of editing it by hand.

# Group 1: no writeback.
ld4 {v0.s, v1.s, v2.s, v3.s}[0], [sp]
ld4r {v0.2s, v1.2s, v2.2s, v3.2s}, [sp]
ld4 {v0.2s, v1.2s, v2.2s, v3.2s}, [sp]
ld4 {v0.d, v1.d, v2.d, v3.d}[0], [sp]
ld4r {v0.2d, v1.2d, v2.2d, v3.2d}, [sp]
ld4 {v0.2d, v1.2d, v2.2d, v3.2d}, [sp]
# Group 2: post-index by an immediate.
ld4 {v0.s, v1.s, v2.s, v3.s}[0], [sp], #16
ld4r {v0.2s, v1.2s, v2.2s, v3.2s}, [sp], #16
ld4 {v0.2s, v1.2s, v2.2s, v3.2s}, [sp], #32
ld4 {v0.d, v1.d, v2.d, v3.d}[0], [sp], #32
ld4r {v0.2d, v1.2d, v2.2d, v3.2d}, [sp], #32
ld4 {v0.2d, v1.2d, v2.2d, v3.2d}, [sp], #64
# Group 3: post-index by register x0.
ld4 {v0.s, v1.s, v2.s, v3.s}[0], [sp], x0
ld4r {v0.2s, v1.2s, v2.2s, v3.2s}, [sp], x0
ld4 {v0.2s, v1.2s, v2.2s, v3.2s}, [sp], x0
ld4 {v0.d, v1.d, v2.d, v3.d}[0], [sp], x0
ld4r {v0.2d, v1.2d, v2.2d, v3.2d}, [sp], x0
ld4 {v0.2d, v1.2d, v2.2d, v3.2d}, [sp], x0
# ALL: Iterations: 100
# ALL-NEXT: Instructions: 1800
# M3-NEXT: Total Cycles: 15598
# M4-NEXT: Total Cycles: 13004
# M5-NEXT: Total Cycles: 14304
# ALL-NEXT: Total uOps: 9300
# ALL: Dispatch Width: 6
# M3-NEXT: uOps Per Cycle: 0.60
# M3-NEXT: IPC: 0.12
# M3-NEXT: Block RThroughput: 108.0
# M4-NEXT: uOps Per Cycle: 0.72
# M4-NEXT: IPC: 0.14
# M4-NEXT: Block RThroughput: 61.5
# M5-NEXT: uOps Per Cycle: 0.65
# M5-NEXT: IPC: 0.13
# M5-NEXT: Block RThroughput: 40.5
# ALL: Instruction Info:
# ALL-NEXT: [1]: #uOps
# ALL-NEXT: [2]: Latency
# ALL-NEXT: [3]: RThroughput
# ALL-NEXT: [4]: MayLoad
# ALL-NEXT: [5]: MayStore
# ALL-NEXT: [6]: HasSideEffects (U)
# ALL: [1] [2] [3] [4] [5] [6] Instructions:
# M3-NEXT: 5 9 2.00 * ld4 { v0.s, v1.s, v2.s, v3.s }[0], [sp]
# M3-NEXT: 4 6 2.00 * ld4r { v0.2s, v1.2s, v2.2s, v3.2s }, [sp]
# M3-NEXT: 4 14 12.00 * ld4 { v0.2s, v1.2s, v2.2s, v3.2s }, [sp]
# M3-NEXT: 6 7 6.00 * ld4 { v0.d, v1.d, v2.d, v3.d }[0], [sp]
# M3-NEXT: 4 6 2.00 * ld4r { v0.2d, v1.2d, v2.2d, v3.2d }, [sp]
# M3-NEXT: 4 14 12.00 * ld4 { v0.2d, v1.2d, v2.2d, v3.2d }, [sp]
# M3-NEXT: 6 9 2.00 * ld4 { v0.s, v1.s, v2.s, v3.s }[0], [sp], #16
# M3-NEXT: 5 6 2.00 * ld4r { v0.2s, v1.2s, v2.2s, v3.2s }, [sp], #16
# M3-NEXT: 5 14 12.00 * ld4 { v0.2s, v1.2s, v2.2s, v3.2s }, [sp], #32
# M3-NEXT: 7 7 6.00 * ld4 { v0.d, v1.d, v2.d, v3.d }[0], [sp], #32
# M3-NEXT: 5 6 2.00 * ld4r { v0.2d, v1.2d, v2.2d, v3.2d }, [sp], #32
# M3-NEXT: 5 14 12.00 * ld4 { v0.2d, v1.2d, v2.2d, v3.2d }, [sp], #64
# M3-NEXT: 6 9 2.00 * ld4 { v0.s, v1.s, v2.s, v3.s }[0], [sp], x0
# M3-NEXT: 5 6 2.00 * ld4r { v0.2s, v1.2s, v2.2s, v3.2s }, [sp], x0
# M3-NEXT: 5 14 12.00 * ld4 { v0.2s, v1.2s, v2.2s, v3.2s }, [sp], x0
# M3-NEXT: 7 7 6.00 * ld4 { v0.d, v1.d, v2.d, v3.d }[0], [sp], x0
# M3-NEXT: 5 6 2.00 * ld4r { v0.2d, v1.2d, v2.2d, v3.2d }, [sp], x0
# M3-NEXT: 5 14 12.00 * ld4 { v0.2d, v1.2d, v2.2d, v3.2d }, [sp], x0
# M4-NEXT: 5 7 2.00 * ld4 { v0.s, v1.s, v2.s, v3.s }[0], [sp]
# M4-NEXT: 4 6 2.00 * ld4r { v0.2s, v1.2s, v2.2s, v3.2s }, [sp]
# M4-NEXT: 4 14 6.00 * ld4 { v0.2s, v1.2s, v2.2s, v3.2s }, [sp]
# M4-NEXT: 6 7 3.00 * ld4 { v0.d, v1.d, v2.d, v3.d }[0], [sp]
# M4-NEXT: 4 6 2.00 * ld4r { v0.2d, v1.2d, v2.2d, v3.2d }, [sp]
# M4-NEXT: 4 14 6.00 * ld4 { v0.2d, v1.2d, v2.2d, v3.2d }, [sp]
# M4-NEXT: 6 7 2.00 * ld4 { v0.s, v1.s, v2.s, v3.s }[0], [sp], #16
# M4-NEXT: 5 6 2.00 * ld4r { v0.2s, v1.2s, v2.2s, v3.2s }, [sp], #16
# M4-NEXT: 5 14 6.00 * ld4 { v0.2s, v1.2s, v2.2s, v3.2s }, [sp], #32
# M4-NEXT: 7 7 3.00 * ld4 { v0.d, v1.d, v2.d, v3.d }[0], [sp], #32
# M4-NEXT: 5 6 2.00 * ld4r { v0.2d, v1.2d, v2.2d, v3.2d }, [sp], #32
# M4-NEXT: 5 14 6.00 * ld4 { v0.2d, v1.2d, v2.2d, v3.2d }, [sp], #64
# M4-NEXT: 6 7 2.00 * ld4 { v0.s, v1.s, v2.s, v3.s }[0], [sp], x0
# M4-NEXT: 5 6 2.00 * ld4r { v0.2s, v1.2s, v2.2s, v3.2s }, [sp], x0
# M4-NEXT: 5 14 6.00 * ld4 { v0.2s, v1.2s, v2.2s, v3.2s }, [sp], x0
# M4-NEXT: 7 7 3.00 * ld4 { v0.d, v1.d, v2.d, v3.d }[0], [sp], x0
# M4-NEXT: 5 6 2.00 * ld4r { v0.2d, v1.2d, v2.2d, v3.2d }, [sp], x0
# M4-NEXT: 5 14 6.00 * ld4 { v0.2d, v1.2d, v2.2d, v3.2d }, [sp], x0
# M5-NEXT: 5 8 2.00 * ld4 { v0.s, v1.s, v2.s, v3.s }[0], [sp]
# M5-NEXT: 4 7 2.00 * ld4r { v0.2s, v1.2s, v2.2s, v3.2s }, [sp]
# M5-NEXT: 4 15 4.00 * ld4 { v0.2s, v1.2s, v2.2s, v3.2s }, [sp]
# M5-NEXT: 6 8 2.00 * ld4 { v0.d, v1.d, v2.d, v3.d }[0], [sp]
# M5-NEXT: 4 7 2.00 * ld4r { v0.2d, v1.2d, v2.2d, v3.2d }, [sp]
# M5-NEXT: 4 15 4.00 * ld4 { v0.2d, v1.2d, v2.2d, v3.2d }, [sp]
# M5-NEXT: 6 8 2.00 * ld4 { v0.s, v1.s, v2.s, v3.s }[0], [sp], #16
# M5-NEXT: 5 7 2.00 * ld4r { v0.2s, v1.2s, v2.2s, v3.2s }, [sp], #16
# M5-NEXT: 5 15 4.00 * ld4 { v0.2s, v1.2s, v2.2s, v3.2s }, [sp], #32
# M5-NEXT: 7 8 2.00 * ld4 { v0.d, v1.d, v2.d, v3.d }[0], [sp], #32
# M5-NEXT: 5 7 2.00 * ld4r { v0.2d, v1.2d, v2.2d, v3.2d }, [sp], #32
# M5-NEXT: 5 15 4.00 * ld4 { v0.2d, v1.2d, v2.2d, v3.2d }, [sp], #64
# M5-NEXT: 6 8 2.00 * ld4 { v0.s, v1.s, v2.s, v3.s }[0], [sp], x0
# M5-NEXT: 5 7 2.00 * ld4r { v0.2s, v1.2s, v2.2s, v3.2s }, [sp], x0
# M5-NEXT: 5 15 4.00 * ld4 { v0.2s, v1.2s, v2.2s, v3.2s }, [sp], x0
# M5-NEXT: 7 8 2.00 * ld4 { v0.d, v1.d, v2.d, v3.d }[0], [sp], x0
# M5-NEXT: 5 7 2.00 * ld4r { v0.2d, v1.2d, v2.2d, v3.2d }, [sp], x0
# M5-NEXT: 5 15 4.00 * ld4 { v0.2d, v1.2d, v2.2d, v3.2d }, [sp], x0

View File

@ -0,0 +1,169 @@
# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m3 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,M3
# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m4 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,M4
# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m5 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,M5
#
# Purpose: run the st1 vector stores through llvm-mca for exynos-m3,
# exynos-m4 and exynos-m5 and compare the reported figures per CPU.
#
# The input is three groups of ten instructions, all using sp as the base:
# no writeback, post-index by an immediate, and post-index by register x0.
# Each group lists, first for 32-bit elements (.s / .2s) and then for 64-bit
# elements (.d / .2d): a lane-0 store followed by the one-, two-, three- and
# four-register list forms.
#
# The expected output below is generated; regenerate it with the script named
# on the first line instead of editing it by hand.

# Group 1: no writeback.
st1 {v0.s}[0], [sp]
st1 {v0.2s}, [sp]
st1 {v0.2s, v1.2s}, [sp]
st1 {v0.2s, v1.2s, v2.2s}, [sp]
st1 {v0.2s, v1.2s, v2.2s, v3.2s}, [sp]
st1 {v0.d}[0], [sp]
st1 {v0.2d}, [sp]
st1 {v0.2d, v1.2d}, [sp]
st1 {v0.2d, v1.2d, v2.2d}, [sp]
st1 {v0.2d, v1.2d, v2.2d, v3.2d}, [sp]
# Group 2: post-index by an immediate.
st1 {v0.s}[0], [sp], #4
st1 {v0.2s}, [sp], #8
st1 {v0.2s, v1.2s}, [sp], #16
st1 {v0.2s, v1.2s, v2.2s}, [sp], #24
st1 {v0.2s, v1.2s, v2.2s, v3.2s}, [sp], #32
st1 {v0.d}[0], [sp], #8
st1 {v0.2d}, [sp], #16
st1 {v0.2d, v1.2d}, [sp], #32
st1 {v0.2d, v1.2d, v2.2d}, [sp], #48
st1 {v0.2d, v1.2d, v2.2d, v3.2d}, [sp], #64
# Group 3: post-index by register x0.
st1 {v0.s}[0], [sp], x0
st1 {v0.2s}, [sp], x0
st1 {v0.2s, v1.2s}, [sp], x0
st1 {v0.2s, v1.2s, v2.2s}, [sp], x0
st1 {v0.2s, v1.2s, v2.2s, v3.2s}, [sp], x0
st1 {v0.d}[0], [sp], x0
st1 {v0.2d}, [sp], x0
st1 {v0.2d, v1.2d}, [sp], x0
st1 {v0.2d, v1.2d, v2.2d}, [sp], x0
st1 {v0.2d, v1.2d, v2.2d, v3.2d}, [sp], x0
# ALL: Iterations: 100
# ALL-NEXT: Instructions: 3000
# M3-NEXT: Total Cycles: 10203
# M3-NEXT: Total uOps: 8400
# M4-NEXT: Total Cycles: 6603
# M4-NEXT: Total uOps: 8600
# M5-NEXT: Total Cycles: 6603
# M5-NEXT: Total uOps: 8600
# ALL: Dispatch Width: 6
# M3-NEXT: uOps Per Cycle: 0.82
# M3-NEXT: IPC: 0.29
# M3-NEXT: Block RThroughput: 72.0
# M4-NEXT: uOps Per Cycle: 1.30
# M4-NEXT: IPC: 0.45
# M4-NEXT: Block RThroughput: 33.0
# M5-NEXT: uOps Per Cycle: 1.30
# M5-NEXT: IPC: 0.45
# M5-NEXT: Block RThroughput: 33.0
# ALL: Instruction Info:
# ALL-NEXT: [1]: #uOps
# ALL-NEXT: [2]: Latency
# ALL-NEXT: [3]: RThroughput
# ALL-NEXT: [4]: MayLoad
# ALL-NEXT: [5]: MayStore
# ALL-NEXT: [6]: HasSideEffects (U)
# ALL: [1] [2] [3] [4] [5] [6] Instructions:
# M3-NEXT: 4 7 3.00 * st1 { v0.s }[0], [sp]
# M3-NEXT: 1 1 1.00 * st1 { v0.2s }, [sp]
# M3-NEXT: 2 2 2.00 * st1 { v0.2s, v1.2s }, [sp]
# M3-NEXT: 3 3 3.00 * st1 { v0.2s, v1.2s, v2.2s }, [sp]
# M3-NEXT: 4 4 4.00 * st1 { v0.2s, v1.2s, v2.2s, v3.2s }, [sp]
# M3-NEXT: 4 7 3.00 * st1 { v0.d }[0], [sp]
# M3-NEXT: 1 1 1.00 * st1 { v0.2d }, [sp]
# M3-NEXT: 2 2 2.00 * st1 { v0.2d, v1.2d }, [sp]
# M3-NEXT: 3 3 3.00 * st1 { v0.2d, v1.2d, v2.2d }, [sp]
# M3-NEXT: 4 4 4.00 * st1 { v0.2d, v1.2d, v2.2d, v3.2d }, [sp]
# M3-NEXT: 4 7 3.00 * st1 { v0.s }[0], [sp], #4
# M3-NEXT: 1 1 1.00 * st1 { v0.2s }, [sp], #8
# M3-NEXT: 2 2 2.00 * st1 { v0.2s, v1.2s }, [sp], #16
# M3-NEXT: 3 3 3.00 * st1 { v0.2s, v1.2s, v2.2s }, [sp], #24
# M3-NEXT: 4 4 4.00 * st1 { v0.2s, v1.2s, v2.2s, v3.2s }, [sp], #32
# M3-NEXT: 4 7 3.00 * st1 { v0.d }[0], [sp], #8
# M3-NEXT: 1 1 1.00 * st1 { v0.2d }, [sp], #16
# M3-NEXT: 2 2 2.00 * st1 { v0.2d, v1.2d }, [sp], #32
# M3-NEXT: 3 3 3.00 * st1 { v0.2d, v1.2d, v2.2d }, [sp], #48
# M3-NEXT: 4 4 4.00 * st1 { v0.2d, v1.2d, v2.2d, v3.2d }, [sp], #64
# M3-NEXT: 4 7 3.00 * st1 { v0.s }[0], [sp], x0
# M3-NEXT: 1 1 1.00 * st1 { v0.2s }, [sp], x0
# M3-NEXT: 2 2 2.00 * st1 { v0.2s, v1.2s }, [sp], x0
# M3-NEXT: 3 3 3.00 * st1 { v0.2s, v1.2s, v2.2s }, [sp], x0
# M3-NEXT: 4 4 4.00 * st1 { v0.2s, v1.2s, v2.2s, v3.2s }, [sp], x0
# M3-NEXT: 4 7 3.00 * st1 { v0.d }[0], [sp], x0
# M3-NEXT: 1 1 1.00 * st1 { v0.2d }, [sp], x0
# M3-NEXT: 2 2 2.00 * st1 { v0.2d, v1.2d }, [sp], x0
# M3-NEXT: 3 3 3.00 * st1 { v0.2d, v1.2d, v2.2d }, [sp], x0
# M3-NEXT: 4 4 4.00 * st1 { v0.2d, v1.2d, v2.2d, v3.2d }, [sp], x0
# M4-NEXT: 1 1 0.50 * st1 { v0.s }[0], [sp]
# M4-NEXT: 1 1 0.50 * st1 { v0.2s }, [sp]
# M4-NEXT: 2 2 1.00 * st1 { v0.2s, v1.2s }, [sp]
# M4-NEXT: 3 3 1.50 * st1 { v0.2s, v1.2s, v2.2s }, [sp]
# M4-NEXT: 4 4 2.00 * st1 { v0.2s, v1.2s, v2.2s, v3.2s }, [sp]
# M4-NEXT: 1 1 0.50 * st1 { v0.d }[0], [sp]
# M4-NEXT: 1 1 0.50 * st1 { v0.2d }, [sp]
# M4-NEXT: 2 2 1.00 * st1 { v0.2d, v1.2d }, [sp]
# M4-NEXT: 3 3 1.50 * st1 { v0.2d, v1.2d, v2.2d }, [sp]
# M4-NEXT: 4 4 2.00 * st1 { v0.2d, v1.2d, v2.2d, v3.2d }, [sp]
# M4-NEXT: 2 1 0.50 * st1 { v0.s }[0], [sp], #4
# M4-NEXT: 2 1 0.50 * st1 { v0.2s }, [sp], #8
# M4-NEXT: 3 2 1.00 * st1 { v0.2s, v1.2s }, [sp], #16
# M4-NEXT: 4 3 1.50 * st1 { v0.2s, v1.2s, v2.2s }, [sp], #24
# M4-NEXT: 5 4 2.00 * st1 { v0.2s, v1.2s, v2.2s, v3.2s }, [sp], #32
# M4-NEXT: 2 1 0.50 * st1 { v0.d }[0], [sp], #8
# M4-NEXT: 2 1 0.50 * st1 { v0.2d }, [sp], #16
# M4-NEXT: 3 2 1.00 * st1 { v0.2d, v1.2d }, [sp], #32
# M4-NEXT: 4 3 1.50 * st1 { v0.2d, v1.2d, v2.2d }, [sp], #48
# M4-NEXT: 5 4 2.00 * st1 { v0.2d, v1.2d, v2.2d, v3.2d }, [sp], #64
# M4-NEXT: 2 1 0.50 * st1 { v0.s }[0], [sp], x0
# M4-NEXT: 2 1 0.50 * st1 { v0.2s }, [sp], x0
# M4-NEXT: 3 2 1.00 * st1 { v0.2s, v1.2s }, [sp], x0
# M4-NEXT: 4 3 1.50 * st1 { v0.2s, v1.2s, v2.2s }, [sp], x0
# M4-NEXT: 5 4 2.00 * st1 { v0.2s, v1.2s, v2.2s, v3.2s }, [sp], x0
# M4-NEXT: 2 1 0.50 * st1 { v0.d }[0], [sp], x0
# M4-NEXT: 2 1 0.50 * st1 { v0.2d }, [sp], x0
# M4-NEXT: 3 2 1.00 * st1 { v0.2d, v1.2d }, [sp], x0
# M4-NEXT: 4 3 1.50 * st1 { v0.2d, v1.2d, v2.2d }, [sp], x0
# M4-NEXT: 5 4 2.00 * st1 { v0.2d, v1.2d, v2.2d, v3.2d }, [sp], x0
# M5-NEXT: 1 1 0.50 * st1 { v0.s }[0], [sp]
# M5-NEXT: 1 1 0.50 * st1 { v0.2s }, [sp]
# M5-NEXT: 2 2 1.00 * st1 { v0.2s, v1.2s }, [sp]
# M5-NEXT: 3 3 1.50 * st1 { v0.2s, v1.2s, v2.2s }, [sp]
# M5-NEXT: 4 4 2.00 * st1 { v0.2s, v1.2s, v2.2s, v3.2s }, [sp]
# M5-NEXT: 1 1 0.50 * st1 { v0.d }[0], [sp]
# M5-NEXT: 1 1 0.50 * st1 { v0.2d }, [sp]
# M5-NEXT: 2 2 1.00 * st1 { v0.2d, v1.2d }, [sp]
# M5-NEXT: 3 3 1.50 * st1 { v0.2d, v1.2d, v2.2d }, [sp]
# M5-NEXT: 4 4 2.00 * st1 { v0.2d, v1.2d, v2.2d, v3.2d }, [sp]
# M5-NEXT: 2 1 0.50 * st1 { v0.s }[0], [sp], #4
# M5-NEXT: 2 1 0.50 * st1 { v0.2s }, [sp], #8
# M5-NEXT: 3 2 1.00 * st1 { v0.2s, v1.2s }, [sp], #16
# M5-NEXT: 4 3 1.50 * st1 { v0.2s, v1.2s, v2.2s }, [sp], #24
# M5-NEXT: 5 4 2.00 * st1 { v0.2s, v1.2s, v2.2s, v3.2s }, [sp], #32
# M5-NEXT: 2 1 0.50 * st1 { v0.d }[0], [sp], #8
# M5-NEXT: 2 1 0.50 * st1 { v0.2d }, [sp], #16
# M5-NEXT: 3 2 1.00 * st1 { v0.2d, v1.2d }, [sp], #32
# M5-NEXT: 4 3 1.50 * st1 { v0.2d, v1.2d, v2.2d }, [sp], #48
# M5-NEXT: 5 4 2.00 * st1 { v0.2d, v1.2d, v2.2d, v3.2d }, [sp], #64
# M5-NEXT: 2 1 0.50 * st1 { v0.s }[0], [sp], x0
# M5-NEXT: 2 1 0.50 * st1 { v0.2s }, [sp], x0
# M5-NEXT: 3 2 1.00 * st1 { v0.2s, v1.2s }, [sp], x0
# M5-NEXT: 4 3 1.50 * st1 { v0.2s, v1.2s, v2.2s }, [sp], x0
# M5-NEXT: 5 4 2.00 * st1 { v0.2s, v1.2s, v2.2s, v3.2s }, [sp], x0
# M5-NEXT: 2 1 0.50 * st1 { v0.d }[0], [sp], x0
# M5-NEXT: 2 1 0.50 * st1 { v0.2d }, [sp], x0
# M5-NEXT: 3 2 1.00 * st1 { v0.2d, v1.2d }, [sp], x0
# M5-NEXT: 4 3 1.50 * st1 { v0.2d, v1.2d, v2.2d }, [sp], x0
# M5-NEXT: 5 4 2.00 * st1 { v0.2d, v1.2d, v2.2d, v3.2d }, [sp], x0

View File

@ -0,0 +1,97 @@
# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m3 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,M3
# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m4 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,M4
# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m5 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,M5
st2 {v0.s, v1.s}[0], [sp]
st2 {v0.2s, v1.2s}, [sp]
st2 {v0.d, v1.d}[0], [sp]
st2 {v0.2d, v1.2d}, [sp]
st2 {v0.s, v1.s}[0], [sp], #8
st2 {v0.2s, v1.2s}, [sp], #16
st2 {v0.d, v1.d}[0], [sp], #16
st2 {v0.2d, v1.2d}, [sp], #32
st2 {v0.s, v1.s}[0], [sp], x0
st2 {v0.2s, v1.2s}, [sp], x0
st2 {v0.d, v1.d}[0], [sp], x0
st2 {v0.2d, v1.2d}, [sp], x0
# ALL: Iterations: 100
# ALL-NEXT: Instructions: 1200
# M3-NEXT: Total Cycles: 8703
# M3-NEXT: Total uOps: 5400
# M4-NEXT: Total Cycles: 2403
# M4-NEXT: Total uOps: 2300
# M5-NEXT: Total Cycles: 2403
# M5-NEXT: Total uOps: 2000
# ALL: Dispatch Width: 6
# M3-NEXT: uOps Per Cycle: 0.62
# M3-NEXT: IPC: 0.14
# M3-NEXT: Block RThroughput: 40.5
# M4-NEXT: uOps Per Cycle: 0.96
# M4-NEXT: IPC: 0.50
# M4-NEXT: Block RThroughput: 7.5
# M5-NEXT: uOps Per Cycle: 0.83
# M5-NEXT: IPC: 0.50
# M5-NEXT: Block RThroughput: 7.5
# ALL: Instruction Info:
# ALL-NEXT: [1]: #uOps
# ALL-NEXT: [2]: Latency
# ALL-NEXT: [3]: RThroughput
# ALL-NEXT: [4]: MayLoad
# ALL-NEXT: [5]: MayStore
# ALL-NEXT: [6]: HasSideEffects (U)
# ALL: [1] [2] [3] [4] [5] [6] Instructions:
# M3-NEXT: 4 7 3.00 * st2 { v0.s, v1.s }[0], [sp]
# M3-NEXT: 4 7 3.00 * st2 { v0.2s, v1.2s }, [sp]
# M3-NEXT: 4 7 3.00 * st2 { v0.d, v1.d }[0], [sp]
# M3-NEXT: 6 8 4.50 * st2 { v0.2d, v1.2d }, [sp]
# M3-NEXT: 4 7 3.00 * st2 { v0.s, v1.s }[0], [sp], #8
# M3-NEXT: 4 7 3.00 * st2 { v0.2s, v1.2s }, [sp], #16
# M3-NEXT: 4 7 3.00 * st2 { v0.d, v1.d }[0], [sp], #16
# M3-NEXT: 6 8 4.50 * st2 { v0.2d, v1.2d }, [sp], #32
# M3-NEXT: 4 7 3.00 * st2 { v0.s, v1.s }[0], [sp], x0
# M3-NEXT: 4 7 3.00 * st2 { v0.2s, v1.2s }, [sp], x0
# M3-NEXT: 4 7 3.00 * st2 { v0.d, v1.d }[0], [sp], x0
# M3-NEXT: 6 8 4.50 * st2 { v0.2d, v1.2d }, [sp], x0
# M4-NEXT: 1 2 0.50 * st2 { v0.s, v1.s }[0], [sp]
# M4-NEXT: 1 2 0.50 * st2 { v0.2s, v1.2s }, [sp]
# M4-NEXT: 1 2 0.50 * st2 { v0.d, v1.d }[0], [sp]
# M4-NEXT: 2 2 1.00 * st2 { v0.2d, v1.2d }, [sp]
# M4-NEXT: 2 2 0.50 * st2 { v0.s, v1.s }[0], [sp], #8
# M4-NEXT: 2 2 0.50 * st2 { v0.2s, v1.2s }, [sp], #16
# M4-NEXT: 2 2 0.50 * st2 { v0.d, v1.d }[0], [sp], #16
# M4-NEXT: 3 2 1.00 * st2 { v0.2d, v1.2d }, [sp], #32
# M4-NEXT: 2 2 0.50 * st2 { v0.s, v1.s }[0], [sp], x0
# M4-NEXT: 2 2 0.50 * st2 { v0.2s, v1.2s }, [sp], x0
# M4-NEXT: 2 2 0.50 * st2 { v0.d, v1.d }[0], [sp], x0
# M4-NEXT: 3 2 1.00 * st2 { v0.2d, v1.2d }, [sp], x0
# M5-NEXT: 1 2 0.50 * st2 { v0.s, v1.s }[0], [sp]
# M5-NEXT: 1 2 0.50 * st2 { v0.2s, v1.2s }, [sp]
# M5-NEXT: 1 2 0.50 * st2 { v0.d, v1.d }[0], [sp]
# M5-NEXT: 1 2 1.00 * st2 { v0.2d, v1.2d }, [sp]
# M5-NEXT: 2 2 0.50 * st2 { v0.s, v1.s }[0], [sp], #8
# M5-NEXT: 2 2 0.50 * st2 { v0.2s, v1.2s }, [sp], #16
# M5-NEXT: 2 2 0.50 * st2 { v0.d, v1.d }[0], [sp], #16
# M5-NEXT: 2 2 1.00 * st2 { v0.2d, v1.2d }, [sp], #32
# M5-NEXT: 2 2 0.50 * st2 { v0.s, v1.s }[0], [sp], x0
# M5-NEXT: 2 2 0.50 * st2 { v0.2s, v1.2s }, [sp], x0
# M5-NEXT: 2 2 0.50 * st2 { v0.d, v1.d }[0], [sp], x0
# M5-NEXT: 2 2 1.00 * st2 { v0.2d, v1.2d }, [sp], x0

View File

@ -0,0 +1,97 @@
# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m3 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,M3
# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m4 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,M4
# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m5 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,M5
st3 {v0.s, v1.s, v2.s}[0], [sp]
st3 {v0.2s, v1.2s, v2.2s}, [sp]
st3 {v0.d, v1.d, v2.d}[0], [sp]
st3 {v0.2d, v1.2d, v2.2d}, [sp]
st3 {v0.s, v1.s, v2.s}[0], [sp], #12
st3 {v0.2s, v1.2s, v2.2s}, [sp], #24
st3 {v0.d, v1.d, v2.d}[0], [sp], #24
st3 {v0.2d, v1.2d, v2.2d}, [sp], #48
st3 {v0.s, v1.s, v2.s}[0], [sp], x0
st3 {v0.2s, v1.2s, v2.2s}, [sp], x0
st3 {v0.d, v1.d, v2.d}[0], [sp], x0
st3 {v0.2d, v1.2d, v2.2d}, [sp], x0
# ALL: Iterations: 100
# ALL-NEXT: Instructions: 1200
# M3-NEXT: Total Cycles: 18003
# M3-NEXT: Total uOps: 8400
# M4-NEXT: Total Cycles: 3903
# M4-NEXT: Total uOps: 5000
# M5-NEXT: Total Cycles: 3603
# M5-NEXT: Total uOps: 4400
# ALL: Dispatch Width: 6
# M3-NEXT: uOps Per Cycle: 0.47
# M3-NEXT: IPC: 0.07
# M3-NEXT: Block RThroughput: 72.0
# M4-NEXT: uOps Per Cycle: 1.28
# M4-NEXT: IPC: 0.31
# M4-NEXT: Block RThroughput: 21.0
# M5-NEXT: uOps Per Cycle: 1.22
# M5-NEXT: IPC: 0.33
# M5-NEXT: Block RThroughput: 10.5
# ALL: Instruction Info:
# ALL-NEXT: [1]: #uOps
# ALL-NEXT: [2]: Latency
# ALL-NEXT: [3]: RThroughput
# ALL-NEXT: [4]: MayLoad
# ALL-NEXT: [5]: MayStore
# ALL-NEXT: [6]: HasSideEffects (U)
# ALL: [1] [2] [3] [4] [5] [6] Instructions:
# M3-NEXT: 5 14 4.50 * st3 { v0.s, v1.s, v2.s }[0], [sp]
# M3-NEXT: 7 15 6.00 * st3 { v0.2s, v1.2s, v2.2s }, [sp]
# M3-NEXT: 7 15 6.00 * st3 { v0.d, v1.d, v2.d }[0], [sp]
# M3-NEXT: 9 16 7.50 * st3 { v0.2d, v1.2d, v2.2d }, [sp]
# M3-NEXT: 5 14 4.50 * st3 { v0.s, v1.s, v2.s }[0], [sp], #12
# M3-NEXT: 7 15 6.00 * st3 { v0.2s, v1.2s, v2.2s }, [sp], #24
# M3-NEXT: 7 15 6.00 * st3 { v0.d, v1.d, v2.d }[0], [sp], #24
# M3-NEXT: 9 16 7.50 * st3 { v0.2d, v1.2d, v2.2d }, [sp], #48
# M3-NEXT: 5 14 4.50 * st3 { v0.s, v1.s, v2.s }[0], [sp], x0
# M3-NEXT: 7 15 6.00 * st3 { v0.2s, v1.2s, v2.2s }, [sp], x0
# M3-NEXT: 7 15 6.00 * st3 { v0.d, v1.d, v2.d }[0], [sp], x0
# M3-NEXT: 9 16 7.50 * st3 { v0.2d, v1.2d, v2.2d }, [sp], x0
# M4-NEXT: 2 2 1.00 * st3 { v0.s, v1.s, v2.s }[0], [sp]
# M4-NEXT: 4 4 2.00 * st3 { v0.2s, v1.2s, v2.2s }, [sp]
# M4-NEXT: 2 2 1.00 * st3 { v0.d, v1.d, v2.d }[0], [sp]
# M4-NEXT: 6 5 3.00 * st3 { v0.2d, v1.2d, v2.2d }, [sp]
# M4-NEXT: 3 2 1.00 * st3 { v0.s, v1.s, v2.s }[0], [sp], #12
# M4-NEXT: 5 4 2.00 * st3 { v0.2s, v1.2s, v2.2s }, [sp], #24
# M4-NEXT: 3 2 1.00 * st3 { v0.d, v1.d, v2.d }[0], [sp], #24
# M4-NEXT: 7 5 3.00 * st3 { v0.2d, v1.2d, v2.2d }, [sp], #48
# M4-NEXT: 3 2 1.00 * st3 { v0.s, v1.s, v2.s }[0], [sp], x0
# M4-NEXT: 5 4 2.00 * st3 { v0.2s, v1.2s, v2.2s }, [sp], x0
# M4-NEXT: 3 2 1.00 * st3 { v0.d, v1.d, v2.d }[0], [sp], x0
# M4-NEXT: 7 5 3.00 * st3 { v0.2d, v1.2d, v2.2d }, [sp], x0
# M5-NEXT: 2 2 1.00 * st3 { v0.s, v1.s, v2.s }[0], [sp]
# M5-NEXT: 3 4 1.00 * st3 { v0.2s, v1.2s, v2.2s }, [sp]
# M5-NEXT: 2 2 1.00 * st3 { v0.d, v1.d, v2.d }[0], [sp]
# M5-NEXT: 5 4 1.50 * st3 { v0.2d, v1.2d, v2.2d }, [sp]
# M5-NEXT: 3 2 1.00 * st3 { v0.s, v1.s, v2.s }[0], [sp], #12
# M5-NEXT: 4 4 1.00 * st3 { v0.2s, v1.2s, v2.2s }, [sp], #24
# M5-NEXT: 3 2 1.00 * st3 { v0.d, v1.d, v2.d }[0], [sp], #24
# M5-NEXT: 6 4 1.50 * st3 { v0.2d, v1.2d, v2.2d }, [sp], #48
# M5-NEXT: 3 2 1.00 * st3 { v0.s, v1.s, v2.s }[0], [sp], x0
# M5-NEXT: 4 4 1.00 * st3 { v0.2s, v1.2s, v2.2s }, [sp], x0
# M5-NEXT: 3 2 1.00 * st3 { v0.d, v1.d, v2.d }[0], [sp], x0
# M5-NEXT: 6 4 1.50 * st3 { v0.2d, v1.2d, v2.2d }, [sp], x0

View File

@ -0,0 +1,97 @@
# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m3 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,M3
# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m4 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,M4
# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m5 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,M5
st4 {v0.s, v1.s, v2.s, v3.s}[0], [sp]
st4 {v0.2s, v1.2s, v2.2s, v3.2s}, [sp]
st4 {v0.d, v1.d, v2.d, v3.d}[0], [sp]
st4 {v0.2d, v1.2d, v2.2d, v3.2d}, [sp]
st4 {v0.s, v1.s, v2.s, v3.s}[0], [sp], #16
st4 {v0.2s, v1.2s, v2.2s, v3.2s}, [sp], #32
st4 {v0.d, v1.d, v2.d, v3.d}[0], [sp], #32
st4 {v0.2d, v1.2d, v2.2d, v3.2d}, [sp], #64
st4 {v0.s, v1.s, v2.s, v3.s}[0], [sp], x0
st4 {v0.2s, v1.2s, v2.2s, v3.2s}, [sp], x0
st4 {v0.d, v1.d, v2.d, v3.d}[0], [sp], x0
st4 {v0.2d, v1.2d, v2.2d, v3.2d}, [sp], x0
# ALL: Iterations: 100
# ALL-NEXT: Instructions: 1200
# M3-NEXT: Total Cycles: 18603
# M3-NEXT: Total uOps: 9000
# M4-NEXT: Total Cycles: 4803
# M4-NEXT: Total uOps: 4700
# M5-NEXT: Total Cycles: 4803
# M5-NEXT: Total uOps: 4700
# ALL: Dispatch Width: 6
# M3-NEXT: uOps Per Cycle: 0.48
# M3-NEXT: IPC: 0.06
# M3-NEXT: Block RThroughput: 76.5
# M4-NEXT: uOps Per Cycle: 0.98
# M4-NEXT: IPC: 0.25
# M4-NEXT: Block RThroughput: 24.0
# M5-NEXT: uOps Per Cycle: 0.98
# M5-NEXT: IPC: 0.25
# M5-NEXT: Block RThroughput: 24.0
# ALL: Instruction Info:
# ALL-NEXT: [1]: #uOps
# ALL-NEXT: [2]: Latency
# ALL-NEXT: [3]: RThroughput
# ALL-NEXT: [4]: MayLoad
# ALL-NEXT: [5]: MayStore
# ALL-NEXT: [6]: HasSideEffects (U)
# ALL: [1] [2] [3] [4] [5] [6] Instructions:
# M3-NEXT: 7 15 6.00 * st4 { v0.s, v1.s, v2.s, v3.s }[0], [sp]
# M3-NEXT: 7 15 6.00 * st4 { v0.2s, v1.2s, v2.2s, v3.2s }, [sp]
# M3-NEXT: 7 15 6.00 * st4 { v0.d, v1.d, v2.d, v3.d }[0], [sp]
# M3-NEXT: 9 17 7.50 * st4 { v0.2d, v1.2d, v2.2d, v3.2d }, [sp]
# M3-NEXT: 7 15 6.00 * st4 { v0.s, v1.s, v2.s, v3.s }[0], [sp], #16
# M3-NEXT: 7 15 6.00 * st4 { v0.2s, v1.2s, v2.2s, v3.2s }, [sp], #32
# M3-NEXT: 7 15 6.00 * st4 { v0.d, v1.d, v2.d, v3.d }[0], [sp], #32
# M3-NEXT: 9 17 7.50 * st4 { v0.2d, v1.2d, v2.2d, v3.2d }, [sp], #64
# M3-NEXT: 7 15 6.00 * st4 { v0.s, v1.s, v2.s, v3.s }[0], [sp], x0
# M3-NEXT: 7 15 6.00 * st4 { v0.2s, v1.2s, v2.2s, v3.2s }, [sp], x0
# M3-NEXT: 7 15 6.00 * st4 { v0.d, v1.d, v2.d, v3.d }[0], [sp], x0
# M3-NEXT: 9 17 7.50 * st4 { v0.2d, v1.2d, v2.2d, v3.2d }, [sp], x0
# M4-NEXT: 2 2 1.00 * st4 { v0.s, v1.s, v2.s, v3.s }[0], [sp]
# M4-NEXT: 4 4 2.00 * st4 { v0.2s, v1.2s, v2.2s, v3.2s }, [sp]
# M4-NEXT: 2 2 1.00 * st4 { v0.d, v1.d, v2.d, v3.d }[0], [sp]
# M4-NEXT: 5 8 4.00 * st4 { v0.2d, v1.2d, v2.2d, v3.2d }, [sp]
# M4-NEXT: 3 2 1.00 * st4 { v0.s, v1.s, v2.s, v3.s }[0], [sp], #16
# M4-NEXT: 5 4 2.00 * st4 { v0.2s, v1.2s, v2.2s, v3.2s }, [sp], #32
# M4-NEXT: 3 2 1.00 * st4 { v0.d, v1.d, v2.d, v3.d }[0], [sp], #32
# M4-NEXT: 6 8 4.00 * st4 { v0.2d, v1.2d, v2.2d, v3.2d }, [sp], #64
# M4-NEXT: 3 2 1.00 * st4 { v0.s, v1.s, v2.s, v3.s }[0], [sp], x0
# M4-NEXT: 5 4 2.00 * st4 { v0.2s, v1.2s, v2.2s, v3.2s }, [sp], x0
# M4-NEXT: 3 2 1.00 * st4 { v0.d, v1.d, v2.d, v3.d }[0], [sp], x0
# M4-NEXT: 6 8 4.00 * st4 { v0.2d, v1.2d, v2.2d, v3.2d }, [sp], x0
# M5-NEXT: 2 2 1.00 * st4 { v0.s, v1.s, v2.s, v3.s }[0], [sp]
# M5-NEXT: 4 4 2.00 * st4 { v0.2s, v1.2s, v2.2s, v3.2s }, [sp]
# M5-NEXT: 2 2 1.00 * st4 { v0.d, v1.d, v2.d, v3.d }[0], [sp]
# M5-NEXT: 5 8 4.00 * st4 { v0.2d, v1.2d, v2.2d, v3.2d }, [sp]
# M5-NEXT: 3 2 1.00 * st4 { v0.s, v1.s, v2.s, v3.s }[0], [sp], #16
# M5-NEXT: 5 4 2.00 * st4 { v0.2s, v1.2s, v2.2s, v3.2s }, [sp], #32
# M5-NEXT: 3 2 1.00 * st4 { v0.d, v1.d, v2.d, v3.d }[0], [sp], #32
# M5-NEXT: 6 8 4.00 * st4 { v0.2d, v1.2d, v2.2d, v3.2d }, [sp], #64
# M5-NEXT: 3 2 1.00 * st4 { v0.s, v1.s, v2.s, v3.s }[0], [sp], x0
# M5-NEXT: 5 4 2.00 * st4 { v0.2s, v1.2s, v2.2s, v3.2s }, [sp], x0
# M5-NEXT: 3 2 1.00 * st4 { v0.d, v1.d, v2.d, v3.d }[0], [sp], x0
# M5-NEXT: 6 8 4.00 * st4 { v0.2d, v1.2d, v2.2d, v3.2d }, [sp], x0

View File

@ -0,0 +1,58 @@
# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m3 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,M3
# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m4 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,M4
# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m5 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,M5
crc32w w0, w1, w2
crc32w w0, w0, w3
crc32cx w0, w1, x2
crc32cx w0, w0, x3
# ALL: Iterations: 100
# ALL-NEXT: Instructions: 400
# M3-NEXT: Total Cycles: 204
# M4-NEXT: Total Cycles: 404
# M5-NEXT: Total Cycles: 204
# ALL-NEXT: Total uOps: 400
# ALL: Dispatch Width: 6
# M3-NEXT: uOps Per Cycle: 1.96
# M3-NEXT: IPC: 1.96
# M3-NEXT: Block RThroughput: 2.0
# M4-NEXT: uOps Per Cycle: 0.99
# M4-NEXT: IPC: 0.99
# M4-NEXT: Block RThroughput: 4.0
# M5-NEXT: uOps Per Cycle: 1.96
# M5-NEXT: IPC: 1.96
# M5-NEXT: Block RThroughput: 2.0
# ALL: Instruction Info:
# ALL-NEXT: [1]: #uOps
# ALL-NEXT: [2]: Latency
# ALL-NEXT: [3]: RThroughput
# ALL-NEXT: [4]: MayLoad
# ALL-NEXT: [5]: MayStore
# ALL-NEXT: [6]: HasSideEffects (U)
# ALL: [1] [2] [3] [4] [5] [6] Instructions:
# M3-NEXT: 1 2 0.50 crc32w w0, w1, w2
# M3-NEXT: 1 2 0.50 crc32w w0, w0, w3
# M3-NEXT: 1 2 0.50 crc32cx w0, w1, x2
# M3-NEXT: 1 2 0.50 crc32cx w0, w0, x3
# M4-NEXT: 1 2 1.00 crc32w w0, w1, w2
# M4-NEXT: 1 2 1.00 crc32w w0, w0, w3
# M4-NEXT: 1 2 1.00 crc32cx w0, w1, x2
# M4-NEXT: 1 2 1.00 crc32cx w0, w0, x3
# M5-NEXT: 1 2 0.50 crc32w w0, w1, w2
# M5-NEXT: 1 2 0.50 crc32w w0, w0, w3
# M5-NEXT: 1 2 0.50 crc32cx w0, w1, x2
# M5-NEXT: 1 2 0.50 crc32cx w0, w0, x3

View File

@ -1,6 +1,7 @@
# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py # NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
# RUN: llvm-mca -mtriple=aarch64-linux-gnu -mcpu=exynos-m3 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,M3 # RUN: llvm-mca -mtriple=aarch64-linux-gnu -mcpu=exynos-m3 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,M3
# RUN: llvm-mca -mtriple=aarch64-linux-gnu -mcpu=exynos-m4 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,M4 # RUN: llvm-mca -mtriple=aarch64-linux-gnu -mcpu=exynos-m4 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,M4
# RUN: llvm-mca -mtriple=aarch64-linux-gnu -mcpu=exynos-m5 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,M5
b main b main
@ -9,6 +10,7 @@
# M3-NEXT: Total Cycles: 18 # M3-NEXT: Total Cycles: 18
# M4-NEXT: Total Cycles: 18 # M4-NEXT: Total Cycles: 18
# M5-NEXT: Total Cycles: 18
# ALL-NEXT: Total uOps: 100 # ALL-NEXT: Total uOps: 100
@ -22,6 +24,11 @@
# M4-NEXT: IPC: 5.56 # M4-NEXT: IPC: 5.56
# M4-NEXT: Block RThroughput: 0.2 # M4-NEXT: Block RThroughput: 0.2
# M5: Dispatch Width: 6
# M5-NEXT: uOps Per Cycle: 5.56
# M5-NEXT: IPC: 5.56
# M5-NEXT: Block RThroughput: 0.2
# ALL: Instruction Info: # ALL: Instruction Info:
# ALL-NEXT: [1]: #uOps # ALL-NEXT: [1]: #uOps
# ALL-NEXT: [2]: Latency # ALL-NEXT: [2]: Latency
@ -34,3 +41,4 @@
# M3-NEXT: 1 0 0.17 b main # M3-NEXT: 1 0 0.17 b main
# M4-NEXT: 1 0 0.17 b main # M4-NEXT: 1 0 0.17 b main
# M5-NEXT: 1 0 0.17 b main

View File

@ -0,0 +1,67 @@
# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m3 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,EM3
# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m4 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,EM4
# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m5 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,EM5
sdiv w0, w1, w2
udiv x1, x2, x3
mul w2, w3, w4
msub x3, x4, x5, x6
smull x4, w5, w6
umulh x5, x6, x7
# ALL: Iterations: 100
# ALL-NEXT: Instructions: 600
# EM3-NEXT: Total Cycles: 3305
# EM4-NEXT: Total Cycles: 3303
# EM5-NEXT: Total Cycles: 2603
# ALL-NEXT: Total uOps: 600
# ALL: Dispatch Width: 6
# EM3-NEXT: uOps Per Cycle: 0.18
# EM3-NEXT: IPC: 0.18
# EM3-NEXT: Block RThroughput: 33.0
# EM4-NEXT: uOps Per Cycle: 0.18
# EM4-NEXT: IPC: 0.18
# EM4-NEXT: Block RThroughput: 33.0
# EM5-NEXT: uOps Per Cycle: 0.23
# EM5-NEXT: IPC: 0.23
# EM5-NEXT: Block RThroughput: 26.0
# ALL: Instruction Info:
# ALL-NEXT: [1]: #uOps
# ALL-NEXT: [2]: Latency
# ALL-NEXT: [3]: RThroughput
# ALL-NEXT: [4]: MayLoad
# ALL-NEXT: [5]: MayStore
# ALL-NEXT: [6]: HasSideEffects (U)
# ALL: [1] [2] [3] [4] [5] [6] Instructions:
# EM3-NEXT: 1 12 12.00 sdiv w0, w1, w2
# EM3-NEXT: 1 21 21.00 udiv x1, x2, x3
# EM3-NEXT: 1 3 0.50 mul w2, w3, w4
# EM3-NEXT: 1 4 1.00 msub x3, x4, x5, x6
# EM3-NEXT: 1 3 0.50 smull x4, w5, w6
# EM3-NEXT: 1 4 1.00 umulh x5, x6, x7
# EM4-NEXT: 1 12 12.00 sdiv w0, w1, w2
# EM4-NEXT: 1 21 21.00 udiv x1, x2, x3
# EM4-NEXT: 1 3 0.50 mul w2, w3, w4
# EM4-NEXT: 1 4 1.00 msub x3, x4, x5, x6
# EM4-NEXT: 1 3 0.50 smull x4, w5, w6
# EM4-NEXT: 1 4 1.00 umulh x5, x6, x7
# EM5-NEXT: 1 10 10.00 sdiv w0, w1, w2
# EM5-NEXT: 1 16 16.00 udiv x1, x2, x3
# EM5-NEXT: 1 2 0.50 mul w2, w3, w4
# EM5-NEXT: 1 3 1.00 msub x3, x4, x5, x6
# EM5-NEXT: 1 2 0.50 smull x4, w5, w6
# EM5-NEXT: 1 3 1.00 umulh x5, x6, x7

View File

@ -0,0 +1,66 @@
# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m3 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,M3
# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m4 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,M4
# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m5 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,M5
fmov d31, #1.00000000
fdiv d30, d31, d30
# Newton series for 1 / x.
frecpe d1, d0
frecps d2, d0, d1
fmul d1, d1, d2
frecps d2, d0, d1
fmul d1, d1, d2
frecps d0, d0, d1
fmul d0, d1, d0
# ALL: Iterations: 100
# ALL-NEXT: Instructions: 900
# M3-NEXT: Total Cycles: 2503
# M4-NEXT: Total Cycles: 2403
# M5-NEXT: Total Cycles: 2403
# ALL-NEXT: Total uOps: 900
# ALL: Dispatch Width: 6
# M3-NEXT: uOps Per Cycle: 0.36
# M3-NEXT: IPC: 0.36
# M3-NEXT: Block RThroughput: 3.3
# M4-NEXT: uOps Per Cycle: 0.37
# M4-NEXT: IPC: 0.37
# M4-NEXT: Block RThroughput: 2.3
# M5-NEXT: uOps Per Cycle: 0.37
# M5-NEXT: IPC: 0.37
# M5-NEXT: Block RThroughput: 2.3
# ALL: Instruction Info:
# ALL-NEXT: [1]: #uOps
# ALL-NEXT: [2]: Latency
# ALL-NEXT: [3]: RThroughput
# ALL-NEXT: [4]: MayLoad
# ALL-NEXT: [5]: MayStore
# ALL-NEXT: [6]: HasSideEffects (U)
# ALL: [1] [2] [3] [4] [5] [6] Instructions:
# ALL-NEXT: 1 1 0.33 fmov d31, #1.00000000
# M3-NEXT: 1 12 3.25 fdiv d30, d31, d30
# M3-NEXT: 1 4 0.50 frecpe d1, d0
# M4-NEXT: 1 12 2.25 fdiv d30, d31, d30
# M4-NEXT: 1 3 0.50 frecpe d1, d0
# M5-NEXT: 1 12 2.25 fdiv d30, d31, d30
# M5-NEXT: 1 3 0.50 frecpe d1, d0
# ALL-NEXT: 1 4 0.33 frecps d2, d0, d1
# ALL-NEXT: 1 3 0.33 fmul d1, d1, d2
# ALL-NEXT: 1 4 0.33 frecps d2, d0, d1
# ALL-NEXT: 1 3 0.33 fmul d1, d1, d2
# ALL-NEXT: 1 4 0.33 frecps d0, d0, d1
# ALL-NEXT: 1 3 0.33 fmul d0, d1, d0

View File

@ -0,0 +1,78 @@
# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m3 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,M3
# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m4 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,M4
# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m5 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,M5
fsqrt d30, d30
fmov d31, #1.00000000
fdiv d30, d31, d30
# Newton series for 1 / sqrt().
frsqrte d1, d0
fmul d2, d1, d1
frsqrts d2, d0, d2
fmul d1, d1, d2
fmul d2, d1, d1
frsqrts d2, d0, d2
fmul d1, d1, d2
fmul d2, d1, d1
frsqrts d0, d0, d2
fmul d0, d1, d0
# ALL: Iterations: 100
# ALL-NEXT: Instructions: 1300
# M3-NEXT: Total Cycles: 3703
# M4-NEXT: Total Cycles: 3303
# M5-NEXT: Total Cycles: 3303
# ALL-NEXT: Total uOps: 1300
# ALL: Dispatch Width: 6
# M3-NEXT: uOps Per Cycle: 0.35
# M3-NEXT: IPC: 0.35
# M3-NEXT: Block RThroughput: 26.0
# M4-NEXT: uOps Per Cycle: 0.39
# M4-NEXT: IPC: 0.39
# M4-NEXT: Block RThroughput: 3.0
# M5-NEXT: uOps Per Cycle: 0.39
# M5-NEXT: IPC: 0.39
# M5-NEXT: Block RThroughput: 3.0
# ALL: Instruction Info:
# ALL-NEXT: [1]: #uOps
# ALL-NEXT: [2]: Latency
# ALL-NEXT: [3]: RThroughput
# ALL-NEXT: [4]: MayLoad
# ALL-NEXT: [5]: MayStore
# ALL-NEXT: [6]: HasSideEffects (U)
# ALL: [1] [2] [3] [4] [5] [6] Instructions:
# M3-NEXT: 1 25 26.00 fsqrt d30, d30
# M4-NEXT: 1 12 2.25 fsqrt d30, d30
# M5-NEXT: 1 12 2.25 fsqrt d30, d30
# ALL-NEXT: 1 1 0.33 fmov d31, #1.00000000
# M3-NEXT: 1 12 3.25 fdiv d30, d31, d30
# M3-NEXT: 1 4 0.50 frsqrte d1, d0
# M4-NEXT: 1 12 2.25 fdiv d30, d31, d30
# M4-NEXT: 1 3 0.50 frsqrte d1, d0
# M5-NEXT: 1 12 2.25 fdiv d30, d31, d30
# M5-NEXT: 1 3 0.50 frsqrte d1, d0
# ALL-NEXT: 1 3 0.33 fmul d2, d1, d1
# ALL-NEXT: 1 4 0.33 frsqrts d2, d0, d2
# ALL-NEXT: 1 3 0.33 fmul d1, d1, d2
# ALL-NEXT: 1 3 0.33 fmul d2, d1, d1
# ALL-NEXT: 1 4 0.33 frsqrts d2, d0, d2
# ALL-NEXT: 1 3 0.33 fmul d1, d1, d2
# ALL-NEXT: 1 3 0.33 fmul d2, d1, d1
# ALL-NEXT: 1 4 0.33 frsqrts d0, d0, d2
# ALL-NEXT: 1 3 0.33 fmul d0, d1, d0

View File

@ -0,0 +1,79 @@
# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m3 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,M3
# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m4 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,M4
# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m5 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,M5
fsqrt d31, d31
# Newton series for sqrt().
frsqrte d1, d0
fmul d2, d1, d1
frsqrts d2, d0, d2
fmul d1, d1, d2
fmul d2, d1, d1
frsqrts d2, d0, d2
fmul d1, d1, d2
fmul d2, d1, d1
frsqrts d2, d0, d2
fmul d2, d2, d0
fmul d1, d1, d2
fcmp d0, #0.0
fcsel d0, d0, d1, eq
# ALL: Iterations: 100
# ALL-NEXT: Instructions: 1400
# M3-NEXT: Total Cycles: 4203
# M4-NEXT: Total Cycles: 4103
# M5-NEXT: Total Cycles: 3803
# ALL-NEXT: Total uOps: 1500
# ALL: Dispatch Width: 6
# M3-NEXT: uOps Per Cycle: 0.36
# M3-NEXT: IPC: 0.33
# M3-NEXT: Block RThroughput: 27.0
# M4-NEXT: uOps Per Cycle: 0.37
# M4-NEXT: IPC: 0.34
# M4-NEXT: Block RThroughput: 3.3
# M5-NEXT: uOps Per Cycle: 0.39
# M5-NEXT: IPC: 0.37
# M5-NEXT: Block RThroughput: 3.3
# ALL: Instruction Info:
# ALL-NEXT: [1]: #uOps
# ALL-NEXT: [2]: Latency
# ALL-NEXT: [3]: RThroughput
# ALL-NEXT: [4]: MayLoad
# ALL-NEXT: [5]: MayStore
# ALL-NEXT: [6]: HasSideEffects (U)
# ALL: [1] [2] [3] [4] [5] [6] Instructions:
# M3-NEXT: 1 25 26.00 fsqrt d31, d31
# M3-NEXT: 1 4 0.50 frsqrte d1, d0
# M4-NEXT: 1 12 2.25 fsqrt d31, d31
# M4-NEXT: 1 3 0.50 frsqrte d1, d0
# M5-NEXT: 1 12 2.25 fsqrt d31, d31
# M5-NEXT: 1 3 0.50 frsqrte d1, d0
# ALL-NEXT: 1 3 0.33 fmul d2, d1, d1
# ALL-NEXT: 1 4 0.33 frsqrts d2, d0, d2
# ALL-NEXT: 1 3 0.33 fmul d1, d1, d2
# ALL-NEXT: 1 3 0.33 fmul d2, d1, d1
# ALL-NEXT: 1 4 0.33 frsqrts d2, d0, d2
# ALL-NEXT: 1 3 0.33 fmul d1, d1, d2
# ALL-NEXT: 1 3 0.33 fmul d2, d1, d1
# ALL-NEXT: 1 4 0.33 frsqrts d2, d0, d2
# ALL-NEXT: 1 3 0.33 fmul d2, d2, d0
# ALL-NEXT: 1 3 0.33 fmul d1, d1, d2
# ALL-NEXT: 1 2 1.00 fcmp d0, #0.0
# M3-NEXT: 2 5 1.00 fcsel d0, d0, d1, eq
# M4-NEXT: 2 5 1.00 fcsel d0, d0, d1, eq
# M5-NEXT: 2 2 1.00 fcsel d0, d0, d1, eq

View File

@ -1,6 +1,7 @@
# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py # NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m3 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,EM3 # RUN: llvm-mca -march=aarch64 -mcpu=exynos-m3 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,EM3
# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m4 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,EM4 # RUN: llvm-mca -march=aarch64 -mcpu=exynos-m4 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,EM4
# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m5 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,EM5
sub w0, w1, w2, sxtb #0 sub w0, w1, w2, sxtb #0
add x3, x4, w5, sxth #1 add x3, x4, w5, sxth #1
@ -16,6 +17,7 @@
# EM3-NEXT: Total Cycles: 304 # EM3-NEXT: Total Cycles: 304
# EM4-NEXT: Total Cycles: 304 # EM4-NEXT: Total Cycles: 304
# EM5-NEXT: Total Cycles: 254
# ALL-NEXT: Total uOps: 800 # ALL-NEXT: Total uOps: 800
@ -29,6 +31,11 @@
# EM4-NEXT: IPC: 2.63 # EM4-NEXT: IPC: 2.63
# EM4-NEXT: Block RThroughput: 3.0 # EM4-NEXT: Block RThroughput: 3.0
# EM5: Dispatch Width: 6
# EM5-NEXT: uOps Per Cycle: 3.15
# EM5-NEXT: IPC: 3.15
# EM5-NEXT: Block RThroughput: 2.5
# ALL: Instruction Info: # ALL: Instruction Info:
# ALL-NEXT: [1]: #uOps # ALL-NEXT: [1]: #uOps
# ALL-NEXT: [2]: Latency # ALL-NEXT: [2]: Latency
@ -56,3 +63,12 @@
# EM4-NEXT: 1 2 0.50 add x15, x16, w17, uxth #1 # EM4-NEXT: 1 2 0.50 add x15, x16, w17, uxth #1
# EM4-NEXT: 1 2 0.50 subs x18, x19, w20, sxtw #2 # EM4-NEXT: 1 2 0.50 subs x18, x19, w20, sxtw #2
# EM4-NEXT: 1 2 0.50 adds x21, x22, x23, sxtx #3 # EM4-NEXT: 1 2 0.50 adds x21, x22, x23, sxtx #3
# EM5-NEXT: 1 1 0.17 sub w0, w1, w2, sxtb
# EM5-NEXT: 1 2 0.50 add x3, x4, w5, sxth #1
# EM5-NEXT: 1 1 0.25 subs x6, x7, w8, uxtw #2
# EM5-NEXT: 1 1 0.25 adds x9, x10, x11, uxtx #3
# EM5-NEXT: 1 1 0.17 sub w12, w13, w14, uxtb
# EM5-NEXT: 1 2 0.50 add x15, x16, w17, uxth #1
# EM5-NEXT: 1 2 0.50 subs x18, x19, w20, sxtw #2
# EM5-NEXT: 1 2 0.50 adds x21, x22, x23, sxtx #3

View File

@ -0,0 +1,94 @@
# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m3 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,EM3
# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m4 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,EM4
# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m5 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,EM5
fdiv h0, h1, h2
fdiv s1, s2, s3
fdiv d2, d3, d4
fmul h3, h4, h5
fmul s4, s5, s6
fmul d5, d6, d7
fmadd h6, h7, h8, h9
fmadd s7, s8, s9, s10
fmadd d8, d9, d10, d11
fsqrt h9, h10
fsqrt s10, s11
fsqrt d11, d12
# ALL: Iterations: 100
# EM3-NEXT: Instructions: 800
# EM3-NEXT: Total Cycles: 4503
# EM3-NEXT: Total uOps: 800
# EM4-NEXT: Instructions: 1200
# EM4-NEXT: Total Cycles: 575
# EM4-NEXT: Total uOps: 1200
# EM5-NEXT: Instructions: 1200
# EM5-NEXT: Total Cycles: 433
# EM5-NEXT: Total uOps: 1200
# ALL: Dispatch Width: 6
# EM3-NEXT: uOps Per Cycle: 0.18
# EM3-NEXT: IPC: 0.18
# EM3-NEXT: Block RThroughput: 45.0
# EM4-NEXT: uOps Per Cycle: 2.09
# EM4-NEXT: IPC: 2.09
# EM4-NEXT: Block RThroughput: 4.0
# EM5-NEXT: uOps Per Cycle: 2.77
# EM5-NEXT: IPC: 2.77
# EM5-NEXT: Block RThroughput: 4.0
# ALL: Instruction Info:
# ALL-NEXT: [1]: #uOps
# ALL-NEXT: [2]: Latency
# ALL-NEXT: [3]: RThroughput
# ALL-NEXT: [4]: MayLoad
# ALL-NEXT: [5]: MayStore
# ALL-NEXT: [6]: HasSideEffects (U)
# EM3: [1] [2] [3] [4] [5] [6] Instructions:
# EM3-NEXT: 1 7 2.00 fdiv s1, s2, s3
# EM3-NEXT: 1 12 3.25 fdiv d2, d3, d4
# EM3-NEXT: 1 3 0.33 fmul s4, s5, s6
# EM3-NEXT: 1 3 0.33 fmul d5, d6, d7
# EM3-NEXT: 1 4 0.33 fmadd s7, s8, s9, s10
# EM3-NEXT: 1 4 0.33 fmadd d8, d9, d10, d11
# EM3-NEXT: 1 18 19.00 fsqrt s10, s11
# EM3-NEXT: 1 25 26.00 fsqrt d11, d12
# EM4: [1] [2] [3] [4] [5] [6] Instructions:
# EM4-NEXT: 1 7 3.00 fdiv h0, h1, h2
# EM4-NEXT: 1 7 1.50 fdiv s1, s2, s3
# EM4-NEXT: 1 12 2.25 fdiv d2, d3, d4
# EM4-NEXT: 1 3 0.50 fmul h3, h4, h5
# EM4-NEXT: 1 3 0.33 fmul s4, s5, s6
# EM4-NEXT: 1 3 0.33 fmul d5, d6, d7
# EM4-NEXT: 1 4 0.50 fmadd h6, h7, h8, h9
# EM4-NEXT: 1 4 0.33 fmadd s7, s8, s9, s10
# EM4-NEXT: 1 4 0.33 fmadd d8, d9, d10, d11
# EM4-NEXT: 1 7 3.00 fsqrt h9, h10
# EM4-NEXT: 1 8 1.75 fsqrt s10, s11
# EM4-NEXT: 1 12 2.25 fsqrt d11, d12
# EM5: [1] [2] [3] [4] [5] [6] Instructions:
# EM5-NEXT: 1 5 0.50 fdiv h0, h1, h2
# EM5-NEXT: 1 7 1.00 fdiv s1, s2, s3
# EM5-NEXT: 1 12 2.25 fdiv d2, d3, d4
# EM5-NEXT: 1 3 0.33 fmul h3, h4, h5
# EM5-NEXT: 1 3 0.33 fmul s4, s5, s6
# EM5-NEXT: 1 3 0.33 fmul d5, d6, d7
# EM5-NEXT: 1 4 0.33 fmadd h6, h7, h8, h9
# EM5-NEXT: 1 4 0.33 fmadd s7, s8, s9, s10
# EM5-NEXT: 1 4 0.33 fmadd d8, d9, d10, d11
# EM5-NEXT: 1 5 0.50 fsqrt h9, h10
# EM5-NEXT: 1 8 1.25 fsqrt s10, s11
# EM5-NEXT: 1 12 2.25 fsqrt d11, d12

View File

@ -0,0 +1,114 @@
# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m3 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,EM3
# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m4 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,EM4
# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m5 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,EM5
scvtf h0, w0
scvtf s1, w1
scvtf d2, x2
fcvtzs w3, h3
fcvtzs w4, s4
fcvtzs x5, d5
fmov h6, #2.0
fmov s7, #4.0
fmov d8, #8.0
fmov h9, w9
fmov s10, w10
fmov d11, x11
fmov v12.d[1], x12
fmov w13, h13
fmov w14, s14
fmov x15, d15
fmov x16, v16.d[1]
# ALL: Iterations: 100
# EM3-NEXT: Instructions: 1200
# EM3-NEXT: Total Cycles: 405
# EM3-NEXT: Total uOps: 1400
# EM4-NEXT: Instructions: 1700
# EM4-NEXT: Total Cycles: 1108
# EM4-NEXT: Total uOps: 1900
# EM5-NEXT: Instructions: 1700
# EM5-NEXT: Total Cycles: 1407
# EM5-NEXT: Total uOps: 1900
# ALL: Dispatch Width: 6
# EM3-NEXT: uOps Per Cycle: 3.46
# EM3-NEXT: IPC: 2.96
# EM3-NEXT: Block RThroughput: 4.0
# EM4-NEXT: uOps Per Cycle: 1.71
# EM4-NEXT: IPC: 1.53
# EM4-NEXT: Block RThroughput: 11.0
# EM5-NEXT: uOps Per Cycle: 1.35
# EM5-NEXT: IPC: 1.21
# EM5-NEXT: Block RThroughput: 14.0
# ALL: Instruction Info:
# ALL-NEXT: [1]: #uOps
# ALL-NEXT: [2]: Latency
# ALL-NEXT: [3]: RThroughput
# ALL-NEXT: [4]: MayLoad
# ALL-NEXT: [5]: MayStore
# ALL-NEXT: [6]: HasSideEffects (U)
# EM3: [1] [2] [3] [4] [5] [6] Instructions:
# EM3-NEXT: 1 4 1.00 scvtf s1, w1
# EM3-NEXT: 1 4 1.00 scvtf d2, x2
# EM3-NEXT: 1 3 1.00 fcvtzs w4, s4
# EM3-NEXT: 1 3 1.00 fcvtzs x5, d5
# EM3-NEXT: 1 1 0.33 fmov s7, #4.00000000
# EM3-NEXT: 1 1 0.33 fmov d8, #8.00000000
# EM3-NEXT: 1 1 0.33 fmov s10, w10
# EM3-NEXT: 1 1 0.33 fmov d11, x11
# EM3-NEXT: 2 5 1.00 fmov v12.d[1], x12
# EM3-NEXT: 1 1 0.33 fmov w14, s14
# EM3-NEXT: 1 1 0.33 fmov x15, d15
# EM3-NEXT: 2 5 1.00 fmov x16, v16.d[1]
# EM4: [1] [2] [3] [4] [5] [6] Instructions:
# EM4-NEXT: 1 6 1.00 scvtf h0, w0
# EM4-NEXT: 1 6 1.00 scvtf s1, w1
# EM4-NEXT: 1 6 1.00 scvtf d2, x2
# EM4-NEXT: 1 4 1.00 fcvtzs w3, h3
# EM4-NEXT: 1 4 1.00 fcvtzs w4, s4
# EM4-NEXT: 1 4 1.00 fcvtzs x5, d5
# EM4-NEXT: 1 1 0.33 fmov h6, #2.00000000
# EM4-NEXT: 1 1 0.33 fmov s7, #4.00000000
# EM4-NEXT: 1 1 0.33 fmov d8, #8.00000000
# EM4-NEXT: 1 3 1.00 fmov h9, w9
# EM4-NEXT: 1 3 1.00 fmov s10, w10
# EM4-NEXT: 1 3 1.00 fmov d11, x11
# EM4-NEXT: 2 2 1.00 fmov v12.d[1], x12
# EM4-NEXT: 1 4 1.00 fmov w13, h13
# EM4-NEXT: 1 4 1.00 fmov w14, s14
# EM4-NEXT: 1 4 1.00 fmov x15, d15
# EM4-NEXT: 2 5 1.00 fmov x16, v16.d[1]
# EM5: [1] [2] [3] [4] [5] [6] Instructions:
# EM5-NEXT: 1 6 1.00 scvtf h0, w0
# EM5-NEXT: 1 6 1.00 scvtf s1, w1
# EM5-NEXT: 1 6 1.00 scvtf d2, x2
# EM5-NEXT: 1 4 1.00 fcvtzs w3, h3
# EM5-NEXT: 1 4 1.00 fcvtzs w4, s4
# EM5-NEXT: 1 4 1.00 fcvtzs x5, d5
# EM5-NEXT: 1 1 0.33 fmov h6, #2.00000000
# EM5-NEXT: 1 1 0.33 fmov s7, #4.00000000
# EM5-NEXT: 1 1 0.33 fmov d8, #8.00000000
# EM5-NEXT: 1 4 1.00 fmov h9, w9
# EM5-NEXT: 1 4 1.00 fmov s10, w10
# EM5-NEXT: 1 4 1.00 fmov d11, x11
# EM5-NEXT: 2 6 1.00 fmov v12.d[1], x12
# EM5-NEXT: 1 3 1.00 fmov w13, h13
# EM5-NEXT: 1 3 1.00 fmov w14, s14
# EM5-NEXT: 1 3 1.00 fmov x15, d15
# EM5-NEXT: 2 5 1.00 fmov x16, v16.d[1]

View File

@ -0,0 +1,153 @@
# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m3 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,M3
# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m4 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,M4
# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m5 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,M5
# FP/SIMD register loads, one narrow and one 128-bit (q) form per addressing
# mode, so that per-mode uOp count, latency and throughput can be compared
# across the three Exynos models.
# Literal (label-relative) loads.
ldr s0, 1f
ldr q0, 1f
# Unscaled immediate offset.
ldur d0, [sp, #2]
ldur q0, [sp, #16]
# Post-indexed.
ldr b0, [sp], #1
ldr q0, [sp], #16
# Pre-indexed.
ldr h0, [sp, #2]!
ldr q0, [sp, #16]!
# Immediate offset.
ldr s0, [sp, #4]
ldr q0, [sp, #16]
# Register offset, shifted.
ldr d0, [sp, x0, lsl #3]
ldr q0, [sp, x0, lsl #4]
# Register offset, not shifted.
ldr b0, [sp, x0]
ldr q0, [sp, x0]
# Extended register offset, shifted.
ldr h0, [sp, w0, sxtw #1]
ldr q0, [sp, w0, uxtw #4]
# Extended register offset, not shifted.
ldr s0, [sp, w0, sxtw]
ldr q0, [sp, w0, uxtw]
# Pair loads: post-indexed, pre-indexed, then immediate offset.
ldp d0, d1, [sp], #16
ldp q0, q1, [sp], #32
ldp s0, s1, [sp, #8]!
ldp q0, q1, [sp, #32]!
ldp d0, d1, [sp, #16]
ldp q0, q1, [sp, #32]
# Local label targeted by the literal loads above.
1:
# ALL: Iterations: 100
# ALL-NEXT: Instructions: 2400
# M3-NEXT: Total Cycles: 4708
# M3-NEXT: Total uOps: 3200
# M4-NEXT: Total Cycles: 4708
# M4-NEXT: Total uOps: 3200
# M5-NEXT: Total Cycles: 5509
# M5-NEXT: Total uOps: 3300
# ALL: Dispatch Width: 6
# M3-NEXT: uOps Per Cycle: 0.68
# M3-NEXT: IPC: 0.51
# M3-NEXT: Block RThroughput: 13.5
# M4-NEXT: uOps Per Cycle: 0.68
# M4-NEXT: IPC: 0.51
# M4-NEXT: Block RThroughput: 13.0
# M5-NEXT: uOps Per Cycle: 0.60
# M5-NEXT: IPC: 0.44
# M5-NEXT: Block RThroughput: 13.5
# ALL: Instruction Info:
# ALL-NEXT: [1]: #uOps
# ALL-NEXT: [2]: Latency
# ALL-NEXT: [3]: RThroughput
# ALL-NEXT: [4]: MayLoad
# ALL-NEXT: [5]: MayStore
# ALL-NEXT: [6]: HasSideEffects (U)
# ALL: [1] [2] [3] [4] [5] [6] Instructions:
# M3-NEXT: 1 5 0.50 * ldr s0, {{\.?}}Ltmp0
# M3-NEXT: 1 5 0.50 * ldr q0, {{\.?}}Ltmp0
# M3-NEXT: 1 5 0.50 * ldur d0, [sp, #2]
# M3-NEXT: 1 5 0.50 * ldur q0, [sp, #16]
# M3-NEXT: 1 5 0.50 * ldr b0, [sp], #1
# M3-NEXT: 1 5 0.50 * ldr q0, [sp], #16
# M3-NEXT: 1 5 0.50 * ldr h0, [sp, #2]!
# M3-NEXT: 1 5 0.50 * ldr q0, [sp, #16]!
# M3-NEXT: 1 5 0.50 * ldr s0, [sp, #4]
# M3-NEXT: 1 5 0.50 * ldr q0, [sp, #16]
# M3-NEXT: 1 5 0.50 * ldr d0, [sp, x0, lsl #3]
# M3-NEXT: 2 6 0.50 * ldr q0, [sp, x0, lsl #4]
# M3-NEXT: 1 5 0.50 * ldr b0, [sp, x0]
# M3-NEXT: 1 5 0.50 * ldr q0, [sp, x0]
# M3-NEXT: 2 6 0.50 * ldr h0, [sp, w0, sxtw #1]
# M3-NEXT: 2 6 0.50 * ldr q0, [sp, w0, uxtw #4]
# M3-NEXT: 2 6 0.50 * ldr s0, [sp, w0, sxtw]
# M3-NEXT: 1 5 0.50 * ldr q0, [sp, w0, uxtw]
# M3-NEXT: 2 5 0.50 * ldp d0, d1, [sp], #16
# M3-NEXT: 2 5 1.00 * ldp q0, q1, [sp], #32
# M3-NEXT: 2 5 0.50 * ldp s0, s1, [sp, #8]!
# M3-NEXT: 2 5 1.00 * ldp q0, q1, [sp, #32]!
# M3-NEXT: 1 5 0.50 * ldp d0, d1, [sp, #16]
# M3-NEXT: 1 5 1.00 * ldp q0, q1, [sp, #32]
# M4-NEXT: 1 5 0.50 * ldr s0, {{\.?}}Ltmp0
# M4-NEXT: 1 5 0.50 * ldr q0, {{\.?}}Ltmp0
# M4-NEXT: 1 5 0.50 * ldur d0, [sp, #2]
# M4-NEXT: 1 5 0.50 * ldur q0, [sp, #16]
# M4-NEXT: 1 5 0.50 * ldr b0, [sp], #1
# M4-NEXT: 1 5 0.50 * ldr q0, [sp], #16
# M4-NEXT: 1 5 0.50 * ldr h0, [sp, #2]!
# M4-NEXT: 1 5 0.50 * ldr q0, [sp, #16]!
# M4-NEXT: 1 5 0.50 * ldr s0, [sp, #4]
# M4-NEXT: 1 5 0.50 * ldr q0, [sp, #16]
# M4-NEXT: 1 5 0.50 * ldr d0, [sp, x0, lsl #3]
# M4-NEXT: 2 6 0.50 * ldr q0, [sp, x0, lsl #4]
# M4-NEXT: 1 5 0.50 * ldr b0, [sp, x0]
# M4-NEXT: 1 5 0.50 * ldr q0, [sp, x0]
# M4-NEXT: 2 6 0.50 * ldr h0, [sp, w0, sxtw #1]
# M4-NEXT: 2 6 0.50 * ldr q0, [sp, w0, uxtw #4]
# M4-NEXT: 2 6 0.50 * ldr s0, [sp, w0, sxtw]
# M4-NEXT: 2 6 0.50 * ldr q0, [sp, w0, uxtw]
# M4-NEXT: 1 5 0.50 * ldp d0, d1, [sp], #16
# M4-NEXT: 2 5 0.50 * ldp q0, q1, [sp], #32
# M4-NEXT: 2 5 0.50 * ldp s0, s1, [sp, #8]!
# M4-NEXT: 2 5 1.00 * ldp q0, q1, [sp, #32]!
# M4-NEXT: 1 5 0.50 * ldp d0, d1, [sp, #16]
# M4-NEXT: 1 5 1.00 * ldp q0, q1, [sp, #32]
# M5-NEXT: 1 6 0.50 * ldr s0, {{\.?}}Ltmp0
# M5-NEXT: 1 6 0.50 * ldr q0, {{\.?}}Ltmp0
# M5-NEXT: 1 6 0.50 * ldur d0, [sp, #2]
# M5-NEXT: 1 6 0.50 * ldur q0, [sp, #16]
# M5-NEXT: 1 6 0.50 * ldr b0, [sp], #1
# M5-NEXT: 1 6 0.50 * ldr q0, [sp], #16
# M5-NEXT: 1 6 0.50 * ldr h0, [sp, #2]!
# M5-NEXT: 1 6 0.50 * ldr q0, [sp, #16]!
# M5-NEXT: 1 6 0.50 * ldr s0, [sp, #4]
# M5-NEXT: 1 6 0.50 * ldr q0, [sp, #16]
# M5-NEXT: 1 6 0.50 * ldr d0, [sp, x0, lsl #3]
# M5-NEXT: 2 7 0.50 * ldr q0, [sp, x0, lsl #4]
# M5-NEXT: 1 6 0.50 * ldr b0, [sp, x0]
# M5-NEXT: 1 6 0.50 * ldr q0, [sp, x0]
# M5-NEXT: 2 7 0.50 * ldr h0, [sp, w0, sxtw #1]
# M5-NEXT: 2 7 0.50 * ldr q0, [sp, w0, uxtw #4]
# M5-NEXT: 2 7 0.50 * ldr s0, [sp, w0, sxtw]
# M5-NEXT: 2 7 0.50 * ldr q0, [sp, w0, uxtw]
# M5-NEXT: 2 6 0.50 * ldp d0, d1, [sp], #16
# M5-NEXT: 2 6 1.00 * ldp q0, q1, [sp], #32
# M5-NEXT: 2 6 0.50 * ldp s0, s1, [sp, #8]!
# M5-NEXT: 2 6 1.00 * ldp q0, q1, [sp, #32]!
# M5-NEXT: 1 6 0.50 * ldp d0, d1, [sp, #16]
# M5-NEXT: 1 6 1.00 * ldp q0, q1, [sp, #32]

View File

@ -0,0 +1,62 @@
# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m3 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,M3
# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m4 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,M4
# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m5 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,M5
# Reciprocal computed with an actual divide: s30 = 1.0 / s30.
fmov s31, #1.00000000
fdiv s30, s31, s30
# Newton series for 1 / x.
# Initial estimate (frecpe) followed by two frecps + fmul refinement steps.
frecpe s1, s0
frecps s2, s0, s1
fmul s1, s1, s2
frecps s0, s0, s1
fmul s0, s1, s0
# ALL: Iterations: 100
# ALL-NEXT: Instructions: 700
# M3-NEXT: Total Cycles: 1803
# M4-NEXT: Total Cycles: 1703
# M5-NEXT: Total Cycles: 1703
# ALL-NEXT: Total uOps: 700
# ALL: Dispatch Width: 6
# M3-NEXT: uOps Per Cycle: 0.39
# M3-NEXT: IPC: 0.39
# M3-NEXT: Block RThroughput: 2.0
# M4-NEXT: uOps Per Cycle: 0.41
# M4-NEXT: IPC: 0.41
# M4-NEXT: Block RThroughput: 1.5
# M5-NEXT: uOps Per Cycle: 0.41
# M5-NEXT: IPC: 0.41
# M5-NEXT: Block RThroughput: 1.3
# ALL: Instruction Info:
# ALL-NEXT: [1]: #uOps
# ALL-NEXT: [2]: Latency
# ALL-NEXT: [3]: RThroughput
# ALL-NEXT: [4]: MayLoad
# ALL-NEXT: [5]: MayStore
# ALL-NEXT: [6]: HasSideEffects (U)
# ALL: [1] [2] [3] [4] [5] [6] Instructions:
# ALL-NEXT: 1 1 0.33 fmov s31, #1.00000000
# M3-NEXT: 1 7 2.00 fdiv s30, s31, s30
# M3-NEXT: 1 4 0.50 frecpe s1, s0
# M4-NEXT: 1 7 1.50 fdiv s30, s31, s30
# M4-NEXT: 1 3 0.50 frecpe s1, s0
# M5-NEXT: 1 7 1.00 fdiv s30, s31, s30
# M5-NEXT: 1 3 0.50 frecpe s1, s0
# ALL-NEXT: 1 4 0.33 frecps s2, s0, s1
# ALL-NEXT: 1 3 0.33 fmul s1, s1, s2
# ALL-NEXT: 1 4 0.33 frecps s0, s0, s1
# ALL-NEXT: 1 3 0.33 fmul s0, s1, s0

View File

@ -0,0 +1,72 @@
# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m3 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,M3
# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m4 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,M4
# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m5 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,M5
# Reciprocal square root computed with actual square root and divide
# instructions: s30 = 1.0 / sqrt(s30).
fsqrt s30, s30
fmov s31, #1.00000000
fdiv s30, s31, s30
# Newton series for 1 / sqrtf().
# Initial estimate (frsqrte) followed by two refinement steps, each squaring
# the current estimate, applying frsqrts and scaling the estimate.
frsqrte s1, s0
fmul s2, s1, s1
frsqrts s2, s0, s2
fmul s1, s1, s2
fmul s2, s1, s1
frsqrts s0, s0, s2
fmul s0, s1, s0
# ALL: Iterations: 100
# ALL-NEXT: Instructions: 1000
# M3-NEXT: Total Cycles: 2503
# M4-NEXT: Total Cycles: 2303
# M5-NEXT: Total Cycles: 2303
# ALL-NEXT: Total uOps: 1000
# ALL: Dispatch Width: 6
# M3-NEXT: uOps Per Cycle: 0.40
# M3-NEXT: IPC: 0.40
# M3-NEXT: Block RThroughput: 19.0
# M4-NEXT: uOps Per Cycle: 0.43
# M4-NEXT: IPC: 0.43
# M4-NEXT: Block RThroughput: 2.0
# M5-NEXT: uOps Per Cycle: 0.43
# M5-NEXT: IPC: 0.43
# M5-NEXT: Block RThroughput: 2.0
# ALL: Instruction Info:
# ALL-NEXT: [1]: #uOps
# ALL-NEXT: [2]: Latency
# ALL-NEXT: [3]: RThroughput
# ALL-NEXT: [4]: MayLoad
# ALL-NEXT: [5]: MayStore
# ALL-NEXT: [6]: HasSideEffects (U)
# ALL: [1] [2] [3] [4] [5] [6] Instructions:
# M3-NEXT: 1 18 19.00 fsqrt s30, s30
# M4-NEXT: 1 8 1.75 fsqrt s30, s30
# M5-NEXT: 1 8 1.25 fsqrt s30, s30
# ALL-NEXT: 1 1 0.33 fmov s31, #1.00000000
# M3-NEXT: 1 7 2.00 fdiv s30, s31, s30
# M3-NEXT: 1 4 0.50 frsqrte s1, s0
# M4-NEXT: 1 7 1.50 fdiv s30, s31, s30
# M4-NEXT: 1 3 0.50 frsqrte s1, s0
# M5-NEXT: 1 7 1.00 fdiv s30, s31, s30
# M5-NEXT: 1 3 0.50 frsqrte s1, s0
# ALL-NEXT: 1 3 0.33 fmul s2, s1, s1
# ALL-NEXT: 1 4 0.33 frsqrts s2, s0, s2
# ALL-NEXT: 1 3 0.33 fmul s1, s1, s2
# ALL-NEXT: 1 3 0.33 fmul s2, s1, s1
# ALL-NEXT: 1 4 0.33 frsqrts s0, s0, s2
# ALL-NEXT: 1 3 0.33 fmul s0, s1, s0

View File

@ -0,0 +1,73 @@
# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m3 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,M3
# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m4 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,M4
# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m5 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,M5
# Square root computed with the actual square root instruction.
fsqrt s31, s31
# Newton series for sqrtf().
# Refines a reciprocal square root estimate (frsqrte / frsqrts), then
# multiplies by the input s0 to obtain the square root.
frsqrte s1, s0
fmul s2, s1, s1
frsqrts s2, s0, s2
fmul s1, s1, s2
fmul s2, s1, s1
frsqrts s2, s0, s2
fmul s2, s2, s0
fmul s1, s1, s2
# Keep the input itself as the result when it compares equal to zero.
# NOTE(review): presumably this guards the zero-input case of the estimate
# sequence above - confirm.
fcmp s0, #0.0
fcsel s0, s0, s1, eq
# ALL: Iterations: 100
# ALL-NEXT: Instructions: 1100
# M3-NEXT: Total Cycles: 3203
# M4-NEXT: Total Cycles: 3103
# M5-NEXT: Total Cycles: 2803
# ALL-NEXT: Total uOps: 1200
# ALL: Dispatch Width: 6
# M3-NEXT: uOps Per Cycle: 0.37
# M3-NEXT: IPC: 0.34
# M3-NEXT: Block RThroughput: 20.0
# M4-NEXT: uOps Per Cycle: 0.39
# M4-NEXT: IPC: 0.35
# M4-NEXT: Block RThroughput: 2.3
# M5-NEXT: uOps Per Cycle: 0.43
# M5-NEXT: IPC: 0.39
# M5-NEXT: Block RThroughput: 2.3
# ALL: Instruction Info:
# ALL-NEXT: [1]: #uOps
# ALL-NEXT: [2]: Latency
# ALL-NEXT: [3]: RThroughput
# ALL-NEXT: [4]: MayLoad
# ALL-NEXT: [5]: MayStore
# ALL-NEXT: [6]: HasSideEffects (U)
# ALL: [1] [2] [3] [4] [5] [6] Instructions:
# M3-NEXT: 1 18 19.00 fsqrt s31, s31
# M3-NEXT: 1 4 0.50 frsqrte s1, s0
# M4-NEXT: 1 8 1.75 fsqrt s31, s31
# M4-NEXT: 1 3 0.50 frsqrte s1, s0
# M5-NEXT: 1 8 1.25 fsqrt s31, s31
# M5-NEXT: 1 3 0.50 frsqrte s1, s0
# ALL-NEXT: 1 3 0.33 fmul s2, s1, s1
# ALL-NEXT: 1 4 0.33 frsqrts s2, s0, s2
# ALL-NEXT: 1 3 0.33 fmul s1, s1, s2
# ALL-NEXT: 1 3 0.33 fmul s2, s1, s1
# ALL-NEXT: 1 4 0.33 frsqrts s2, s0, s2
# ALL-NEXT: 1 3 0.33 fmul s2, s2, s0
# ALL-NEXT: 1 3 0.33 fmul s1, s1, s2
# ALL-NEXT: 1 2 1.00 fcmp s0, #0.0
# M3-NEXT: 2 5 1.00 fcsel s0, s0, s1, eq
# M4-NEXT: 2 5 1.00 fcsel s0, s0, s1, eq
# M5-NEXT: 2 2 1.00 fcsel s0, s0, s1, eq

View File

@ -0,0 +1,142 @@
# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m3 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,M3
# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m4 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,M4
# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m5 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,M5
# FP/SIMD register stores, one narrow and one 128-bit (q) form per addressing
# mode, so that per-mode uOp count, latency and throughput can be compared
# across the three Exynos models.
# Unscaled immediate offset.
stur d0, [sp, #2]
stur q0, [sp, #16]
# Post-indexed.
str b0, [sp], #1
str q0, [sp], #16
# Pre-indexed.
str h0, [sp, #2]!
str q0, [sp, #16]!
# Immediate offset.
str s0, [sp, #4]
str q0, [sp, #16]
# Register offset, shifted.
str d0, [sp, x0, lsl #3]
str q0, [sp, x0, lsl #4]
# Register offset, not shifted.
str b0, [sp, x0]
str q0, [sp, x0]
# Extended register offset, shifted.
str h0, [sp, w0, sxtw #1]
str q0, [sp, w0, uxtw #4]
# Extended register offset, not shifted.
str s0, [sp, w0, sxtw]
str q0, [sp, w0, uxtw]
# Pair stores: post-indexed, pre-indexed, then immediate offset.
stp d0, d1, [sp], #16
stp q0, q1, [sp], #32
stp s0, s1, [sp, #8]!
stp q0, q1, [sp, #32]!
stp d0, d1, [sp, #16]
stp q0, q1, [sp, #32]
# ALL: Iterations: 100
# ALL-NEXT: Instructions: 2200
# M3-NEXT: Total Cycles: 3203
# M3-NEXT: Total uOps: 2900
# M4-NEXT: Total Cycles: 3203
# M4-NEXT: Total uOps: 3000
# M5-NEXT: Total Cycles: 2803
# M5-NEXT: Total uOps: 2500
# ALL: Dispatch Width: 6
# M3-NEXT: uOps Per Cycle: 0.91
# M3-NEXT: IPC: 0.69
# M3-NEXT: Block RThroughput: 22.0
# M4-NEXT: uOps Per Cycle: 0.94
# M4-NEXT: IPC: 0.69
# M4-NEXT: Block RThroughput: 12.5
# M5-NEXT: uOps Per Cycle: 0.89
# M5-NEXT: IPC: 0.78
# M5-NEXT: Block RThroughput: 11.0
# ALL: Instruction Info:
# ALL-NEXT: [1]: #uOps
# ALL-NEXT: [2]: Latency
# ALL-NEXT: [3]: RThroughput
# ALL-NEXT: [4]: MayLoad
# ALL-NEXT: [5]: MayStore
# ALL-NEXT: [6]: HasSideEffects (U)
# ALL: [1] [2] [3] [4] [5] [6] Instructions:
# M3-NEXT: 1 1 1.00 * stur d0, [sp, #2]
# M3-NEXT: 1 1 1.00 * stur q0, [sp, #16]
# M3-NEXT: 1 1 1.00 * str b0, [sp], #1
# M3-NEXT: 1 1 1.00 * str q0, [sp], #16
# M3-NEXT: 1 1 1.00 * str h0, [sp, #2]!
# M3-NEXT: 1 1 1.00 * str q0, [sp, #16]!
# M3-NEXT: 1 1 1.00 * str s0, [sp, #4]
# M3-NEXT: 1 1 1.00 * str q0, [sp, #16]
# M3-NEXT: 1 1 1.00 * str d0, [sp, x0, lsl #3]
# M3-NEXT: 2 3 1.00 * str q0, [sp, x0, lsl #4]
# M3-NEXT: 1 1 1.00 * str b0, [sp, x0]
# M3-NEXT: 1 1 1.00 * str q0, [sp, x0]
# M3-NEXT: 2 3 1.00 * str h0, [sp, w0, sxtw #1]
# M3-NEXT: 2 3 1.00 * str q0, [sp, w0, uxtw #4]
# M3-NEXT: 2 3 1.00 * str s0, [sp, w0, sxtw]
# M3-NEXT: 2 3 1.00 * str q0, [sp, w0, uxtw]
# M3-NEXT: 1 1 1.00 * stp d0, d1, [sp], #16
# M3-NEXT: 2 1 1.00 * stp q0, q1, [sp], #32
# M3-NEXT: 1 1 1.00 * stp s0, s1, [sp, #8]!
# M3-NEXT: 2 1 1.00 * stp q0, q1, [sp, #32]!
# M3-NEXT: 1 1 1.00 * stp d0, d1, [sp, #16]
# M3-NEXT: 1 1 1.00 * stp q0, q1, [sp, #32]
# M4-NEXT: 1 1 0.50 * stur d0, [sp, #2]
# M4-NEXT: 1 1 0.50 * stur q0, [sp, #16]
# M4-NEXT: 1 1 0.50 * str b0, [sp], #1
# M4-NEXT: 1 1 0.50 * str q0, [sp], #16
# M4-NEXT: 1 1 0.50 * str h0, [sp, #2]!
# M4-NEXT: 1 1 0.50 * str q0, [sp, #16]!
# M4-NEXT: 1 1 0.50 * str s0, [sp, #4]
# M4-NEXT: 1 1 0.50 * str q0, [sp, #16]
# M4-NEXT: 1 1 0.50 * str d0, [sp, x0, lsl #3]
# M4-NEXT: 2 3 0.50 * str q0, [sp, x0, lsl #4]
# M4-NEXT: 1 1 0.50 * str b0, [sp, x0]
# M4-NEXT: 1 1 0.50 * str q0, [sp, x0]
# M4-NEXT: 2 3 0.50 * str h0, [sp, w0, sxtw #1]
# M4-NEXT: 2 3 0.50 * str q0, [sp, w0, uxtw #4]
# M4-NEXT: 2 3 0.50 * str s0, [sp, w0, sxtw]
# M4-NEXT: 2 3 0.50 * str q0, [sp, w0, uxtw]
# M4-NEXT: 1 1 0.50 * stp d0, d1, [sp], #16
# M4-NEXT: 2 1 1.00 * stp q0, q1, [sp], #32
# M4-NEXT: 1 1 0.50 * stp s0, s1, [sp, #8]!
# M4-NEXT: 2 1 1.00 * stp q0, q1, [sp, #32]!
# M4-NEXT: 1 1 0.50 * stp d0, d1, [sp, #16]
# M4-NEXT: 2 1 1.00 * stp q0, q1, [sp, #32]
# M5-NEXT: 1 1 0.50 * stur d0, [sp, #2]
# M5-NEXT: 1 1 0.50 * stur q0, [sp, #16]
# M5-NEXT: 1 1 0.50 * str b0, [sp], #1
# M5-NEXT: 1 1 0.50 * str q0, [sp], #16
# M5-NEXT: 1 1 0.50 * str h0, [sp, #2]!
# M5-NEXT: 1 1 0.50 * str q0, [sp, #16]!
# M5-NEXT: 1 1 0.50 * str s0, [sp, #4]
# M5-NEXT: 1 1 0.50 * str q0, [sp, #16]
# M5-NEXT: 1 1 0.50 * str d0, [sp, x0, lsl #3]
# M5-NEXT: 2 3 0.50 * str q0, [sp, x0, lsl #4]
# M5-NEXT: 1 1 0.50 * str b0, [sp, x0]
# M5-NEXT: 1 1 0.50 * str q0, [sp, x0]
# M5-NEXT: 1 1 0.50 * str h0, [sp, w0, sxtw #1]
# M5-NEXT: 2 3 0.50 * str q0, [sp, w0, uxtw #4]
# M5-NEXT: 1 1 0.50 * str s0, [sp, w0, sxtw]
# M5-NEXT: 2 3 0.50 * str q0, [sp, w0, uxtw]
# M5-NEXT: 1 1 0.50 * stp d0, d1, [sp], #16
# M5-NEXT: 1 1 1.00 * stp q0, q1, [sp], #32
# M5-NEXT: 1 1 0.50 * stp s0, s1, [sp, #8]!
# M5-NEXT: 1 1 1.00 * stp q0, q1, [sp, #32]!
# M5-NEXT: 1 1 0.50 * stp d0, d1, [sp, #16]
# M5-NEXT: 1 1 1.00 * stp q0, q1, [sp, #32]

View File

@ -0,0 +1,66 @@
# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m3 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,M3
# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m4 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,M4
# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m5 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,M5
# General-purpose register loads, one per addressing mode.
# Literal (label-relative), unscaled offset, post-indexed, pre-indexed and
# immediate offset forms.
ldr w0, 1f
ldur x0, [sp, #8]
ldrb w0, [sp], #1
ldrsh w0, [sp, #2]!
ldr x0, [sp, #8]
# Register and extended register offsets. The x31/w31 index register is
# printed as xzr/wzr in the expected output.
ldrb w0, [sp, x31]
ldrsh w0, [sp, x31, lsl #1]
ldr w0, [sp, w31, sxtw]
ldr x0, [sp, w31, uxtw #3]
# Pair loads: ldnp with immediate offset, post-indexed ldp, pre-indexed ldpsw.
ldnp w0, w1, [sp, #8]
ldp x0, x1, [sp], #16
ldpsw x0, x1, [sp, #8]!
# Local label targeted by the literal load above.
1:
# ALL: Iterations: 100
# ALL-NEXT: Instructions: 1200
# ALL-NEXT: Total Cycles: 1904
# M3-NEXT: Total uOps: 1600
# M4-NEXT: Total uOps: 1400
# M5-NEXT: Total uOps: 1400
# ALL: Dispatch Width: 6
# M3-NEXT: uOps Per Cycle: 0.84
# M4-NEXT: uOps Per Cycle: 0.74
# M5-NEXT: uOps Per Cycle: 0.74
# ALL-NEXT: IPC: 0.63
# ALL-NEXT: Block RThroughput: 6.0
# ALL: Instruction Info:
# ALL-NEXT: [1]: #uOps
# ALL-NEXT: [2]: Latency
# ALL-NEXT: [3]: RThroughput
# ALL-NEXT: [4]: MayLoad
# ALL-NEXT: [5]: MayStore
# ALL-NEXT: [6]: HasSideEffects (U)
# ALL: [1] [2] [3] [4] [5] [6] Instructions:
# ALL-NEXT: 1 4 0.50 * ldr w0, {{\.?}}Ltmp0
# ALL-NEXT: 1 4 0.50 * ldur x0, [sp, #8]
# ALL-NEXT: 1 4 0.50 * ldrb w0, [sp], #1
# ALL-NEXT: 1 4 0.50 * ldrsh w0, [sp, #2]!
# ALL-NEXT: 1 4 0.50 * ldr x0, [sp, #8]
# ALL-NEXT: 1 4 0.50 * ldrb w0, [sp, xzr]
# ALL-NEXT: 1 5 0.50 * ldrsh w0, [sp, xzr, lsl #1]
# M3-NEXT: 2 5 0.50 * ldr w0, [sp, wzr, sxtw]
# M3-NEXT: 2 5 0.50 * ldr x0, [sp, wzr, uxtw #3]
# M4-NEXT: 1 5 0.50 * ldr w0, [sp, wzr, sxtw]
# M4-NEXT: 1 5 0.50 * ldr x0, [sp, wzr, uxtw #3]
# M5-NEXT: 1 5 0.50 * ldr w0, [sp, wzr, sxtw]
# M5-NEXT: 1 5 0.50 * ldr x0, [sp, wzr, uxtw #3]
# ALL-NEXT: 1 4 0.50 * ldnp w0, w1, [sp, #8]
# ALL-NEXT: 2 4 0.50 * ldp x0, x1, [sp], #16
# ALL-NEXT: 2 4 0.50 * ldpsw x0, x1, [sp, #8]!

View File

@ -1,6 +1,7 @@
# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py # NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m3 -iterations=1 -scheduler-stats -resource-pressure=false -instruction-info=false < %s | FileCheck %s -check-prefixes=ALL,M3 # RUN: llvm-mca -march=aarch64 -mcpu=exynos-m3 -iterations=1 -scheduler-stats -resource-pressure=false -instruction-info=false < %s | FileCheck %s -check-prefixes=ALL,M3
# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m4 -iterations=1 -scheduler-stats -resource-pressure=false -instruction-info=false < %s | FileCheck %s -check-prefixes=ALL,M4 # RUN: llvm-mca -march=aarch64 -mcpu=exynos-m4 -iterations=1 -scheduler-stats -resource-pressure=false -instruction-info=false < %s | FileCheck %s -check-prefixes=ALL,M4
# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m5 -iterations=1 -scheduler-stats -resource-pressure=false -instruction-info=false < %s | FileCheck %s -check-prefixes=ALL,M5
b main b main
@ -19,6 +20,11 @@
# M4-NEXT: IPC: 0.50 # M4-NEXT: IPC: 0.50
# M4-NEXT: Block RThroughput: 0.2 # M4-NEXT: Block RThroughput: 0.2
# M5: Dispatch Width: 6
# M5-NEXT: uOps Per Cycle: 0.50
# M5-NEXT: IPC: 0.50
# M5-NEXT: Block RThroughput: 0.2
# ALL: Schedulers - number of cycles where we saw N micro opcodes issued: # ALL: Schedulers - number of cycles where we saw N micro opcodes issued:
# ALL-NEXT: [# issued], [# cycles] # ALL-NEXT: [# issued], [# cycles]
# ALL-NEXT: 0, 1 (50.0%) # ALL-NEXT: 0, 1 (50.0%)

View File

@ -1,6 +1,7 @@
# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py # NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m3 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,EM3 # RUN: llvm-mca -march=aarch64 -mcpu=exynos-m3 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,EM3
# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m4 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,EM4 # RUN: llvm-mca -march=aarch64 -mcpu=exynos-m4 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,EM4
# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m5 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,EM5
adds w0, w1, w2, lsl #0 adds w0, w1, w2, lsl #0
sub x3, x4, x5, lsr #1 sub x3, x4, x5, lsr #1
@ -9,13 +10,14 @@
adds w12, w13, w14, lsl #4 adds w12, w13, w14, lsl #4
sub x15, x16, x17, lsr #6 sub x15, x16, x17, lsr #6
ands x18, x19, x20, lsl #8 ands x18, x19, x20, lsl #8
orr w21, w22, w23, asr #10 eor w21, w22, w23, asr #10
# ALL: Iterations: 100 # ALL: Iterations: 100
# ALL-NEXT: Instructions: 800 # ALL-NEXT: Instructions: 800
# EM3-NEXT: Total Cycles: 354 # EM3-NEXT: Total Cycles: 354
# EM4-NEXT: Total Cycles: 329 # EM4-NEXT: Total Cycles: 329
# EM5-NEXT: Total Cycles: 220
# ALL-NEXT: Total uOps: 800 # ALL-NEXT: Total uOps: 800
@ -29,6 +31,11 @@
# EM4-NEXT: IPC: 2.43 # EM4-NEXT: IPC: 2.43
# EM4-NEXT: Block RThroughput: 3.3 # EM4-NEXT: Block RThroughput: 3.3
# EM5: Dispatch Width: 6
# EM5-NEXT: uOps Per Cycle: 3.64
# EM5-NEXT: IPC: 3.64
# EM5-NEXT: Block RThroughput: 1.5
# ALL: Instruction Info: # ALL: Instruction Info:
# ALL-NEXT: [1]: #uOps # ALL-NEXT: [1]: #uOps
# ALL-NEXT: [2]: Latency # ALL-NEXT: [2]: Latency
@ -46,7 +53,7 @@
# EM3-NEXT: 1 2 0.50 adds w12, w13, w14, lsl #4 # EM3-NEXT: 1 2 0.50 adds w12, w13, w14, lsl #4
# EM3-NEXT: 1 2 0.50 sub x15, x16, x17, lsr #6 # EM3-NEXT: 1 2 0.50 sub x15, x16, x17, lsr #6
# EM3-NEXT: 1 2 0.50 ands x18, x19, x20, lsl #8 # EM3-NEXT: 1 2 0.50 ands x18, x19, x20, lsl #8
# EM3-NEXT: 1 2 0.50 orr w21, w22, w23, asr #10 # EM3-NEXT: 1 2 0.50 eor w21, w22, w23, asr #10
# EM4-NEXT: 1 1 0.25 adds w0, w1, w2 # EM4-NEXT: 1 1 0.25 adds w0, w1, w2
# EM4-NEXT: 1 2 0.50 sub x3, x4, x5, lsr #1 # EM4-NEXT: 1 2 0.50 sub x3, x4, x5, lsr #1
@ -55,4 +62,13 @@
# EM4-NEXT: 1 2 0.50 adds w12, w13, w14, lsl #4 # EM4-NEXT: 1 2 0.50 adds w12, w13, w14, lsl #4
# EM4-NEXT: 1 2 0.50 sub x15, x16, x17, lsr #6 # EM4-NEXT: 1 2 0.50 sub x15, x16, x17, lsr #6
# EM4-NEXT: 1 1 0.25 ands x18, x19, x20, lsl #8 # EM4-NEXT: 1 1 0.25 ands x18, x19, x20, lsl #8
# EM4-NEXT: 1 2 0.50 orr w21, w22, w23, asr #10 # EM4-NEXT: 1 2 0.50 eor w21, w22, w23, asr #10
# EM5-NEXT: 1 1 0.17 adds w0, w1, w2
# EM5-NEXT: 1 2 0.50 sub x3, x4, x5, lsr #1
# EM5-NEXT: 1 1 0.25 ands x6, x7, x8, lsl #2
# EM5-NEXT: 1 2 0.33 orr w9, w10, w11, asr #3
# EM5-NEXT: 1 2 0.33 adds w12, w13, w14, lsl #4
# EM5-NEXT: 1 2 0.50 sub x15, x16, x17, lsr #6
# EM5-NEXT: 1 1 0.25 ands x18, x19, x20, lsl #8
# EM5-NEXT: 1 2 0.33 eor w21, w22, w23, asr #10

View File

@ -0,0 +1,82 @@
# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m3 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,M3
# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m4 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,M4
# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m5 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,M5
# General-purpose register stores, one per addressing mode.
# Unscaled offset, post-indexed, pre-indexed and immediate offset forms.
stur x0, [sp, #8]
strb w0, [sp], #1
strh w0, [sp, #2]!
str x0, [sp, #8]
# Register and extended register offsets. The x31/w31 index register is
# printed as xzr/wzr in the expected output.
strb w0, [sp, x31]
strh w0, [sp, x31, lsl #1]
str w0, [sp, w31, sxtw]
str x0, [sp, w31, uxtw #3]
# Pair stores: stnp with immediate offset, post-indexed stp, pre-indexed stp.
stnp w0, w1, [sp, #8]
stp x0, x1, [sp], #16
stp w0, w1, [sp, #8]!
# ALL: Iterations: 100
# ALL-NEXT: Instructions: 1100
# ALL-NEXT: Total Cycles: 1303
# M3-NEXT: Total uOps: 1300
# M4-NEXT: Total uOps: 1100
# M5-NEXT: Total uOps: 1100
# ALL: Dispatch Width: 6
# M3-NEXT: uOps Per Cycle: 1.00
# M4-NEXT: uOps Per Cycle: 0.84
# M5-NEXT: uOps Per Cycle: 0.84
# ALL-NEXT: IPC: 0.84
# M3-NEXT: Block RThroughput: 11.0
# M4-NEXT: Block RThroughput: 5.5
# M5-NEXT: Block RThroughput: 5.5
# ALL: Instruction Info:
# ALL-NEXT: [1]: #uOps
# ALL-NEXT: [2]: Latency
# ALL-NEXT: [3]: RThroughput
# ALL-NEXT: [4]: MayLoad
# ALL-NEXT: [5]: MayStore
# ALL-NEXT: [6]: HasSideEffects (U)
# ALL: [1] [2] [3] [4] [5] [6] Instructions:
# M3-NEXT: 1 1 1.00 * stur x0, [sp, #8]
# M3-NEXT: 1 1 1.00 * strb w0, [sp], #1
# M3-NEXT: 1 1 1.00 * strh w0, [sp, #2]!
# M3-NEXT: 1 1 1.00 * str x0, [sp, #8]
# M3-NEXT: 1 1 1.00 * strb w0, [sp, xzr]
# M3-NEXT: 1 1 1.00 * strh w0, [sp, xzr, lsl #1]
# M3-NEXT: 2 2 1.00 * str w0, [sp, wzr, sxtw]
# M3-NEXT: 2 2 1.00 * str x0, [sp, wzr, uxtw #3]
# M3-NEXT: 1 1 1.00 * stnp w0, w1, [sp, #8]
# M3-NEXT: 1 1 1.00 * stp x0, x1, [sp], #16
# M3-NEXT: 1 1 1.00 * stp w0, w1, [sp, #8]!
# M4-NEXT: 1 1 0.50 * stur x0, [sp, #8]
# M4-NEXT: 1 1 0.50 * strb w0, [sp], #1
# M4-NEXT: 1 1 0.50 * strh w0, [sp, #2]!
# M4-NEXT: 1 1 0.50 * str x0, [sp, #8]
# M4-NEXT: 1 1 0.50 * strb w0, [sp, xzr]
# M4-NEXT: 1 1 0.50 * strh w0, [sp, xzr, lsl #1]
# M4-NEXT: 1 2 0.50 * str w0, [sp, wzr, sxtw]
# M4-NEXT: 1 2 0.50 * str x0, [sp, wzr, uxtw #3]
# M4-NEXT: 1 1 0.50 * stnp w0, w1, [sp, #8]
# M4-NEXT: 1 1 0.50 * stp x0, x1, [sp], #16
# M4-NEXT: 1 1 0.50 * stp w0, w1, [sp, #8]!
# M5-NEXT: 1 1 0.50 * stur x0, [sp, #8]
# M5-NEXT: 1 1 0.50 * strb w0, [sp], #1
# M5-NEXT: 1 1 0.50 * strh w0, [sp, #2]!
# M5-NEXT: 1 1 0.50 * str x0, [sp, #8]
# M5-NEXT: 1 1 0.50 * strb w0, [sp, xzr]
# M5-NEXT: 1 1 0.50 * strh w0, [sp, xzr, lsl #1]
# M5-NEXT: 1 2 0.50 * str w0, [sp, wzr, sxtw]
# M5-NEXT: 1 2 0.50 * str x0, [sp, wzr, uxtw #3]
# M5-NEXT: 1 1 0.50 * stnp w0, w1, [sp, #8]
# M5-NEXT: 1 1 0.50 * stp x0, x1, [sp], #16
# M5-NEXT: 1 1 0.50 * stp w0, w1, [sp, #8]!

View File

@ -1,6 +1,7 @@
# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py # NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m3 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,M3 # RUN: llvm-mca -march=aarch64 -mcpu=exynos-m3 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,M3
# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m4 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,M4 # RUN: llvm-mca -march=aarch64 -mcpu=exynos-m4 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,M4
# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m5 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,M5
mov x0, x1 mov x0, x1
mov sp, x0 mov sp, x0
@ -22,21 +23,13 @@
# ALL: Iterations: 100 # ALL: Iterations: 100
# ALL-NEXT: Instructions: 1000 # ALL-NEXT: Instructions: 1000
# ALL-NEXT: Total Cycles: 172
# M3-NEXT: Total Cycles: 172
# M4-NEXT: Total Cycles: 172
# ALL-NEXT: Total uOps: 1000 # ALL-NEXT: Total uOps: 1000
# M3: Dispatch Width: 6 # ALL: Dispatch Width: 6
# M3-NEXT: uOps Per Cycle: 5.81 # ALL-NEXT: uOps Per Cycle: 5.81
# M3-NEXT: IPC: 5.81 # ALL-NEXT: IPC: 5.81
# M3-NEXT: Block RThroughput: 1.7 # ALL-NEXT: Block RThroughput: 1.7
# M4: Dispatch Width: 6
# M4-NEXT: uOps Per Cycle: 5.81
# M4-NEXT: IPC: 5.81
# M4-NEXT: Block RThroughput: 1.7
# ALL: Instruction Info: # ALL: Instruction Info:
# ALL-NEXT: [1]: #uOps # ALL-NEXT: [1]: #uOps
@ -47,25 +40,21 @@
# ALL-NEXT: [6]: HasSideEffects (U) # ALL-NEXT: [6]: HasSideEffects (U)
# ALL: [1] [2] [3] [4] [5] [6] Instructions: # ALL: [1] [2] [3] [4] [5] [6] Instructions:
# ALL-NEXT: 1 0 0.17 mov x0, x1
# ALL-NEXT: 1 0 0.17 mov sp, x0
# ALL-NEXT: 1 0 0.17 mov w0, #12816
# M3-NEXT: 1 0 0.17 mov x0, x1
# M3-NEXT: 1 0 0.17 mov sp, x0
# M3-NEXT: 1 0 0.17 mov w0, #12816
# M3-NEXT: 1 1 0.25 add w0, w1, #0 # M3-NEXT: 1 1 0.25 add w0, w1, #0
# M3-NEXT: 1 0 0.17 adr x0, {{\.?}}Ltmp0
# M3-NEXT: 1 4 0.50 * ldr x0, [x0]
# M3-NEXT: 1 0 0.17 adrp x0, {{\.?}}Ltmp0
# M3-NEXT: 1 1 0.25 add x0, x0, :lo12:{{\.?}}Ltmp0
# M3-NEXT: 1 1 0.33 fmov s0, s1
# M3-NEXT: 1 0 0.17 movi d0, #0000000000000000
# M4-NEXT: 1 0 0.17 mov x0, x1
# M4-NEXT: 1 0 0.17 mov sp, x0
# M4-NEXT: 1 0 0.17 mov w0, #12816
# M4-NEXT: 1 1 0.25 add w0, w1, #0 # M4-NEXT: 1 1 0.25 add w0, w1, #0
# M4-NEXT: 1 0 0.17 adr x0, {{\.?}}Ltmp0 # M5-NEXT: 1 1 0.17 add w0, w1, #0
# M4-NEXT: 1 4 0.50 * ldr x0, [x0]
# M4-NEXT: 1 0 0.17 adrp x0, {{\.?}}Ltmp0 # ALL-NEXT: 1 0 0.17 adr x0, {{\.?}}Ltmp0
# M4-NEXT: 1 1 0.25 add x0, x0, :lo12:{{\.?}}Ltmp0 # ALL-NEXT: 1 4 0.50 * ldr x0, [x0]
# ALL-NEXT: 1 0 0.17 adrp x0, {{\.?}}Ltmp0
# ALL-NEXT: 1 1 0.25 add x0, x0, :lo12:{{\.?}}Ltmp0
# M3-NEXT: 1 1 0.33 fmov s0, s1
# M4-NEXT: 1 1 0.33 fmov s0, s1 # M4-NEXT: 1 1 0.33 fmov s0, s1
# M4-NEXT: 1 0 0.17 movi d0, #0000000000000000 # M5-NEXT: 1 2 0.33 fmov s0, s1
# ALL-NEXT: 1 0 0.17 movi d0, #0000000000000000