forked from OSchip/llvm-project
[AArch64] Split out processor/tuning features
Following on from an earlier patch that introduced support for -mtune for AArch64 backends, this patch splits out the tuning features from the processor features. This gives us the ability to enable architectural feature set A for a given processor with "-mcpu=A" and define the set of tuning features B with "-mtune=B". It's quite difficult to write a test that proves we select the right features according to the tuning attribute, because most of these features relate to scheduling. I have created a test, CodeGen/AArch64/misched-fusion-addr-tune.ll, that demonstrates the different scheduling choices based upon the tuning. Differential Revision: https://reviews.llvm.org/D111551
This commit is contained in:
parent
23db763b7d
commit
5ea35791e6
File diff suppressed because it is too large
Load Diff
|
@ -197,6 +197,9 @@ protected:
|
||||||
bool HasSMEI64 = false;
|
bool HasSMEI64 = false;
|
||||||
bool HasStreamingSVE = false;
|
bool HasStreamingSVE = false;
|
||||||
|
|
||||||
|
// AppleA7 system register.
|
||||||
|
bool HasAppleA7SysReg = false;
|
||||||
|
|
||||||
// Future architecture extensions.
|
// Future architecture extensions.
|
||||||
bool HasETE = false;
|
bool HasETE = false;
|
||||||
bool HasTRBE = false;
|
bool HasTRBE = false;
|
||||||
|
|
|
@ -1635,7 +1635,7 @@ def : RWSysReg<"PMSNEVFR_EL1", 0b11, 0b000, 0b1001, 0b1001, 0b001>;
|
||||||
|
|
||||||
// Cyclone specific system registers
|
// Cyclone specific system registers
|
||||||
// Op0 Op1 CRn CRm Op2
|
// Op0 Op1 CRn CRm Op2
|
||||||
let Requires = [{ {AArch64::ProcAppleA7} }] in
|
let Requires = [{ {AArch64::FeatureAppleA7SysReg} }] in
|
||||||
def : RWSysReg<"CPM_IOACC_CTL_EL3", 0b11, 0b111, 0b1111, 0b0010, 0b000>;
|
def : RWSysReg<"CPM_IOACC_CTL_EL3", 0b11, 0b111, 0b1111, 0b0010, 0b000>;
|
||||||
|
|
||||||
// Scalable Matrix Extension (SME)
|
// Scalable Matrix Extension (SME)
|
||||||
|
|
|
@ -0,0 +1,40 @@
|
||||||
|
; RUN: llc %s -o - -mtriple=aarch64-unknown -mcpu=cortex-a65 | FileCheck %s
|
||||||
|
|
||||||
|
@var_float = dso_local global float 0.0
|
||||||
|
@var_double = dso_local global double 0.0
|
||||||
|
@var_double2 = dso_local global <2 x double> <double 0.0, double 0.0>
|
||||||
|
|
||||||
|
define dso_local void @ldst_double() {
|
||||||
|
%valf = load volatile float, float* @var_float
|
||||||
|
%vale = fpext float %valf to double
|
||||||
|
%vald = load volatile double, double* @var_double
|
||||||
|
%vald1 = insertelement <2 x double> undef, double %vald, i32 0
|
||||||
|
%vald2 = insertelement <2 x double> %vald1, double %vale, i32 1
|
||||||
|
store volatile <2 x double> %vald2, <2 x double>* @var_double2
|
||||||
|
ret void
|
||||||
|
|
||||||
|
; CHECK-LABEL: ldst_double:
|
||||||
|
; CHECK: adrp [[RD:x[0-9]+]], var_double
|
||||||
|
; CHECK-NEXT: ldr {{d[0-9]+}}, {{\[}}[[RD]], {{#?}}:lo12:var_double{{\]}}
|
||||||
|
; CHECK: adrp [[RQ:x[0-9]+]], var_double2
|
||||||
|
; CHECK-NEXT: str {{q[0-9]+}}, {{\[}}[[RQ]], {{#?}}:lo12:var_double2{{\]}}
|
||||||
|
}
|
||||||
|
|
||||||
|
define dso_local void @ldst_double_tune_a53() #0 {
|
||||||
|
%valf = load volatile float, float* @var_float
|
||||||
|
%vale = fpext float %valf to double
|
||||||
|
%vald = load volatile double, double* @var_double
|
||||||
|
%vald1 = insertelement <2 x double> undef, double %vald, i32 0
|
||||||
|
%vald2 = insertelement <2 x double> %vald1, double %vale, i32 1
|
||||||
|
store volatile <2 x double> %vald2, <2 x double>* @var_double2
|
||||||
|
ret void
|
||||||
|
|
||||||
|
; CHECK-LABEL: ldst_double_tune_a53:
|
||||||
|
; CHECK: adrp [[RD:x[0-9]+]], var_double
|
||||||
|
; CHECK-NEXT: ldr {{d[0-9]+}}, {{\[}}[[RD]], {{#?}}:lo12:var_double{{\]}}
|
||||||
|
; CHECK-NEXT: adrp [[RQ:x[0-9]+]], var_double2
|
||||||
|
; CHECK: fcvt
|
||||||
|
; CHECK: str {{q[0-9]+}}, {{\[}}[[RQ]], {{#?}}:lo12:var_double2{{\]}}
|
||||||
|
}
|
||||||
|
|
||||||
|
attributes #0 = { "tune-cpu"="cortex-a53" }
|
Loading…
Reference in New Issue