forked from OSchip/llvm-project
Remove some AddedComplexity tags that were forcing priority for AVX over SSE. Use predicates instead.
llvm-svn: 200458
This commit is contained in:
parent
ed0d1ccc95
commit
c45da1619c
|
@ -3055,24 +3055,26 @@ let isCodeGenOnly = 1 in {
|
||||||
// we now generate:
|
// we now generate:
|
||||||
// addss %xmm1, %xmm0
|
// addss %xmm1, %xmm0
|
||||||
|
|
||||||
def : Pat<(v4f32 (X86Movss (v4f32 VR128:$dst), (v4f32 (scalar_to_vector (fadd
|
let Predicates = [UseSSE1] in {
|
||||||
(f32 (vector_extract (v4f32 VR128:$dst), (iPTR 0))),
|
def : Pat<(v4f32 (X86Movss (v4f32 VR128:$dst), (v4f32 (scalar_to_vector (fadd
|
||||||
FR32:$src))))),
|
(f32 (vector_extract (v4f32 VR128:$dst), (iPTR 0))),
|
||||||
(ADDSSrr_Int v4f32:$dst, (COPY_TO_REGCLASS FR32:$src, VR128))>;
|
FR32:$src))))),
|
||||||
def : Pat<(v4f32 (X86Movss (v4f32 VR128:$dst), (v4f32 (scalar_to_vector (fsub
|
(ADDSSrr_Int v4f32:$dst, (COPY_TO_REGCLASS FR32:$src, VR128))>;
|
||||||
(f32 (vector_extract (v4f32 VR128:$dst), (iPTR 0))),
|
def : Pat<(v4f32 (X86Movss (v4f32 VR128:$dst), (v4f32 (scalar_to_vector (fsub
|
||||||
FR32:$src))))),
|
(f32 (vector_extract (v4f32 VR128:$dst), (iPTR 0))),
|
||||||
(SUBSSrr_Int v4f32:$dst, (COPY_TO_REGCLASS FR32:$src, VR128))>;
|
FR32:$src))))),
|
||||||
def : Pat<(v4f32 (X86Movss (v4f32 VR128:$dst), (v4f32 (scalar_to_vector (fmul
|
(SUBSSrr_Int v4f32:$dst, (COPY_TO_REGCLASS FR32:$src, VR128))>;
|
||||||
(f32 (vector_extract (v4f32 VR128:$dst), (iPTR 0))),
|
def : Pat<(v4f32 (X86Movss (v4f32 VR128:$dst), (v4f32 (scalar_to_vector (fmul
|
||||||
FR32:$src))))),
|
(f32 (vector_extract (v4f32 VR128:$dst), (iPTR 0))),
|
||||||
(MULSSrr_Int v4f32:$dst, (COPY_TO_REGCLASS FR32:$src, VR128))>;
|
FR32:$src))))),
|
||||||
def : Pat<(v4f32 (X86Movss (v4f32 VR128:$dst), (v4f32 (scalar_to_vector (fdiv
|
(MULSSrr_Int v4f32:$dst, (COPY_TO_REGCLASS FR32:$src, VR128))>;
|
||||||
(f32 (vector_extract (v4f32 VR128:$dst), (iPTR 0))),
|
def : Pat<(v4f32 (X86Movss (v4f32 VR128:$dst), (v4f32 (scalar_to_vector (fdiv
|
||||||
FR32:$src))))),
|
(f32 (vector_extract (v4f32 VR128:$dst), (iPTR 0))),
|
||||||
(DIVSSrr_Int v4f32:$dst, (COPY_TO_REGCLASS FR32:$src, VR128))>;
|
FR32:$src))))),
|
||||||
|
(DIVSSrr_Int v4f32:$dst, (COPY_TO_REGCLASS FR32:$src, VR128))>;
|
||||||
|
}
|
||||||
|
|
||||||
let Predicates = [HasSSE2] in {
|
let Predicates = [UseSSE2] in {
|
||||||
// SSE2 patterns to select scalar double-precision fp arithmetic instructions
|
// SSE2 patterns to select scalar double-precision fp arithmetic instructions
|
||||||
|
|
||||||
def : Pat<(v2f64 (X86Movsd (v2f64 VR128:$dst), (v2f64 (scalar_to_vector (fadd
|
def : Pat<(v2f64 (X86Movsd (v2f64 VR128:$dst), (v2f64 (scalar_to_vector (fadd
|
||||||
|
@ -3117,11 +3119,9 @@ let Predicates = [UseSSE41] in {
|
||||||
(DIVSSrr_Int v4f32:$dst, (COPY_TO_REGCLASS FR32:$src, VR128))>;
|
(DIVSSrr_Int v4f32:$dst, (COPY_TO_REGCLASS FR32:$src, VR128))>;
|
||||||
}
|
}
|
||||||
|
|
||||||
let AddedComplexity = 20, Predicates = [HasAVX] in {
|
let Predicates = [HasAVX] in {
|
||||||
// The following patterns select AVX Scalar single/double precision fp
|
// The following patterns select AVX Scalar single/double precision fp
|
||||||
// arithmetic instructions.
|
// arithmetic instructions.
|
||||||
// The 'AddedComplexity' is required to give them higher priority over
|
|
||||||
// the equivalent SSE/SSE2 patterns.
|
|
||||||
|
|
||||||
def : Pat<(v2f64 (X86Movsd (v2f64 VR128:$dst), (v2f64 (scalar_to_vector (fadd
|
def : Pat<(v2f64 (X86Movsd (v2f64 VR128:$dst), (v2f64 (scalar_to_vector (fadd
|
||||||
(f64 (vector_extract (v2f64 VR128:$dst), (iPTR 0))),
|
(f64 (vector_extract (v2f64 VR128:$dst), (iPTR 0))),
|
||||||
|
@ -3176,20 +3176,22 @@ let AddedComplexity = 20, Predicates = [HasAVX] in {
|
||||||
// we now generate:
|
// we now generate:
|
||||||
// addss %xmm1, %xmm0
|
// addss %xmm1, %xmm0
|
||||||
|
|
||||||
def : Pat<(v4f32 (X86Movss (v4f32 VR128:$dst),
|
let Predicates = [UseSSE1] in {
|
||||||
(fadd (v4f32 VR128:$dst), (v4f32 VR128:$src)))),
|
def : Pat<(v4f32 (X86Movss (v4f32 VR128:$dst),
|
||||||
(ADDSSrr_Int v4f32:$dst, v4f32:$src)>;
|
(fadd (v4f32 VR128:$dst), (v4f32 VR128:$src)))),
|
||||||
def : Pat<(v4f32 (X86Movss (v4f32 VR128:$dst),
|
(ADDSSrr_Int v4f32:$dst, v4f32:$src)>;
|
||||||
(fsub (v4f32 VR128:$dst), (v4f32 VR128:$src)))),
|
def : Pat<(v4f32 (X86Movss (v4f32 VR128:$dst),
|
||||||
(SUBSSrr_Int v4f32:$dst, v4f32:$src)>;
|
(fsub (v4f32 VR128:$dst), (v4f32 VR128:$src)))),
|
||||||
def : Pat<(v4f32 (X86Movss (v4f32 VR128:$dst),
|
(SUBSSrr_Int v4f32:$dst, v4f32:$src)>;
|
||||||
(fmul (v4f32 VR128:$dst), (v4f32 VR128:$src)))),
|
def : Pat<(v4f32 (X86Movss (v4f32 VR128:$dst),
|
||||||
(MULSSrr_Int v4f32:$dst, v4f32:$src)>;
|
(fmul (v4f32 VR128:$dst), (v4f32 VR128:$src)))),
|
||||||
def : Pat<(v4f32 (X86Movss (v4f32 VR128:$dst),
|
(MULSSrr_Int v4f32:$dst, v4f32:$src)>;
|
||||||
(fdiv (v4f32 VR128:$dst), (v4f32 VR128:$src)))),
|
def : Pat<(v4f32 (X86Movss (v4f32 VR128:$dst),
|
||||||
(DIVSSrr_Int v4f32:$dst, v4f32:$src)>;
|
(fdiv (v4f32 VR128:$dst), (v4f32 VR128:$src)))),
|
||||||
|
(DIVSSrr_Int v4f32:$dst, v4f32:$src)>;
|
||||||
|
}
|
||||||
|
|
||||||
let Predicates = [HasSSE2] in {
|
let Predicates = [UseSSE2] in {
|
||||||
// SSE2 patterns to select scalar double-precision fp arithmetic instructions
|
// SSE2 patterns to select scalar double-precision fp arithmetic instructions
|
||||||
// from a packed double-precision fp instruction plus movsd.
|
// from a packed double-precision fp instruction plus movsd.
|
||||||
|
|
||||||
|
@ -3207,12 +3209,10 @@ let Predicates = [HasSSE2] in {
|
||||||
(DIVSDrr_Int v2f64:$dst, v2f64:$src)>;
|
(DIVSDrr_Int v2f64:$dst, v2f64:$src)>;
|
||||||
}
|
}
|
||||||
|
|
||||||
let AddedComplexity = 20, Predicates = [HasAVX] in {
|
let Predicates = [HasAVX] in {
|
||||||
// The following patterns select AVX Scalar single/double precision fp
|
// The following patterns select AVX Scalar single/double precision fp
|
||||||
// arithmetic instructions from a packed single precision fp instruction
|
// arithmetic instructions from a packed single precision fp instruction
|
||||||
// plus movss/movsd.
|
// plus movss/movsd.
|
||||||
// The 'AddedComplexity' is required to give them higher priority over
|
|
||||||
// the equivalent SSE/SSE2 patterns.
|
|
||||||
|
|
||||||
def : Pat<(v4f32 (X86Movss (v4f32 VR128:$dst),
|
def : Pat<(v4f32 (X86Movss (v4f32 VR128:$dst),
|
||||||
(fadd (v4f32 VR128:$dst), (v4f32 VR128:$src)))),
|
(fadd (v4f32 VR128:$dst), (v4f32 VR128:$src)))),
|
||||||
|
|
Loading…
Reference in New Issue