[AVX512] Copy the patterns that recognize scalar arithmetic operations inserting into the lower element of a packed vector from AVX/SSE so that we can use EVEX encoded instructions.
llvm-svn: 277119
This commit is contained in:
parent 07aa37039e
commit 5625d24977
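To make the change concrete, here is a hypothetical source-level example mirroring the first case in the comment block added below. With these patterns, a scalar fp operation whose result is inserted into the low element of a packed vector should select the EVEX-encoded scalar instruction (e.g. VADDSSZrr_Int) on AVX-512 targets. This is a sketch only, assuming clang with -mavx512f; the function name is illustrative:

/* Hypothetical example of pattern (1): a scalar op inserted into lane 0.
   The element subscripts use the GNU vector-extension syntax clang accepts. */
#include <immintrin.h>

__m128 foo(__m128 A, __m128 B) {
  A[0] += B[0];  /* scalar add into the low element */
  return A;      /* expected: one EVEX vaddss, no trailing vmovss/insert */
}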
@@ -8027,3 +8027,106 @@ defm VFIXUPIMMPS : avx512_fixupimm_packed_all<avx512vl_f32_info>,
                    EVEX_CD8<32, CD8VF>;
 defm VFIXUPIMMPD : avx512_fixupimm_packed_all<avx512vl_f64_info>,
                    EVEX_CD8<64, CD8VF>, VEX_W;
+
+// Patterns used to select SSE scalar fp arithmetic instructions from
+// either:
+//
+// (1) a scalar fp operation followed by a blend
+//
+// The effect is that the backend no longer emits unnecessary vector
+// insert instructions immediately after SSE scalar fp instructions
+// like addss or mulss.
+//
+// For example, given the following code:
+//   __m128 foo(__m128 A, __m128 B) {
+//     A[0] += B[0];
+//     return A;
+//   }
+//
+// Previously we generated:
+//   addss %xmm0, %xmm1
+//   movss %xmm1, %xmm0
+//
+// We now generate:
+//   addss %xmm1, %xmm0
+//
+// (2) a vector packed single/double fp operation followed by a vector insert
+//
+// The effect is that the backend converts the packed fp instruction
+// followed by a vector insert into a single SSE scalar fp instruction.
+//
+// For example, given the following code:
+//   __m128 foo(__m128 A, __m128 B) {
+//     __m128 C = A + B;
+//     return (__m128) {c[0], a[1], a[2], a[3]};
+//   }
+//
+// Previously we generated:
+//   addps %xmm0, %xmm1
+//   movss %xmm1, %xmm0
+//
+// We now generate:
+//   addss %xmm1, %xmm0
+
+// TODO: Some canonicalization in lowering would simplify the number of
+// patterns we have to try to match.
+multiclass AVX512_scalar_math_f32_patterns<SDNode Op, string OpcPrefix> {
+  let Predicates = [HasAVX512] in {
+    // extracted scalar math op with insert via blend
+    def : Pat<(v4f32 (X86Blendi (v4f32 VR128:$dst), (v4f32 (scalar_to_vector
+          (Op (f32 (extractelt (v4f32 VR128:$dst), (iPTR 0))),
+          FR32:$src))), (i8 1))),
+      (!cast<I>("V"#OpcPrefix#SSZrr_Int) v4f32:$dst,
+          (COPY_TO_REGCLASS FR32:$src, VR128))>;
+
+    // vector math op with insert via movss
+    def : Pat<(v4f32 (X86Movss (v4f32 VR128:$dst),
+          (Op (v4f32 VR128:$dst), (v4f32 VR128:$src)))),
+      (!cast<I>("V"#OpcPrefix#SSZrr_Int) v4f32:$dst, v4f32:$src)>;
+
+    // vector math op with insert via blend
+    def : Pat<(v4f32 (X86Blendi (v4f32 VR128:$dst),
+          (Op (v4f32 VR128:$dst), (v4f32 VR128:$src)), (i8 1))),
+      (!cast<I>("V"#OpcPrefix#SSZrr_Int) v4f32:$dst, v4f32:$src)>;
+  }
+}
+
+defm : AVX512_scalar_math_f32_patterns<fadd, "ADD">;
+defm : AVX512_scalar_math_f32_patterns<fsub, "SUB">;
+defm : AVX512_scalar_math_f32_patterns<fmul, "MUL">;
+defm : AVX512_scalar_math_f32_patterns<fdiv, "DIV">;
+
+multiclass AVX512_scalar_math_f64_patterns<SDNode Op, string OpcPrefix> {
+  let Predicates = [HasAVX512] in {
+    // extracted scalar math op with insert via movsd
+    def : Pat<(v2f64 (X86Movsd (v2f64 VR128:$dst), (v2f64 (scalar_to_vector
+          (Op (f64 (extractelt (v2f64 VR128:$dst), (iPTR 0))),
+          FR64:$src))))),
+      (!cast<I>("V"#OpcPrefix#SDZrr_Int) v2f64:$dst,
+          (COPY_TO_REGCLASS FR64:$src, VR128))>;
+
+    // extracted scalar math op with insert via blend
+    def : Pat<(v2f64 (X86Blendi (v2f64 VR128:$dst), (v2f64 (scalar_to_vector
+          (Op (f64 (extractelt (v2f64 VR128:$dst), (iPTR 0))),
+          FR64:$src))), (i8 1))),
+      (!cast<I>("V"#OpcPrefix#SDZrr_Int) v2f64:$dst,
+          (COPY_TO_REGCLASS FR64:$src, VR128))>;
+
+    // vector math op with insert via movsd
+    def : Pat<(v2f64 (X86Movsd (v2f64 VR128:$dst),
+          (Op (v2f64 VR128:$dst), (v2f64 VR128:$src)))),
+      (!cast<I>("V"#OpcPrefix#SDZrr_Int) v2f64:$dst, v2f64:$src)>;
+
+    // vector math op with insert via blend
+    def : Pat<(v2f64 (X86Blendi (v2f64 VR128:$dst),
+          (Op (v2f64 VR128:$dst), (v2f64 VR128:$src)), (i8 1))),
+      (!cast<I>("V"#OpcPrefix#SDZrr_Int) v2f64:$dst, v2f64:$src)>;
+  }
+}
+
+defm : AVX512_scalar_math_f64_patterns<fadd, "ADD">;
+defm : AVX512_scalar_math_f64_patterns<fsub, "SUB">;
+defm : AVX512_scalar_math_f64_patterns<fmul, "MUL">;
+defm : AVX512_scalar_math_f64_patterns<fdiv, "DIV">;
@@ -3147,7 +3147,7 @@ multiclass scalar_math_f32_patterns<SDNode Op, string OpcPrefix> {
 
   // Repeat everything for AVX, except for the movss + scalar combo...
   // because that one shouldn't occur with AVX codegen?
-  let Predicates = [HasAVX] in {
+  let Predicates = [UseAVX] in {
     // extracted scalar math op with insert via blend
     def : Pat<(v4f32 (X86Blendi (v4f32 VR128:$dst), (v4f32 (scalar_to_vector
           (Op (f32 (extractelt (v4f32 VR128:$dst), (iPTR 0))),
@@ -3203,7 +3203,7 @@ multiclass scalar_math_f64_patterns<SDNode Op, string OpcPrefix> {
   }
 
   // Repeat everything for AVX.
-  let Predicates = [HasAVX] in {
+  let Predicates = [UseAVX] in {
    // extracted scalar math op with insert via movsd
    def : Pat<(v2f64 (X86Movsd (v2f64 VR128:$dst), (v2f64 (scalar_to_vector
          (Op (f64 (extractelt (v2f64 VR128:$dst), (iPTR 0))),
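One note on the last two hunks (the SSE scalar_math pattern multiclasses): HasAVX is also true on AVX-512 subtargets, so the VEX patterns are moved to UseAVX, which holds only for AVX without AVX-512, to keep them from competing with the new EVEX patterns above. And a hypothetical illustration of case (2), the packed-op-plus-insert shape, written with standard intrinsics as a sketch assuming clang with -mavx512f:

/* Hypothetical example of pattern (2): a packed add whose result is only
   used in lane 0 should collapse to a single scalar EVEX vaddss instead of
   vaddps followed by vmovss. */
#include <immintrin.h>

__m128 bar(__m128 a, __m128 b) {
  __m128 c = _mm_add_ps(a, b);  /* packed single-precision add */
  return _mm_move_ss(a, c);     /* gives {c[0], a[1], a[2], a[3]} */
}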