AVX-512: Implemented masking for integer arithmetic & logic instructions.

By Robert Khasanov <rob.khasanov@gmail.com>

llvm-svn: 204906
Committed by Elena Demikhovsky, 2014-03-27 09:45:08 +00:00
parent c13ee34378
commit bb2f6b72d3
5 changed files with 1923 additions and 93 deletions
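In IR terms, the new masked variants let the usual select-over-binop idiom (exercised by the new tests below) select directly to a single masked instruction. A minimal sketch in the style of those tests; the function name is illustrative:

; Merge masking: lanes where %mask1 is zero keep the corresponding lane of %i.
; Expected to select to a single masked add, e.g. vpaddd %zmm1, %zmm0, %zmm0 {%k1}.
define <16 x i32> @masked_add_sketch(<16 x i32> %i, <16 x i32> %j, <16 x i32> %mask1) nounwind readnone {
  %mask = icmp ne <16 x i32> %mask1, zeroinitializer
  %x = add <16 x i32> %i, %j
  %r = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> %i
  ret <16 x i32> %r
}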

lib/Target/X86/X86InstrAVX512.td

@@ -1754,7 +1754,8 @@ def : Pat<(v8i64 (X86Vinsert undef, GR64:$src2, (iPTR 0))),
// AVX-512 - Integer arithmetic
//
multiclass avx512_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
ValueType OpVT, RegisterClass RC, PatFrag memop_frag,
ValueType OpVT, RegisterClass KRC,
RegisterClass RC, PatFrag memop_frag,
X86MemOperand x86memop, PatFrag scalar_mfrag,
X86MemOperand x86scalar_mop, string BrdcstStr,
OpndItins itins, bit IsCommutable = 0> {
@@ -1764,11 +1765,51 @@ multiclass avx512_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
!strconcat(OpcodeStr, " \t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set RC:$dst, (OpVT (OpNode (OpVT RC:$src1), (OpVT RC:$src2))))],
itins.rr>, EVEX_4V;
let AddedComplexity = 30 in {
let Constraints = "$src0 = $dst" in
def rrk : AVX512BI<opc, MRMSrcReg, (outs RC:$dst),
(ins RC:$src0, KRC:$mask, RC:$src1, RC:$src2),
!strconcat(OpcodeStr,
" \t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}"),
[(set RC:$dst, (OpVT (vselect KRC:$mask,
(OpNode (OpVT RC:$src1), (OpVT RC:$src2)),
RC:$src0)))],
itins.rr>, EVEX_4V, EVEX_K;
def rrkz : AVX512BI<opc, MRMSrcReg, (outs RC:$dst),
(ins KRC:$mask, RC:$src1, RC:$src2),
!strconcat(OpcodeStr, " \t{$src2, $src1, $dst {${mask}} {z}" ,
"|$dst {${mask}} {z}, $src1, $src2}"),
[(set RC:$dst, (OpVT (vselect KRC:$mask,
(OpNode (OpVT RC:$src1), (OpVT RC:$src2)),
(OpVT immAllZerosV))))],
itins.rr>, EVEX_4V, EVEX_KZ;
}
let mayLoad = 1 in {
def rm : AVX512BI<opc, MRMSrcMem, (outs RC:$dst),
(ins RC:$src1, x86memop:$src2),
!strconcat(OpcodeStr, " \t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set RC:$dst, (OpVT (OpNode (OpVT RC:$src1), (memop_frag addr:$src2))))],
itins.rm>, EVEX_4V;
let AddedComplexity = 30 in {
let Constraints = "$src0 = $dst" in
def rmk : AVX512BI<opc, MRMSrcMem, (outs RC:$dst),
(ins RC:$src0, KRC:$mask, RC:$src1, x86memop:$src2),
!strconcat(OpcodeStr,
" \t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}"),
[(set RC:$dst, (OpVT (vselect KRC:$mask,
(OpNode (OpVT RC:$src1), (memop_frag addr:$src2)),
RC:$src0)))],
itins.rm>, EVEX_4V, EVEX_K;
def rmkz : AVX512BI<opc, MRMSrcMem, (outs RC:$dst),
(ins KRC:$mask, RC:$src1, x86memop:$src2),
!strconcat(OpcodeStr,
" \t{$src2, $src1, $dst {${mask}} {z}|$dst {${mask}} {z}, $src1, $src2}"),
[(set RC:$dst, (OpVT (vselect KRC:$mask,
(OpNode (OpVT RC:$src1), (memop_frag addr:$src2)),
(OpVT immAllZerosV))))],
itins.rm>, EVEX_4V, EVEX_KZ;
}
def rmb : AVX512BI<opc, MRMSrcMem, (outs RC:$dst),
(ins RC:$src1, x86scalar_mop:$src2),
!strconcat(OpcodeStr, " \t{${src2}", BrdcstStr,
@@ -1776,50 +1817,117 @@ multiclass avx512_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
[(set RC:$dst, (OpNode RC:$src1,
(OpVT (X86VBroadcast (scalar_mfrag addr:$src2)))))],
itins.rm>, EVEX_4V, EVEX_B;
let AddedComplexity = 30 in {
let Constraints = "$src0 = $dst" in
def rmbk : AVX512BI<opc, MRMSrcMem, (outs RC:$dst),
(ins RC:$src0, KRC:$mask, RC:$src1, x86scalar_mop:$src2),
!strconcat(OpcodeStr, " \t{${src2}", BrdcstStr,
", $src1, $dst {${mask}}|$dst {${mask}}, $src1, ${src2}",
BrdcstStr, "}"),
[(set RC:$dst, (OpVT (vselect KRC:$mask,
(OpNode (OpVT RC:$src1),
(OpVT (X86VBroadcast (scalar_mfrag addr:$src2)))),
RC:$src0)))],
itins.rm>, EVEX_4V, EVEX_B, EVEX_K;
def rmbkz : AVX512BI<opc, MRMSrcMem, (outs RC:$dst),
(ins KRC:$mask, RC:$src1, x86scalar_mop:$src2),
!strconcat(OpcodeStr, " \t{${src2}", BrdcstStr,
", $src1, $dst {${mask}} {z}|$dst {${mask}} {z}, $src1, ${src2}",
BrdcstStr, "}"),
[(set RC:$dst, (OpVT (vselect KRC:$mask,
(OpNode (OpVT RC:$src1),
(OpVT (X86VBroadcast (scalar_mfrag addr:$src2)))),
(OpVT immAllZerosV))))],
itins.rm>, EVEX_4V, EVEX_B, EVEX_KZ;
}
multiclass avx512_binop_rm2<bits<8> opc, string OpcodeStr,
ValueType DstVT, ValueType SrcVT, RegisterClass RC,
}
}
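The rrk/rmk variants above tie $src0 to $dst and match vselect with the destination as the false operand (merge masking), while the rrkz/rmkz variants match vselect against immAllZerosV and print the {z} modifier (zeroing masking). A sketch of the zeroing form, mirroring the new vpaddd_maskz_test below; the function name is illustrative:

; Zeroing masking: masked-off lanes are cleared rather than merged from a source.
; Expected to select to the rrkz form, e.g. vpaddd %zmm1, %zmm0, %zmm0 {%k1} {z}.
define <16 x i32> @maskz_add_sketch(<16 x i32> %i, <16 x i32> %j, <16 x i32> %mask1) nounwind readnone {
  %mask = icmp ne <16 x i32> %mask1, zeroinitializer
  %x = add <16 x i32> %i, %j
  %r = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> zeroinitializer
  ret <16 x i32> %r
}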
multiclass avx512_binop_rm2<bits<8> opc, string OpcodeStr, ValueType DstVT,
ValueType SrcVT, RegisterClass KRC, RegisterClass RC,
PatFrag memop_frag, X86MemOperand x86memop,
OpndItins itins,
bit IsCommutable = 0> {
PatFrag scalar_mfrag, X86MemOperand x86scalar_mop,
string BrdcstStr, OpndItins itins, bit IsCommutable = 0> {
let isCommutable = IsCommutable in
{
def rr : AVX512BI<opc, MRMSrcReg, (outs RC:$dst),
(ins RC:$src1, RC:$src2),
!strconcat(OpcodeStr, " \t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[]>, EVEX_4V, VEX_W;
[]>, EVEX_4V;
def rrk : AVX512BI<opc, MRMSrcReg, (outs RC:$dst),
(ins KRC:$mask, RC:$src1, RC:$src2),
!strconcat(OpcodeStr,
" \t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}"),
[], itins.rr>, EVEX_4V, EVEX_K;
def rrkz : AVX512BI<opc, MRMSrcReg, (outs RC:$dst),
(ins KRC:$mask, RC:$src1, RC:$src2),
!strconcat(OpcodeStr, " \t{$src2, $src1, $dst {${mask}} {z}" ,
"|$dst {${mask}} {z}, $src1, $src2}"),
[], itins.rr>, EVEX_4V, EVEX_KZ;
}
let mayLoad = 1 in {
def rm : AVX512BI<opc, MRMSrcMem, (outs RC:$dst),
(ins RC:$src1, x86memop:$src2),
!strconcat(OpcodeStr, " \t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[]>, EVEX_4V, VEX_W;
[]>, EVEX_4V;
def rmk : AVX512BI<opc, MRMSrcMem, (outs RC:$dst),
(ins KRC:$mask, RC:$src1, x86memop:$src2),
!strconcat(OpcodeStr,
" \t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}"),
[], itins.rm>, EVEX_4V, EVEX_K;
def rmkz : AVX512BI<opc, MRMSrcMem, (outs RC:$dst),
(ins KRC:$mask, RC:$src1, x86memop:$src2),
!strconcat(OpcodeStr,
" \t{$src2, $src1, $dst {${mask}} {z}|$dst {${mask}} {z}, $src1, $src2}"),
[], itins.rm>, EVEX_4V, EVEX_KZ;
def rmb : AVX512BI<opc, MRMSrcMem, (outs RC:$dst),
(ins RC:$src1, x86scalar_mop:$src2),
!strconcat(OpcodeStr, " \t{${src2}", BrdcstStr,
", $src1, $dst|$dst, $src1, ${src2}", BrdcstStr, "}"),
[], itins.rm>, EVEX_4V, EVEX_B;
def rmbk : AVX512BI<opc, MRMSrcMem, (outs RC:$dst),
(ins KRC:$mask, RC:$src1, x86scalar_mop:$src2),
!strconcat(OpcodeStr, " \t{${src2}", BrdcstStr,
", $src1, $dst {${mask}}|$dst {${mask}}, $src1, ${src2}",
BrdcstStr, "}"),
[], itins.rm>, EVEX_4V, EVEX_B, EVEX_K;
def rmbkz : AVX512BI<opc, MRMSrcMem, (outs RC:$dst),
(ins KRC:$mask, RC:$src1, x86scalar_mop:$src2),
!strconcat(OpcodeStr, " \t{${src2}", BrdcstStr,
", $src1, $dst {${mask}} {z}|$dst {${mask}} {z}, $src1, ${src2}",
BrdcstStr, "}"),
[], itins.rm>, EVEX_4V, EVEX_B, EVEX_KZ;
}
}
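The rmb/rmbk/rmbkz forms take a scalar memory operand broadcast to every lane ({1to16}/{1to8}); in avx512_binop_rm they match X86VBroadcast of a scalar load, while the avx512_binop_rm2 copies carry no ISel patterns and only add the encodings, with the unmasked multiplies selected through the Pat definitions that follow. A masked-broadcast sketch, mirroring the new vpaddd_mask_broadcast_test; the function name is illustrative:

; A splatted constant becomes a {1to16} broadcast memory operand, e.g.
; vpaddd LCP...(%rip){1to16}, %zmm0, %zmm0 {%k1}.
define <16 x i32> @mask_broadcast_add_sketch(<16 x i32> %i, <16 x i32> %mask1) nounwind readnone {
  %mask = icmp ne <16 x i32> %mask1, zeroinitializer
  %x = add <16 x i32> %i, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  %r = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> %i
  ret <16 x i32> %r
}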
defm VPADDDZ : avx512_binop_rm<0xFE, "vpaddd", add, v16i32, VR512, memopv16i32,
i512mem, loadi32, i32mem, "{1to16}", SSE_INTALU_ITINS_P, 1>,
EVEX_V512, EVEX_CD8<32, CD8VF>;
defm VPADDDZ : avx512_binop_rm<0xFE, "vpaddd", add, v16i32, VK16WM, VR512,
memopv16i32, i512mem, loadi32, i32mem, "{1to16}",
SSE_INTALU_ITINS_P, 1>, EVEX_V512, EVEX_CD8<32, CD8VF>;
defm VPSUBDZ : avx512_binop_rm<0xFA, "vpsubd", sub, v16i32, VR512, memopv16i32,
i512mem, loadi32, i32mem, "{1to16}", SSE_INTALU_ITINS_P, 0>,
EVEX_V512, EVEX_CD8<32, CD8VF>;
defm VPSUBDZ : avx512_binop_rm<0xFA, "vpsubd", sub, v16i32, VK16WM, VR512,
memopv16i32, i512mem, loadi32, i32mem, "{1to16}",
SSE_INTALU_ITINS_P, 0>, EVEX_V512, EVEX_CD8<32, CD8VF>;
defm VPMULLDZ : avx512_binop_rm<0x40, "vpmulld", mul, v16i32, VR512, memopv16i32,
i512mem, loadi32, i32mem, "{1to16}", SSE_INTALU_ITINS_P, 1>,
T8PD, EVEX_V512, EVEX_CD8<32, CD8VF>;
defm VPMULLDZ : avx512_binop_rm<0x40, "vpmulld", mul, v16i32, VK16WM, VR512,
memopv16i32, i512mem, loadi32, i32mem, "{1to16}",
SSE_INTALU_ITINS_P, 1>, T8PD, EVEX_V512, EVEX_CD8<32, CD8VF>;
defm VPADDQZ : avx512_binop_rm<0xD4, "vpaddq", add, v8i64, VR512, memopv8i64,
i512mem, loadi64, i64mem, "{1to8}", SSE_INTALU_ITINS_P, 1>,
EVEX_CD8<64, CD8VF>, EVEX_V512, VEX_W;
defm VPADDQZ : avx512_binop_rm<0xD4, "vpaddq", add, v8i64, VK8WM, VR512,
memopv8i64, i512mem, loadi64, i64mem, "{1to8}",
SSE_INTALU_ITINS_P, 1>, EVEX_CD8<64, CD8VF>, EVEX_V512, VEX_W;
defm VPSUBQZ : avx512_binop_rm<0xFB, "vpsubq", sub, v8i64, VR512, memopv8i64,
i512mem, loadi64, i64mem, "{1to8}", SSE_INTALU_ITINS_P, 0>,
EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
defm VPSUBQZ : avx512_binop_rm<0xFB, "vpsubq", sub, v8i64, VK8WM, VR512,
memopv8i64, i512mem, loadi64, i64mem, "{1to8}",
SSE_INTALU_ITINS_P, 0>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
defm VPMULDQZ : avx512_binop_rm2<0x28, "vpmuldq", v8i64, v16i32,
VR512, memopv8i64, i512mem, SSE_INTALU_ITINS_P, 1>, T8PD,
EVEX_V512, EVEX_CD8<64, CD8VF>;
defm VPMULDQZ : avx512_binop_rm2<0x28, "vpmuldq", v8i64, v16i32, VK8WM, VR512,
memopv8i64, i512mem, loadi64, i64mem, "{1to8}",
SSE_INTALU_ITINS_P, 1>, T8PD, EVEX_V512,
EVEX_CD8<64, CD8VF>, VEX_W;
defm VPMULUDQZ : avx512_binop_rm2<0xF4, "vpmuludq", v8i64, v16i32,
VR512, memopv8i64, i512mem, SSE_INTMUL_ITINS_P, 1>, EVEX_V512,
EVEX_CD8<64, CD8VF>;
defm VPMULUDQZ : avx512_binop_rm2<0xF4, "vpmuludq", v8i64, v16i32, VK8WM, VR512,
memopv8i64, i512mem, loadi64, i64mem, "{1to8}",
SSE_INTMUL_ITINS_P, 1>, EVEX_V512, EVEX_CD8<64, CD8VF>, VEX_W;
def : Pat<(v8i64 (X86pmuludq (v16i32 VR512:$src1), (v16i32 VR512:$src2))),
(VPMULUDQZrr VR512:$src1, VR512:$src2)>;
@@ -1831,32 +1939,40 @@ def : Pat<(v8i64 (int_x86_avx512_mask_pmul_dq_512 (v16i32 VR512:$src1),
(v16i32 VR512:$src2), (bc_v8i64 (v16i32 immAllZerosV)), (i8 -1))),
(VPMULDQZrr VR512:$src1, VR512:$src2)>;
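The Pat definitions above map the X86pmuludq node and the all-ones-mask, zero-pass-through forms of the 512-bit multiply intrinsics onto the unmasked register-register instructions. A sketch of the intrinsic form; the declaration is inferred from the pattern's operand types:

; With an all-ones mask and zero pass-through this is expected to select to
; plain vpmuldq %zmm1, %zmm0, %zmm0.
declare <8 x i64> @llvm.x86.avx512.mask.pmul.dq.512(<16 x i32>, <16 x i32>, <8 x i64>, i8)

define <8 x i64> @pmuldq_sketch(<16 x i32> %a, <16 x i32> %b) nounwind readnone {
  %r = call <8 x i64> @llvm.x86.avx512.mask.pmul.dq.512(<16 x i32> %a, <16 x i32> %b,
                                                        <8 x i64> zeroinitializer, i8 -1)
  ret <8 x i64> %r
}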
defm VPMAXUDZ : avx512_binop_rm<0x3F, "vpmaxud", X86umax, v16i32, VR512, memopv16i32,
i512mem, loadi32, i32mem, "{1to16}", SSE_INTALU_ITINS_P, 1>,
defm VPMAXUDZ : avx512_binop_rm<0x3F, "vpmaxud", X86umax, v16i32, VK16WM, VR512,
memopv16i32, i512mem, loadi32, i32mem, "{1to16}",
SSE_INTALU_ITINS_P, 1>,
T8PD, EVEX_V512, EVEX_CD8<32, CD8VF>;
defm VPMAXUQZ : avx512_binop_rm<0x3F, "vpmaxuq", X86umax, v8i64, VR512, memopv8i64,
i512mem, loadi64, i64mem, "{1to8}", SSE_INTALU_ITINS_P, 0>,
defm VPMAXUQZ : avx512_binop_rm<0x3F, "vpmaxuq", X86umax, v8i64, VK8WM, VR512,
memopv8i64, i512mem, loadi64, i64mem, "{1to8}",
SSE_INTALU_ITINS_P, 0>,
T8PD, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
defm VPMAXSDZ : avx512_binop_rm<0x3D, "vpmaxsd", X86smax, v16i32, VR512, memopv16i32,
i512mem, loadi32, i32mem, "{1to16}", SSE_INTALU_ITINS_P, 1>,
defm VPMAXSDZ : avx512_binop_rm<0x3D, "vpmaxsd", X86smax, v16i32, VK16WM, VR512,
memopv16i32, i512mem, loadi32, i32mem, "{1to16}",
SSE_INTALU_ITINS_P, 1>,
T8PD, EVEX_V512, EVEX_CD8<32, CD8VF>;
defm VPMAXSQZ : avx512_binop_rm<0x3D, "vpmaxsq", X86smax, v8i64, VR512, memopv8i64,
i512mem, loadi64, i64mem, "{1to8}", SSE_INTALU_ITINS_P, 0>,
defm VPMAXSQZ : avx512_binop_rm<0x3D, "vpmaxsq", X86smax, v8i64, VK8WM, VR512,
memopv8i64, i512mem, loadi64, i64mem, "{1to8}",
SSE_INTALU_ITINS_P, 0>,
T8PD, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
defm VPMINUDZ : avx512_binop_rm<0x3B, "vpminud", X86umin, v16i32, VR512, memopv16i32,
i512mem, loadi32, i32mem, "{1to16}", SSE_INTALU_ITINS_P, 1>,
defm VPMINUDZ : avx512_binop_rm<0x3B, "vpminud", X86umin, v16i32, VK16WM, VR512,
memopv16i32, i512mem, loadi32, i32mem, "{1to16}",
SSE_INTALU_ITINS_P, 1>,
T8PD, EVEX_V512, EVEX_CD8<32, CD8VF>;
defm VPMINUQZ : avx512_binop_rm<0x3B, "vpminuq", X86umin, v8i64, VR512, memopv8i64,
i512mem, loadi64, i64mem, "{1to8}", SSE_INTALU_ITINS_P, 0>,
defm VPMINUQZ : avx512_binop_rm<0x3B, "vpminuq", X86umin, v8i64, VK8WM, VR512,
memopv8i64, i512mem, loadi64, i64mem, "{1to8}",
SSE_INTALU_ITINS_P, 0>,
T8PD, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
defm VPMINSDZ : avx512_binop_rm<0x39, "vpminsd", X86smin, v16i32, VR512, memopv16i32,
i512mem, loadi32, i32mem, "{1to16}", SSE_INTALU_ITINS_P, 1>,
defm VPMINSDZ : avx512_binop_rm<0x39, "vpminsd", X86smin, v16i32, VK16WM, VR512,
memopv16i32, i512mem, loadi32, i32mem, "{1to16}",
SSE_INTALU_ITINS_P, 1>,
T8PD, EVEX_V512, EVEX_CD8<32, CD8VF>;
defm VPMINSQZ : avx512_binop_rm<0x39, "vpminsq", X86smin, v8i64, VR512, memopv8i64,
i512mem, loadi64, i64mem, "{1to8}", SSE_INTALU_ITINS_P, 0>,
defm VPMINSQZ : avx512_binop_rm<0x39, "vpminsq", X86smin, v8i64, VK8WM, VR512,
memopv8i64, i512mem, loadi64, i64mem, "{1to8}",
SSE_INTALU_ITINS_P, 0>,
T8PD, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
def : Pat <(v16i32 (int_x86_avx512_mask_pmaxs_d_512 (v16i32 VR512:$src1),
@@ -1988,30 +2104,30 @@ def : Pat<(v8i64 (X86VPermilp VR512:$src1, (i8 imm:$imm))),
// AVX-512 Logical Instructions
//===----------------------------------------------------------------------===//
defm VPANDDZ : avx512_binop_rm<0xDB, "vpandd", and, v16i32, VR512, memopv16i32,
defm VPANDDZ : avx512_binop_rm<0xDB, "vpandd", and, v16i32, VK16WM, VR512, memopv16i32,
i512mem, loadi32, i32mem, "{1to16}", SSE_BIT_ITINS_P, 1>,
EVEX_V512, EVEX_CD8<32, CD8VF>;
defm VPANDQZ : avx512_binop_rm<0xDB, "vpandq", and, v8i64, VR512, memopv8i64,
defm VPANDQZ : avx512_binop_rm<0xDB, "vpandq", and, v8i64, VK8WM, VR512, memopv8i64,
i512mem, loadi64, i64mem, "{1to8}", SSE_BIT_ITINS_P, 1>,
EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
defm VPORDZ : avx512_binop_rm<0xEB, "vpord", or, v16i32, VR512, memopv16i32,
defm VPORDZ : avx512_binop_rm<0xEB, "vpord", or, v16i32, VK16WM, VR512, memopv16i32,
i512mem, loadi32, i32mem, "{1to16}", SSE_BIT_ITINS_P, 1>,
EVEX_V512, EVEX_CD8<32, CD8VF>;
defm VPORQZ : avx512_binop_rm<0xEB, "vporq", or, v8i64, VR512, memopv8i64,
defm VPORQZ : avx512_binop_rm<0xEB, "vporq", or, v8i64, VK8WM, VR512, memopv8i64,
i512mem, loadi64, i64mem, "{1to8}", SSE_BIT_ITINS_P, 1>,
EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
defm VPXORDZ : avx512_binop_rm<0xEF, "vpxord", xor, v16i32, VR512, memopv16i32,
defm VPXORDZ : avx512_binop_rm<0xEF, "vpxord", xor, v16i32, VK16WM, VR512, memopv16i32,
i512mem, loadi32, i32mem, "{1to16}", SSE_BIT_ITINS_P, 1>,
EVEX_V512, EVEX_CD8<32, CD8VF>;
defm VPXORQZ : avx512_binop_rm<0xEF, "vpxorq", xor, v8i64, VR512, memopv8i64,
defm VPXORQZ : avx512_binop_rm<0xEF, "vpxorq", xor, v8i64, VK8WM, VR512, memopv8i64,
i512mem, loadi64, i64mem, "{1to8}", SSE_BIT_ITINS_P, 1>,
EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
defm VPANDNDZ : avx512_binop_rm<0xDF, "vpandnd", X86andnp, v16i32, VR512,
defm VPANDNDZ : avx512_binop_rm<0xDF, "vpandnd", X86andnp, v16i32, VK16WM, VR512,
memopv16i32, i512mem, loadi32, i32mem, "{1to16}",
SSE_BIT_ITINS_P, 0>, EVEX_V512, EVEX_CD8<32, CD8VF>;
defm VPANDNQZ : avx512_binop_rm<0xDF, "vpandnq", X86andnp, v8i64, VR512, memopv8i64,
i512mem, loadi64, i64mem, "{1to8}", SSE_BIT_ITINS_P, 0>,
EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
defm VPANDNQZ : avx512_binop_rm<0xDF, "vpandnq", X86andnp, v8i64, VK8WM, VR512,
memopv8i64, i512mem, loadi64, i64mem, "{1to8}",
SSE_BIT_ITINS_P, 0>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
//===----------------------------------------------------------------------===//
// AVX-512 FP arithmetic
@@ -3935,28 +4051,80 @@ def : Pat<(v16i32 (X86PAlignr VR512:$src1, VR512:$src2, (i8 imm:$imm))),
def : Pat<(v8i64 (X86PAlignr VR512:$src1, VR512:$src2, (i8 imm:$imm))),
(VALIGNQrri VR512:$src2, VR512:$src1, imm:$imm)>;
multiclass avx512_vpabs<bits<8> opc, string OpcodeStr, RegisterClass RC,
X86MemOperand x86memop> {
// Helper fragments to match sext vXi1 to vXiY.
def v16i1sextv16i32 : PatLeaf<(v16i32 (X86vsrai VR512:$src, (i8 31)))>;
def v8i1sextv8i64 : PatLeaf<(v8i64 (X86vsrai VR512:$src, (i8 63)))>;
multiclass avx512_vpabs<bits<8> opc, string OpcodeStr, ValueType OpVT,
RegisterClass KRC, RegisterClass RC,
X86MemOperand x86memop, X86MemOperand x86scalar_mop,
string BrdcstStr> {
def rr : AVX5128I<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src),
!strconcat(OpcodeStr, " \t{$src, $dst|$dst, $src}"), []>,
EVEX;
!strconcat(OpcodeStr, " \t{$src, $dst|$dst, $src}"),
[]>, EVEX;
def rrk : AVX5128I<opc, MRMSrcReg, (outs RC:$dst), (ins KRC:$mask, RC:$src),
!strconcat(OpcodeStr, " \t{$src, $dst {${mask}}|$dst {${mask}}, $src}"),
[]>, EVEX, EVEX_K;
def rrkz : AVX5128I<opc, MRMSrcReg, (outs RC:$dst), (ins KRC:$mask, RC:$src),
!strconcat(OpcodeStr,
" \t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}"),
[]>, EVEX, EVEX_KZ;
let mayLoad = 1 in {
def rm : AVX5128I<opc, MRMSrcMem, (outs VR512:$dst),
(ins x86memop:$src),
!strconcat(OpcodeStr, " \t{$src, $dst|$dst, $src}"), []>,
EVEX;
!strconcat(OpcodeStr, " \t{$src, $dst|$dst, $src}"),
[]>, EVEX;
def rmk : AVX5128I<opc, MRMSrcMem, (outs VR512:$dst),
(ins KRC:$mask, x86memop:$src),
!strconcat(OpcodeStr,
" \t{$src, $dst {${mask}}|$dst {${mask}}, $src}"),
[]>, EVEX, EVEX_K;
def rmkz : AVX5128I<opc, MRMSrcMem, (outs VR512:$dst),
(ins KRC:$mask, x86memop:$src),
!strconcat(OpcodeStr,
" \t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}"),
[]>, EVEX, EVEX_KZ;
def rmb : AVX5128I<opc, MRMSrcMem, (outs VR512:$dst),
(ins x86scalar_mop:$src),
!strconcat(OpcodeStr, " \t{${src}", BrdcstStr,
", $dst|$dst, ${src}", BrdcstStr, "}"),
[]>, EVEX, EVEX_B;
def rmbk : AVX5128I<opc, MRMSrcMem, (outs VR512:$dst),
(ins KRC:$mask, x86scalar_mop:$src),
!strconcat(OpcodeStr, " \t{${src}", BrdcstStr,
", $dst {${mask}}|$dst {${mask}}, ${src}", BrdcstStr, "}"),
[]>, EVEX, EVEX_B, EVEX_K;
def rmbkz : AVX5128I<opc, MRMSrcMem, (outs VR512:$dst),
(ins KRC:$mask, x86scalar_mop:$src),
!strconcat(OpcodeStr, " \t{${src}", BrdcstStr,
", $dst {${mask}} {z}|$dst {${mask}} {z}, ${src}",
BrdcstStr, "}"),
[]>, EVEX, EVEX_B, EVEX_KZ;
}
}
defm VPABSD : avx512_vpabs<0x1E, "vpabsd", VR512, i512mem>, EVEX_V512,
defm VPABSDZ : avx512_vpabs<0x1E, "vpabsd", v16i32, VK16WM, VR512,
i512mem, i32mem, "{1to16}">, EVEX_V512,
EVEX_CD8<32, CD8VF>;
defm VPABSQ : avx512_vpabs<0x1F, "vpabsq", VR512, i512mem>, EVEX_V512, VEX_W,
defm VPABSQZ : avx512_vpabs<0x1F, "vpabsq", v8i64, VK8WM, VR512,
i512mem, i64mem, "{1to8}">, EVEX_V512, VEX_W,
EVEX_CD8<64, CD8VF>;
def : Pat<(xor
(bc_v16i32 (v16i1sextv16i32)),
(bc_v16i32 (add (v16i32 VR512:$src), (v16i1sextv16i32)))),
(VPABSDZrr VR512:$src)>;
def : Pat<(xor
(bc_v8i64 (v8i1sextv8i64)),
(bc_v8i64 (add (v8i64 VR512:$src), (v8i1sextv8i64)))),
(VPABSQZrr VR512:$src)>;
def : Pat<(v16i32 (int_x86_avx512_mask_pabs_d_512 (v16i32 VR512:$src),
(v16i32 immAllZerosV), (i16 -1))),
(VPABSDrr VR512:$src)>;
(VPABSDZrr VR512:$src)>;
def : Pat<(v8i64 (int_x86_avx512_mask_pabs_q_512 (v8i64 VR512:$src),
(bc_v8i64 (v16i32 immAllZerosV)), (i8 -1))),
(VPABSQrr VR512:$src)>;
(VPABSQZrr VR512:$src)>;
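The new v16i1sextv16i32/v8i1sextv8i64 PatLeaf fragments recognise the sign mask produced by an arithmetic shift right by 31/63, so the shift-based absolute-value idiom folds to a single vpabsd/vpabsq; DAG combining produces this form from the compare-and-select tests added below. A sketch of the idiom the xor/add patterns match; the function name is illustrative:

; abs(x) via the sign mask: m = x >>s 63; abs = (x + m) ^ m.
; Expected to select to vpabsq %zmm0, %zmm0 through the Pat above.
define <8 x i64> @abs_idiom_sketch(<8 x i64> %a) nounwind {
  %m   = ashr <8 x i64> %a, <i64 63, i64 63, i64 63, i64 63, i64 63, i64 63, i64 63, i64 63>
  %sum = add <8 x i64> %a, %m
  %abs = xor <8 x i64> %sum, %m
  ret <8 x i64> %abs
}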
multiclass avx512_conflict<bits<8> opc, string OpcodeStr,
RegisterClass RC, RegisterClass KRC,

lib/Target/X86/X86InstrInfo.cpp

@@ -605,6 +605,8 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
{ X86::VMOVDQA64rr, X86::VMOVDQA64rm, TB_ALIGN_64 },
{ X86::VMOVDQU32rr, X86::VMOVDQU32rm, 0 },
{ X86::VMOVDQU64rr, X86::VMOVDQU64rm, 0 },
{ X86::VPABSDZrr, X86::VPABSDZrm, 0 },
{ X86::VPABSQZrr, X86::VPABSQZrm, 0 },
// AES foldable instructions
{ X86::AESIMCrr, X86::AESIMCrm, TB_ALIGN_16 },
@@ -1210,8 +1212,6 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
{ X86::PEXT64rr, X86::PEXT64rm, 0 },
// AVX-512 foldable instructions
{ X86::VPADDDZrr, X86::VPADDDZrm, 0 },
{ X86::VPADDQZrr, X86::VPADDQZrm, 0 },
{ X86::VADDPSZrr, X86::VADDPSZrm, 0 },
{ X86::VADDPDZrr, X86::VADDPDZrm, 0 },
{ X86::VSUBPSZrr, X86::VSUBPSZrm, 0 },
@@ -1224,17 +1224,31 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
{ X86::VMINPDZrr, X86::VMINPDZrm, 0 },
{ X86::VMAXPSZrr, X86::VMAXPSZrm, 0 },
{ X86::VMAXPDZrr, X86::VMAXPDZrm, 0 },
{ X86::VPADDDZrr, X86::VPADDDZrm, 0 },
{ X86::VPADDQZrr, X86::VPADDQZrm, 0 },
{ X86::VPERMPDZri, X86::VPERMPDZmi, 0 },
{ X86::VPERMPSZrr, X86::VPERMPSZrm, 0 },
{ X86::VPMAXSDZrr, X86::VPMAXSDZrm, 0 },
{ X86::VPMAXSQZrr, X86::VPMAXSQZrm, 0 },
{ X86::VPMAXUDZrr, X86::VPMAXUDZrm, 0 },
{ X86::VPMAXUQZrr, X86::VPMAXUQZrm, 0 },
{ X86::VPMINSDZrr, X86::VPMINSDZrm, 0 },
{ X86::VPMINSQZrr, X86::VPMINSQZrm, 0 },
{ X86::VPMINUDZrr, X86::VPMINUDZrm, 0 },
{ X86::VPMINUQZrr, X86::VPMINUQZrm, 0 },
{ X86::VPMULDQZrr, X86::VPMULDQZrm, 0 },
{ X86::VPSLLVDZrr, X86::VPSLLVDZrm, 0 },
{ X86::VPSLLVQZrr, X86::VPSLLVQZrm, 0 },
{ X86::VPSRAVDZrr, X86::VPSRAVDZrm, 0 },
{ X86::VPSRLVDZrr, X86::VPSRLVDZrm, 0 },
{ X86::VPSRLVQZrr, X86::VPSRLVQZrm, 0 },
{ X86::VPSUBDZrr, X86::VPSUBDZrm, 0 },
{ X86::VPSUBQZrr, X86::VPSUBQZrm, 0 },
{ X86::VSHUFPDZrri, X86::VSHUFPDZrmi, 0 },
{ X86::VSHUFPSZrri, X86::VSHUFPSZrmi, 0 },
{ X86::VALIGNQrri, X86::VALIGNQrmi, 0 },
{ X86::VALIGNDrri, X86::VALIGNDrmi, 0 },
{ X86::VPMULUDQZrr, X86::VPMULUDQZrm, 0 },
// AES foldable instructions
{ X86::AESDECLASTrr, X86::AESDECLASTrm, TB_ALIGN_16 },
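These hunks add the new Z-suffixed register forms to the register-to-memory folding table used by foldMemoryOperand (the VPADDDZ/VPADDQZ entries simply move into the alphabetised AVX-512 block), so a load feeding one of these instructions can also be folded into its memory operand. A sketch in the style of the new fold tests; the function name is illustrative:

; The load is expected to be folded into the instruction's memory operand,
; e.g. vpaddq (%rdi), %zmm0, %zmm0.
define <8 x i64> @fold_add_sketch(<8 x i64> %i, <8 x i64>* %j) nounwind {
  %v = load <8 x i64>* %j, align 4
  %x = add <8 x i64> %i, %v
  ret <8 x i64> %x
}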

test/CodeGen/X86/avx512-arith.ll

@@ -163,6 +163,40 @@ define <8 x i64> @vpaddq_test(<8 x i64> %i, <8 x i64> %j) nounwind readnone {
ret <8 x i64> %x
}
; CHECK-LABEL: vpaddq_fold_test
; CHECK: vpaddq (%
; CHECK: ret
define <8 x i64> @vpaddq_fold_test(<8 x i64> %i, <8 x i64>* %j) nounwind {
%tmp = load <8 x i64>* %j, align 4
%x = add <8 x i64> %i, %tmp
ret <8 x i64> %x
}
; CHECK-LABEL: vpaddq_broadcast_test
; CHECK: vpaddq LCP{{.*}}(%rip){1to8}
; CHECK: ret
define <8 x i64> @vpaddq_broadcast_test(<8 x i64> %i) nounwind {
%x = add <8 x i64> %i, <i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1>
ret <8 x i64> %x
}
; CHECK-LABEL: vpaddq_broadcast2_test
; CHECK: vpaddq (%rdi){1to8}
; CHECK: ret
define <8 x i64> @vpaddq_broadcast2_test(<8 x i64> %i, i64* %j) nounwind {
%tmp = load i64* %j
%j.0 = insertelement <8 x i64> undef, i64 %tmp, i32 0
%j.1 = insertelement <8 x i64> %j.0, i64 %tmp, i32 1
%j.2 = insertelement <8 x i64> %j.1, i64 %tmp, i32 2
%j.3 = insertelement <8 x i64> %j.2, i64 %tmp, i32 3
%j.4 = insertelement <8 x i64> %j.3, i64 %tmp, i32 4
%j.5 = insertelement <8 x i64> %j.4, i64 %tmp, i32 5
%j.6 = insertelement <8 x i64> %j.5, i64 %tmp, i32 6
%j.7 = insertelement <8 x i64> %j.6, i64 %tmp, i32 7
%x = add <8 x i64> %i, %j.7
ret <8 x i64> %x
}
; CHECK-LABEL: vpaddd_test
; CHECK: vpaddd %zmm
; CHECK: ret
@@ -171,6 +205,85 @@ define <16 x i32> @vpaddd_test(<16 x i32> %i, <16 x i32> %j) nounwind readnone {
ret <16 x i32> %x
}
; CHECK-LABEL: vpaddd_fold_test
; CHECK: vpaddd (%
; CHECK: ret
define <16 x i32> @vpaddd_fold_test(<16 x i32> %i, <16 x i32>* %j) nounwind {
%tmp = load <16 x i32>* %j, align 4
%x = add <16 x i32> %i, %tmp
ret <16 x i32> %x
}
; CHECK-LABEL: vpaddd_broadcast_test
; CHECK: vpaddd LCP{{.*}}(%rip){1to16}
; CHECK: ret
define <16 x i32> @vpaddd_broadcast_test(<16 x i32> %i) nounwind {
%x = add <16 x i32> %i, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
ret <16 x i32> %x
}
; CHECK-LABEL: vpaddd_mask_test
; CHECK: vpaddd {{%zmm[0-9]{1,2}, %zmm[0-9]{1,2}, %zmm[0-9]{1,2} {%k[1-7]} }}
; CHECK: ret
define <16 x i32> @vpaddd_mask_test(<16 x i32> %i, <16 x i32> %j, <16 x i32> %mask1) nounwind readnone {
%mask = icmp ne <16 x i32> %mask1, zeroinitializer
%x = add <16 x i32> %i, %j
%r = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> %i
ret <16 x i32> %r
}
; CHECK-LABEL: vpaddd_maskz_test
; CHECK: vpaddd {{%zmm[0-9]{1,2}, %zmm[0-9]{1,2}, %zmm[0-9]{1,2} {%k[1-7]} {z} }}
; CHECK: ret
define <16 x i32> @vpaddd_maskz_test(<16 x i32> %i, <16 x i32> %j, <16 x i32> %mask1) nounwind readnone {
%mask = icmp ne <16 x i32> %mask1, zeroinitializer
%x = add <16 x i32> %i, %j
%r = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> zeroinitializer
ret <16 x i32> %r
}
; CHECK-LABEL: vpaddd_mask_fold_test
; CHECK: vpaddd (%rdi), {{%zmm[0-9]{1,2}, %zmm[0-9]{1,2} {%k[1-7]} }}
; CHECK: ret
define <16 x i32> @vpaddd_mask_fold_test(<16 x i32> %i, <16 x i32>* %j.ptr, <16 x i32> %mask1) nounwind readnone {
%mask = icmp ne <16 x i32> %mask1, zeroinitializer
%j = load <16 x i32>* %j.ptr
%x = add <16 x i32> %i, %j
%r = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> %i
ret <16 x i32> %r
}
; CHECK-LABEL: vpaddd_mask_broadcast_test
; CHECK: vpaddd LCP{{.*}}(%rip){1to16}, {{%zmm[0-9]{1,2}, %zmm[0-9]{1,2} {%k[1-7]} }}
; CHECK: ret
define <16 x i32> @vpaddd_mask_broadcast_test(<16 x i32> %i, <16 x i32> %mask1) nounwind readnone {
%mask = icmp ne <16 x i32> %mask1, zeroinitializer
%x = add <16 x i32> %i, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
%r = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> %i
ret <16 x i32> %r
}
; CHECK-LABEL: vpaddd_maskz_fold_test
; CHECK: vpaddd (%rdi), {{%zmm[0-9]{1,2}, %zmm[0-9]{1,2} {%k[1-7]}}} {z}
; CHECK: ret
define <16 x i32> @vpaddd_maskz_fold_test(<16 x i32> %i, <16 x i32>* %j.ptr, <16 x i32> %mask1) nounwind readnone {
%mask = icmp ne <16 x i32> %mask1, zeroinitializer
%j = load <16 x i32>* %j.ptr
%x = add <16 x i32> %i, %j
%r = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> zeroinitializer
ret <16 x i32> %r
}
; CHECK-LABEL: vpaddd_maskz_broadcast_test
; CHECK: vpaddd LCP{{.*}}(%rip){1to16}, {{%zmm[0-9]{1,2}, %zmm[0-9]{1,2} {%k[1-7]}}} {z}
; CHECK: ret
define <16 x i32> @vpaddd_maskz_broadcast_test(<16 x i32> %i, <16 x i32> %mask1) nounwind readnone {
%mask = icmp ne <16 x i32> %mask1, zeroinitializer
%x = add <16 x i32> %i, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
%r = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> zeroinitializer
ret <16 x i32> %r
}
; CHECK-LABEL: vpsubq_test
; CHECK: vpsubq %zmm
; CHECK: ret

test/CodeGen/X86/viabs.ll

@@ -1,6 +1,7 @@
; RUN: llc < %s -march=x86-64 -mcpu=x86-64 | FileCheck %s -check-prefix=SSE2
; RUN: llc < %s -march=x86-64 -mcpu=corei7 | FileCheck %s -check-prefix=SSSE3
; RUN: llc < %s -march=x86-64 -mcpu=core-avx2 | FileCheck %s -check-prefix=AVX2
; RUN: llc < %s -march=x86-64 -mcpu=knl | FileCheck %s -check-prefix=AVX512
define <4 x i32> @test1(<4 x i32> %a) nounwind {
; SSE2-LABEL: test1:
@@ -17,6 +18,10 @@ define <4 x i32> @test1(<4 x i32> %a) nounwind {
; AVX2-LABEL: test1:
; AVX2: vpabsd
; AVX2-NEXT: ret
; AVX512-LABEL: test1:
; AVX512: vpabsd
; AVX512-NEXT: ret
%tmp1neg = sub <4 x i32> zeroinitializer, %a
%b = icmp sgt <4 x i32> %a, <i32 -1, i32 -1, i32 -1, i32 -1>
%abs = select <4 x i1> %b, <4 x i32> %a, <4 x i32> %tmp1neg
@@ -38,6 +43,10 @@ define <4 x i32> @test2(<4 x i32> %a) nounwind {
; AVX2-LABEL: test2:
; AVX2: vpabsd
; AVX2-NEXT: ret
; AVX512-LABEL: test2:
; AVX512: vpabsd
; AVX512-NEXT: ret
%tmp1neg = sub <4 x i32> zeroinitializer, %a
%b = icmp sge <4 x i32> %a, zeroinitializer
%abs = select <4 x i1> %b, <4 x i32> %a, <4 x i32> %tmp1neg
@@ -59,6 +68,10 @@ define <8 x i16> @test3(<8 x i16> %a) nounwind {
; AVX2-LABEL: test3:
; AVX2: vpabsw
; AVX2-NEXT: ret
; AVX512-LABEL: test3:
; AVX512: vpabsw
; AVX512-NEXT: ret
%tmp1neg = sub <8 x i16> zeroinitializer, %a
%b = icmp sgt <8 x i16> %a, zeroinitializer
%abs = select <8 x i1> %b, <8 x i16> %a, <8 x i16> %tmp1neg
@@ -80,6 +93,10 @@ define <16 x i8> @test4(<16 x i8> %a) nounwind {
; AVX2-LABEL: test4:
; AVX2: vpabsb
; AVX2-NEXT: ret
; AVX512-LABEL: test4:
; AVX512: vpabsb
; AVX512-NEXT: ret
%tmp1neg = sub <16 x i8> zeroinitializer, %a
%b = icmp slt <16 x i8> %a, zeroinitializer
%abs = select <16 x i1> %b, <16 x i8> %tmp1neg, <16 x i8> %a
@@ -101,6 +118,10 @@ define <4 x i32> @test5(<4 x i32> %a) nounwind {
; AVX2-LABEL: test5:
; AVX2: vpabsd
; AVX2-NEXT: ret
; AVX512-LABEL: test5:
; AVX512: vpabsd
; AVX512-NEXT: ret
%tmp1neg = sub <4 x i32> zeroinitializer, %a
%b = icmp sle <4 x i32> %a, zeroinitializer
%abs = select <4 x i1> %b, <4 x i32> %tmp1neg, <4 x i32> %a
@@ -116,6 +137,10 @@ define <8 x i32> @test6(<8 x i32> %a) nounwind {
; AVX2-LABEL: test6:
; AVX2: vpabsd {{.*}}%ymm
; AVX2-NEXT: ret
; AVX512-LABEL: test6:
; AVX512: vpabsd {{.*}}%ymm
; AVX512-NEXT: ret
%tmp1neg = sub <8 x i32> zeroinitializer, %a
%b = icmp sgt <8 x i32> %a, <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>
%abs = select <8 x i1> %b, <8 x i32> %a, <8 x i32> %tmp1neg
@@ -131,6 +156,10 @@ define <8 x i32> @test7(<8 x i32> %a) nounwind {
; AVX2-LABEL: test7:
; AVX2: vpabsd {{.*}}%ymm
; AVX2-NEXT: ret
; AVX512-LABEL: test7:
; AVX512: vpabsd {{.*}}%ymm
; AVX512-NEXT: ret
%tmp1neg = sub <8 x i32> zeroinitializer, %a
%b = icmp sge <8 x i32> %a, zeroinitializer
%abs = select <8 x i1> %b, <8 x i32> %a, <8 x i32> %tmp1neg
@@ -146,6 +175,10 @@ define <16 x i16> @test8(<16 x i16> %a) nounwind {
; AVX2-LABEL: test8:
; AVX2: vpabsw {{.*}}%ymm
; AVX2-NEXT: ret
; AVX512-LABEL: test8:
; AVX512: vpabsw {{.*}}%ymm
; AVX512-NEXT: ret
%tmp1neg = sub <16 x i16> zeroinitializer, %a
%b = icmp sgt <16 x i16> %a, zeroinitializer
%abs = select <16 x i1> %b, <16 x i16> %a, <16 x i16> %tmp1neg
@@ -161,6 +194,10 @@ define <32 x i8> @test9(<32 x i8> %a) nounwind {
; AVX2-LABEL: test9:
; AVX2: vpabsb {{.*}}%ymm
; AVX2-NEXT: ret
; AVX512-LABEL: test9:
; AVX512: vpabsb {{.*}}%ymm
; AVX512-NEXT: ret
%tmp1neg = sub <32 x i8> zeroinitializer, %a
%b = icmp slt <32 x i8> %a, zeroinitializer
%abs = select <32 x i1> %b, <32 x i8> %tmp1neg, <32 x i8> %a
@@ -176,8 +213,58 @@ define <8 x i32> @test10(<8 x i32> %a) nounwind {
; AVX2-LABEL: test10:
; AVX2: vpabsd {{.*}}%ymm
; AVX2-NEXT: ret
; AVX512-LABEL: test10:
; AVX512: vpabsd {{.*}}%ymm
; AVX512-NEXT: ret
%tmp1neg = sub <8 x i32> zeroinitializer, %a
%b = icmp sle <8 x i32> %a, zeroinitializer
%abs = select <8 x i1> %b, <8 x i32> %tmp1neg, <8 x i32> %a
ret <8 x i32> %abs
}
define <16 x i32> @test11(<16 x i32> %a) nounwind {
; AVX2-LABEL: test11:
; AVX2: vpabsd
; AVX2: vpabsd
; AVX2-NEXT: ret
; AVX512-LABEL: test11:
; AVX512: vpabsd {{.*}}%zmm
; AVX512-NEXT: ret
%tmp1neg = sub <16 x i32> zeroinitializer, %a
%b = icmp sle <16 x i32> %a, zeroinitializer
%abs = select <16 x i1> %b, <16 x i32> %tmp1neg, <16 x i32> %a
ret <16 x i32> %abs
}
define <8 x i64> @test12(<8 x i64> %a) nounwind {
; AVX2-LABEL: test12:
; AVX2: vpxor
; AVX2: vpxor
; AVX2-NEXT: ret
; AVX512-LABEL: test12:
; AVX512: vpabsq {{.*}}%zmm
; AVX512-NEXT: ret
%tmp1neg = sub <8 x i64> zeroinitializer, %a
%b = icmp sle <8 x i64> %a, zeroinitializer
%abs = select <8 x i1> %b, <8 x i64> %tmp1neg, <8 x i64> %a
ret <8 x i64> %abs
}
define <8 x i64> @test13(<8 x i64>* %a.ptr) nounwind {
; AVX2-LABEL: test13:
; AVX2: vpxor
; AVX2: vpxor
; AVX2-NEXT: ret
; AVX512-LABEL: test13:
; AVX512: vpabsq (%
; AVX512-NEXT: ret
%a = load <8 x i64>* %a.ptr, align 8
%tmp1neg = sub <8 x i64> zeroinitializer, %a
%b = icmp sle <8 x i64> %a, zeroinitializer
%abs = select <8 x i1> %b, <8 x i64> %tmp1neg, <8 x i64> %a
ret <8 x i64> %abs
}

File diff suppressed because it is too large.