forked from OSchip/llvm-project
[X86] Remove AES/CLMUL/CRC32/LDDQU/MOVNT/POPCNT/SHA schedule itineraries (PR37093)
llvm-svn: 329912
This commit is contained in:
parent
c645f61ada
commit
8904a86f65
|
@ -4267,13 +4267,12 @@ let SchedRW = [WriteVecLoad] in {
|
||||||
}
|
}
|
||||||
|
|
||||||
multiclass avx512_movnt<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
|
multiclass avx512_movnt<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
|
||||||
PatFrag st_frag = alignednontemporalstore,
|
PatFrag st_frag = alignednontemporalstore> {
|
||||||
InstrItinClass itin = IIC_SSE_MOVNT> {
|
|
||||||
let SchedRW = [WriteVecStore], AddedComplexity = 400 in
|
let SchedRW = [WriteVecStore], AddedComplexity = 400 in
|
||||||
def mr : AVX512PI<opc, MRMDestMem, (outs), (ins _.MemOp:$dst, _.RC:$src),
|
def mr : AVX512PI<opc, MRMDestMem, (outs), (ins _.MemOp:$dst, _.RC:$src),
|
||||||
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
|
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
|
||||||
[(st_frag (_.VT _.RC:$src), addr:$dst)],
|
[(st_frag (_.VT _.RC:$src), addr:$dst)],
|
||||||
_.ExeDomain, itin>, EVEX, EVEX_CD8<_.EltSize, CD8VF>;
|
_.ExeDomain>, EVEX, EVEX_CD8<_.EltSize, CD8VF>;
|
||||||
}
|
}
|
||||||
|
|
||||||
multiclass avx512_movnt_vl<bits<8> opc, string OpcodeStr,
|
multiclass avx512_movnt_vl<bits<8> opc, string OpcodeStr,
|
||||||
|
|
|
@ -717,13 +717,13 @@ class SS428I<bits<8> o, Format F, dag outs, dag ins, string asm,
|
||||||
// SS42FI - SSE 4.2 instructions with T8XD prefix.
|
// SS42FI - SSE 4.2 instructions with T8XD prefix.
|
||||||
// NOTE: 'HasSSE42' is used as SS42FI is only used for CRC32 insns.
|
// NOTE: 'HasSSE42' is used as SS42FI is only used for CRC32 insns.
|
||||||
class SS42FI<bits<8> o, Format F, dag outs, dag ins, string asm,
|
class SS42FI<bits<8> o, Format F, dag outs, dag ins, string asm,
|
||||||
list<dag> pattern, InstrItinClass itin = NoItinerary>
|
list<dag> pattern>
|
||||||
: I<o, F, outs, ins, asm, pattern, itin>, T8XD, Requires<[HasSSE42]>;
|
: I<o, F, outs, ins, asm, pattern>, T8XD, Requires<[HasSSE42]>;
|
||||||
|
|
||||||
// SS42AI = SSE 4.2 instructions with TA prefix
|
// SS42AI = SSE 4.2 instructions with TA prefix
|
||||||
class SS42AI<bits<8> o, Format F, dag outs, dag ins, string asm,
|
class SS42AI<bits<8> o, Format F, dag outs, dag ins, string asm,
|
||||||
list<dag> pattern, InstrItinClass itin = NoItinerary>
|
list<dag> pattern>
|
||||||
: Ii8<o, F, outs, ins, asm, pattern, itin, SSEPackedInt>, TAPD,
|
: Ii8<o, F, outs, ins, asm, pattern, NoItinerary, SSEPackedInt>, TAPD,
|
||||||
Requires<[UseSSE42]>;
|
Requires<[UseSSE42]>;
|
||||||
|
|
||||||
// AVX Instruction Templates:
|
// AVX Instruction Templates:
|
||||||
|
@ -857,19 +857,19 @@ class AVX512<bits<8> o, Format F, dag outs, dag ins, string asm,
|
||||||
// AES8I
|
// AES8I
|
||||||
// These use the same encoding as the SSE4.2 T8 and TA encodings.
|
// These use the same encoding as the SSE4.2 T8 and TA encodings.
|
||||||
class AES8I<bits<8> o, Format F, dag outs, dag ins, string asm,
|
class AES8I<bits<8> o, Format F, dag outs, dag ins, string asm,
|
||||||
list<dag>pattern, InstrItinClass itin = IIC_AES>
|
list<dag>pattern>
|
||||||
: I<o, F, outs, ins, asm, pattern, itin, SSEPackedInt>, T8PD,
|
: I<o, F, outs, ins, asm, pattern, NoItinerary, SSEPackedInt>, T8PD,
|
||||||
Requires<[NoAVX, HasAES]>;
|
Requires<[NoAVX, HasAES]>;
|
||||||
|
|
||||||
class AESAI<bits<8> o, Format F, dag outs, dag ins, string asm,
|
class AESAI<bits<8> o, Format F, dag outs, dag ins, string asm,
|
||||||
list<dag> pattern, InstrItinClass itin = NoItinerary>
|
list<dag> pattern>
|
||||||
: Ii8<o, F, outs, ins, asm, pattern, itin, SSEPackedInt>, TAPD,
|
: Ii8<o, F, outs, ins, asm, pattern, NoItinerary, SSEPackedInt>, TAPD,
|
||||||
Requires<[NoAVX, HasAES]>;
|
Requires<[NoAVX, HasAES]>;
|
||||||
|
|
||||||
// PCLMUL Instruction Templates
|
// PCLMUL Instruction Templates
|
||||||
class PCLMULIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
|
class PCLMULIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
|
||||||
list<dag>pattern, InstrItinClass itin = NoItinerary>
|
list<dag>pattern>
|
||||||
: Ii8<o, F, outs, ins, asm, pattern, itin, SSEPackedInt>, TAPD;
|
: Ii8<o, F, outs, ins, asm, pattern, NoItinerary, SSEPackedInt>, TAPD;
|
||||||
|
|
||||||
// FMA3 Instruction Templates
|
// FMA3 Instruction Templates
|
||||||
class FMA3<bits<8> o, Format F, dag outs, dag ins, string asm,
|
class FMA3<bits<8> o, Format F, dag outs, dag ins, string asm,
|
||||||
|
|
|
@ -3384,27 +3384,23 @@ def VMOVNTPSmr : VPSI<0x2B, MRMDestMem, (outs),
|
||||||
(ins f128mem:$dst, VR128:$src),
|
(ins f128mem:$dst, VR128:$src),
|
||||||
"movntps\t{$src, $dst|$dst, $src}",
|
"movntps\t{$src, $dst|$dst, $src}",
|
||||||
[(alignednontemporalstore (v4f32 VR128:$src),
|
[(alignednontemporalstore (v4f32 VR128:$src),
|
||||||
addr:$dst)],
|
addr:$dst)]>, VEX, VEX_WIG;
|
||||||
IIC_SSE_MOVNT>, VEX, VEX_WIG;
|
|
||||||
def VMOVNTPDmr : VPDI<0x2B, MRMDestMem, (outs),
|
def VMOVNTPDmr : VPDI<0x2B, MRMDestMem, (outs),
|
||||||
(ins f128mem:$dst, VR128:$src),
|
(ins f128mem:$dst, VR128:$src),
|
||||||
"movntpd\t{$src, $dst|$dst, $src}",
|
"movntpd\t{$src, $dst|$dst, $src}",
|
||||||
[(alignednontemporalstore (v2f64 VR128:$src),
|
[(alignednontemporalstore (v2f64 VR128:$src),
|
||||||
addr:$dst)],
|
addr:$dst)]>, VEX, VEX_WIG;
|
||||||
IIC_SSE_MOVNT>, VEX, VEX_WIG;
|
|
||||||
|
|
||||||
def VMOVNTPSYmr : VPSI<0x2B, MRMDestMem, (outs),
|
def VMOVNTPSYmr : VPSI<0x2B, MRMDestMem, (outs),
|
||||||
(ins f256mem:$dst, VR256:$src),
|
(ins f256mem:$dst, VR256:$src),
|
||||||
"movntps\t{$src, $dst|$dst, $src}",
|
"movntps\t{$src, $dst|$dst, $src}",
|
||||||
[(alignednontemporalstore (v8f32 VR256:$src),
|
[(alignednontemporalstore (v8f32 VR256:$src),
|
||||||
addr:$dst)],
|
addr:$dst)]>, VEX, VEX_L, VEX_WIG;
|
||||||
IIC_SSE_MOVNT>, VEX, VEX_L, VEX_WIG;
|
|
||||||
def VMOVNTPDYmr : VPDI<0x2B, MRMDestMem, (outs),
|
def VMOVNTPDYmr : VPDI<0x2B, MRMDestMem, (outs),
|
||||||
(ins f256mem:$dst, VR256:$src),
|
(ins f256mem:$dst, VR256:$src),
|
||||||
"movntpd\t{$src, $dst|$dst, $src}",
|
"movntpd\t{$src, $dst|$dst, $src}",
|
||||||
[(alignednontemporalstore (v4f64 VR256:$src),
|
[(alignednontemporalstore (v4f64 VR256:$src),
|
||||||
addr:$dst)],
|
addr:$dst)]>, VEX, VEX_L, VEX_WIG;
|
||||||
IIC_SSE_MOVNT>, VEX, VEX_L, VEX_WIG;
|
|
||||||
} // SchedRW
|
} // SchedRW
|
||||||
|
|
||||||
let ExeDomain = SSEPackedInt, SchedRW = [WriteVecStore] in {
|
let ExeDomain = SSEPackedInt, SchedRW = [WriteVecStore] in {
|
||||||
|
@ -3412,45 +3408,38 @@ def VMOVNTDQmr : VPDI<0xE7, MRMDestMem, (outs),
|
||||||
(ins i128mem:$dst, VR128:$src),
|
(ins i128mem:$dst, VR128:$src),
|
||||||
"movntdq\t{$src, $dst|$dst, $src}",
|
"movntdq\t{$src, $dst|$dst, $src}",
|
||||||
[(alignednontemporalstore (v2i64 VR128:$src),
|
[(alignednontemporalstore (v2i64 VR128:$src),
|
||||||
addr:$dst)],
|
addr:$dst)]>, VEX, VEX_WIG;
|
||||||
IIC_SSE_MOVNT>, VEX, VEX_WIG;
|
|
||||||
def VMOVNTDQYmr : VPDI<0xE7, MRMDestMem, (outs),
|
def VMOVNTDQYmr : VPDI<0xE7, MRMDestMem, (outs),
|
||||||
(ins i256mem:$dst, VR256:$src),
|
(ins i256mem:$dst, VR256:$src),
|
||||||
"movntdq\t{$src, $dst|$dst, $src}",
|
"movntdq\t{$src, $dst|$dst, $src}",
|
||||||
[(alignednontemporalstore (v4i64 VR256:$src),
|
[(alignednontemporalstore (v4i64 VR256:$src),
|
||||||
addr:$dst)],
|
addr:$dst)]>, VEX, VEX_L, VEX_WIG;
|
||||||
IIC_SSE_MOVNT>, VEX, VEX_L, VEX_WIG;
|
|
||||||
} // ExeDomain, SchedRW
|
} // ExeDomain, SchedRW
|
||||||
} // Predicates
|
} // Predicates
|
||||||
|
|
||||||
let SchedRW = [WriteVecStore] in {
|
let SchedRW = [WriteVecStore] in {
|
||||||
def MOVNTPSmr : PSI<0x2B, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
|
def MOVNTPSmr : PSI<0x2B, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
|
||||||
"movntps\t{$src, $dst|$dst, $src}",
|
"movntps\t{$src, $dst|$dst, $src}",
|
||||||
[(alignednontemporalstore (v4f32 VR128:$src), addr:$dst)],
|
[(alignednontemporalstore (v4f32 VR128:$src), addr:$dst)]>;
|
||||||
IIC_SSE_MOVNT>;
|
|
||||||
def MOVNTPDmr : PDI<0x2B, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
|
def MOVNTPDmr : PDI<0x2B, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
|
||||||
"movntpd\t{$src, $dst|$dst, $src}",
|
"movntpd\t{$src, $dst|$dst, $src}",
|
||||||
[(alignednontemporalstore(v2f64 VR128:$src), addr:$dst)],
|
[(alignednontemporalstore(v2f64 VR128:$src), addr:$dst)]>;
|
||||||
IIC_SSE_MOVNT>;
|
|
||||||
} // SchedRW
|
} // SchedRW
|
||||||
|
|
||||||
let ExeDomain = SSEPackedInt, SchedRW = [WriteVecStore] in
|
let ExeDomain = SSEPackedInt, SchedRW = [WriteVecStore] in
|
||||||
def MOVNTDQmr : PDI<0xE7, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
|
def MOVNTDQmr : PDI<0xE7, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
|
||||||
"movntdq\t{$src, $dst|$dst, $src}",
|
"movntdq\t{$src, $dst|$dst, $src}",
|
||||||
[(alignednontemporalstore (v2i64 VR128:$src), addr:$dst)],
|
[(alignednontemporalstore (v2i64 VR128:$src), addr:$dst)]>;
|
||||||
IIC_SSE_MOVNT>;
|
|
||||||
|
|
||||||
let SchedRW = [WriteStore] in {
|
let SchedRW = [WriteStore] in {
|
||||||
// There is no AVX form for instructions below this point
|
// There is no AVX form for instructions below this point
|
||||||
def MOVNTImr : I<0xC3, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src),
|
def MOVNTImr : I<0xC3, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src),
|
||||||
"movnti{l}\t{$src, $dst|$dst, $src}",
|
"movnti{l}\t{$src, $dst|$dst, $src}",
|
||||||
[(nontemporalstore (i32 GR32:$src), addr:$dst)],
|
[(nontemporalstore (i32 GR32:$src), addr:$dst)]>,
|
||||||
IIC_SSE_MOVNT>,
|
|
||||||
PS, Requires<[HasSSE2]>;
|
PS, Requires<[HasSSE2]>;
|
||||||
def MOVNTI_64mr : RI<0xC3, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src),
|
def MOVNTI_64mr : RI<0xC3, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src),
|
||||||
"movnti{q}\t{$src, $dst|$dst, $src}",
|
"movnti{q}\t{$src, $dst|$dst, $src}",
|
||||||
[(nontemporalstore (i64 GR64:$src), addr:$dst)],
|
[(nontemporalstore (i64 GR64:$src), addr:$dst)]>,
|
||||||
IIC_SSE_MOVNT>,
|
|
||||||
PS, Requires<[HasSSE2]>;
|
PS, Requires<[HasSSE2]>;
|
||||||
} // SchedRW = [WriteStore]
|
} // SchedRW = [WriteStore]
|
||||||
|
|
||||||
|
@ -4820,17 +4809,16 @@ let SchedRW = [WriteVecLoad] in {
|
||||||
let Predicates = [HasAVX] in {
|
let Predicates = [HasAVX] in {
|
||||||
def VLDDQUrm : S3DI<0xF0, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
|
def VLDDQUrm : S3DI<0xF0, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
|
||||||
"vlddqu\t{$src, $dst|$dst, $src}",
|
"vlddqu\t{$src, $dst|$dst, $src}",
|
||||||
[(set VR128:$dst, (int_x86_sse3_ldu_dq addr:$src))],
|
[(set VR128:$dst, (int_x86_sse3_ldu_dq addr:$src))]>,
|
||||||
IIC_SSE_LDDQU>, VEX, VEX_WIG;
|
VEX, VEX_WIG;
|
||||||
def VLDDQUYrm : S3DI<0xF0, MRMSrcMem, (outs VR256:$dst), (ins i256mem:$src),
|
def VLDDQUYrm : S3DI<0xF0, MRMSrcMem, (outs VR256:$dst), (ins i256mem:$src),
|
||||||
"vlddqu\t{$src, $dst|$dst, $src}",
|
"vlddqu\t{$src, $dst|$dst, $src}",
|
||||||
[(set VR256:$dst, (int_x86_avx_ldu_dq_256 addr:$src))],
|
[(set VR256:$dst, (int_x86_avx_ldu_dq_256 addr:$src))]>,
|
||||||
IIC_SSE_LDDQU>, VEX, VEX_L, VEX_WIG;
|
VEX, VEX_L, VEX_WIG;
|
||||||
} // Predicates
|
} // Predicates
|
||||||
def LDDQUrm : S3DI<0xF0, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
|
def LDDQUrm : S3DI<0xF0, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
|
||||||
"lddqu\t{$src, $dst|$dst, $src}",
|
"lddqu\t{$src, $dst|$dst, $src}",
|
||||||
[(set VR128:$dst, (int_x86_sse3_ldu_dq addr:$src))],
|
[(set VR128:$dst, (int_x86_sse3_ldu_dq addr:$src))]>;
|
||||||
IIC_SSE_LDDQU>;
|
|
||||||
} // SchedRW
|
} // SchedRW
|
||||||
|
|
||||||
//===---------------------------------------------------------------------===//
|
//===---------------------------------------------------------------------===//
|
||||||
|
@ -6220,35 +6208,33 @@ defm VTESTPDY : avx_bittest<0x0F, "vtestpd", VR256, f256mem, loadv4f64, v4f64>,
|
||||||
let Defs = [EFLAGS], Predicates = [HasPOPCNT] in {
|
let Defs = [EFLAGS], Predicates = [HasPOPCNT] in {
|
||||||
def POPCNT16rr : I<0xB8, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src),
|
def POPCNT16rr : I<0xB8, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src),
|
||||||
"popcnt{w}\t{$src, $dst|$dst, $src}",
|
"popcnt{w}\t{$src, $dst|$dst, $src}",
|
||||||
[(set GR16:$dst, (ctpop GR16:$src)), (implicit EFLAGS)],
|
[(set GR16:$dst, (ctpop GR16:$src)), (implicit EFLAGS)]>,
|
||||||
IIC_SSE_POPCNT_RR>, Sched<[WritePOPCNT]>,
|
Sched<[WritePOPCNT]>, OpSize16, XS;
|
||||||
OpSize16, XS;
|
|
||||||
def POPCNT16rm : I<0xB8, MRMSrcMem, (outs GR16:$dst), (ins i16mem:$src),
|
def POPCNT16rm : I<0xB8, MRMSrcMem, (outs GR16:$dst), (ins i16mem:$src),
|
||||||
"popcnt{w}\t{$src, $dst|$dst, $src}",
|
"popcnt{w}\t{$src, $dst|$dst, $src}",
|
||||||
[(set GR16:$dst, (ctpop (loadi16 addr:$src))),
|
[(set GR16:$dst, (ctpop (loadi16 addr:$src))),
|
||||||
(implicit EFLAGS)], IIC_SSE_POPCNT_RM>,
|
(implicit EFLAGS)]>,
|
||||||
Sched<[WritePOPCNTLd]>, OpSize16, XS;
|
Sched<[WritePOPCNTLd]>, OpSize16, XS;
|
||||||
|
|
||||||
def POPCNT32rr : I<0xB8, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src),
|
def POPCNT32rr : I<0xB8, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src),
|
||||||
"popcnt{l}\t{$src, $dst|$dst, $src}",
|
"popcnt{l}\t{$src, $dst|$dst, $src}",
|
||||||
[(set GR32:$dst, (ctpop GR32:$src)), (implicit EFLAGS)],
|
[(set GR32:$dst, (ctpop GR32:$src)), (implicit EFLAGS)]>,
|
||||||
IIC_SSE_POPCNT_RR>, Sched<[WritePOPCNT]>,
|
Sched<[WritePOPCNT]>, OpSize32, XS;
|
||||||
OpSize32, XS;
|
|
||||||
|
|
||||||
def POPCNT32rm : I<0xB8, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src),
|
def POPCNT32rm : I<0xB8, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src),
|
||||||
"popcnt{l}\t{$src, $dst|$dst, $src}",
|
"popcnt{l}\t{$src, $dst|$dst, $src}",
|
||||||
[(set GR32:$dst, (ctpop (loadi32 addr:$src))),
|
[(set GR32:$dst, (ctpop (loadi32 addr:$src))),
|
||||||
(implicit EFLAGS)], IIC_SSE_POPCNT_RM>,
|
(implicit EFLAGS)]>,
|
||||||
Sched<[WritePOPCNTLd]>, OpSize32, XS;
|
Sched<[WritePOPCNTLd]>, OpSize32, XS;
|
||||||
|
|
||||||
def POPCNT64rr : RI<0xB8, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src),
|
def POPCNT64rr : RI<0xB8, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src),
|
||||||
"popcnt{q}\t{$src, $dst|$dst, $src}",
|
"popcnt{q}\t{$src, $dst|$dst, $src}",
|
||||||
[(set GR64:$dst, (ctpop GR64:$src)), (implicit EFLAGS)],
|
[(set GR64:$dst, (ctpop GR64:$src)), (implicit EFLAGS)]>,
|
||||||
IIC_SSE_POPCNT_RR>, Sched<[WritePOPCNT]>, XS;
|
Sched<[WritePOPCNT]>, XS;
|
||||||
def POPCNT64rm : RI<0xB8, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src),
|
def POPCNT64rm : RI<0xB8, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src),
|
||||||
"popcnt{q}\t{$src, $dst|$dst, $src}",
|
"popcnt{q}\t{$src, $dst|$dst, $src}",
|
||||||
[(set GR64:$dst, (ctpop (loadi64 addr:$src))),
|
[(set GR64:$dst, (ctpop (loadi64 addr:$src))),
|
||||||
(implicit EFLAGS)], IIC_SSE_POPCNT_RM>,
|
(implicit EFLAGS)]>,
|
||||||
Sched<[WritePOPCNTLd]>, XS;
|
Sched<[WritePOPCNTLd]>, XS;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -7072,15 +7058,15 @@ class SS42I_crc32r<bits<8> opc, string asm, RegisterClass RCOut,
|
||||||
RegisterClass RCIn, SDPatternOperator Int> :
|
RegisterClass RCIn, SDPatternOperator Int> :
|
||||||
SS42FI<opc, MRMSrcReg, (outs RCOut:$dst), (ins RCOut:$src1, RCIn:$src2),
|
SS42FI<opc, MRMSrcReg, (outs RCOut:$dst), (ins RCOut:$src1, RCIn:$src2),
|
||||||
!strconcat(asm, "\t{$src2, $src1|$src1, $src2}"),
|
!strconcat(asm, "\t{$src2, $src1|$src1, $src2}"),
|
||||||
[(set RCOut:$dst, (Int RCOut:$src1, RCIn:$src2))], IIC_CRC32_REG>,
|
[(set RCOut:$dst, (Int RCOut:$src1, RCIn:$src2))]>,
|
||||||
Sched<[WriteCRC32]>;
|
Sched<[WriteCRC32]>;
|
||||||
|
|
||||||
class SS42I_crc32m<bits<8> opc, string asm, RegisterClass RCOut,
|
class SS42I_crc32m<bits<8> opc, string asm, RegisterClass RCOut,
|
||||||
X86MemOperand x86memop, SDPatternOperator Int> :
|
X86MemOperand x86memop, SDPatternOperator Int> :
|
||||||
SS42FI<opc, MRMSrcMem, (outs RCOut:$dst), (ins RCOut:$src1, x86memop:$src2),
|
SS42FI<opc, MRMSrcMem, (outs RCOut:$dst), (ins RCOut:$src1, x86memop:$src2),
|
||||||
!strconcat(asm, "\t{$src2, $src1|$src1, $src2}"),
|
!strconcat(asm, "\t{$src2, $src1|$src1, $src2}"),
|
||||||
[(set RCOut:$dst, (Int RCOut:$src1, (load addr:$src2)))],
|
[(set RCOut:$dst, (Int RCOut:$src1, (load addr:$src2)))]>,
|
||||||
IIC_CRC32_MEM>, Sched<[WriteCRC32Ld, ReadAfterLd]>;
|
Sched<[WriteCRC32Ld, ReadAfterLd]>;
|
||||||
|
|
||||||
let Constraints = "$src1 = $dst" in {
|
let Constraints = "$src1 = $dst" in {
|
||||||
def CRC32r32m8 : SS42I_crc32m<0xF0, "crc32{b}", GR32, i8mem,
|
def CRC32r32m8 : SS42I_crc32m<0xF0, "crc32{b}", GR32, i8mem,
|
||||||
|
@ -7114,7 +7100,7 @@ let Constraints = "$src1 = $dst" in {
|
||||||
|
|
||||||
// FIXME: Is there a better scheduler itinerary for SHA than WriteVecIMul?
|
// FIXME: Is there a better scheduler itinerary for SHA than WriteVecIMul?
|
||||||
multiclass SHAI_binop<bits<8> Opc, string OpcodeStr, Intrinsic IntId,
|
multiclass SHAI_binop<bits<8> Opc, string OpcodeStr, Intrinsic IntId,
|
||||||
OpndItins itins, bit UsesXMM0 = 0> {
|
X86FoldableSchedWrite sched, bit UsesXMM0 = 0> {
|
||||||
def rr : I<Opc, MRMSrcReg, (outs VR128:$dst),
|
def rr : I<Opc, MRMSrcReg, (outs VR128:$dst),
|
||||||
(ins VR128:$src1, VR128:$src2),
|
(ins VR128:$src1, VR128:$src2),
|
||||||
!if(UsesXMM0,
|
!if(UsesXMM0,
|
||||||
|
@ -7122,8 +7108,8 @@ multiclass SHAI_binop<bits<8> Opc, string OpcodeStr, Intrinsic IntId,
|
||||||
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}")),
|
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}")),
|
||||||
[!if(UsesXMM0,
|
[!if(UsesXMM0,
|
||||||
(set VR128:$dst, (IntId VR128:$src1, VR128:$src2, XMM0)),
|
(set VR128:$dst, (IntId VR128:$src1, VR128:$src2, XMM0)),
|
||||||
(set VR128:$dst, (IntId VR128:$src1, VR128:$src2)))], itins.rr>,
|
(set VR128:$dst, (IntId VR128:$src1, VR128:$src2)))]>,
|
||||||
T8, Sched<[itins.Sched]>;
|
T8, Sched<[sched]>;
|
||||||
|
|
||||||
def rm : I<Opc, MRMSrcMem, (outs VR128:$dst),
|
def rm : I<Opc, MRMSrcMem, (outs VR128:$dst),
|
||||||
(ins VR128:$src1, i128mem:$src2),
|
(ins VR128:$src1, i128mem:$src2),
|
||||||
|
@ -7134,8 +7120,8 @@ multiclass SHAI_binop<bits<8> Opc, string OpcodeStr, Intrinsic IntId,
|
||||||
(set VR128:$dst, (IntId VR128:$src1,
|
(set VR128:$dst, (IntId VR128:$src1,
|
||||||
(bc_v4i32 (memopv2i64 addr:$src2)), XMM0)),
|
(bc_v4i32 (memopv2i64 addr:$src2)), XMM0)),
|
||||||
(set VR128:$dst, (IntId VR128:$src1,
|
(set VR128:$dst, (IntId VR128:$src1,
|
||||||
(bc_v4i32 (memopv2i64 addr:$src2)))))], itins.rm>, T8,
|
(bc_v4i32 (memopv2i64 addr:$src2)))))]>, T8,
|
||||||
Sched<[itins.Sched.Folded, ReadAfterLd]>;
|
Sched<[sched.Folded, ReadAfterLd]>;
|
||||||
}
|
}
|
||||||
|
|
||||||
let Constraints = "$src1 = $dst", Predicates = [HasSHA] in {
|
let Constraints = "$src1 = $dst", Predicates = [HasSHA] in {
|
||||||
|
@ -7153,23 +7139,23 @@ let Constraints = "$src1 = $dst", Predicates = [HasSHA] in {
|
||||||
(int_x86_sha1rnds4 VR128:$src1,
|
(int_x86_sha1rnds4 VR128:$src1,
|
||||||
(bc_v4i32 (memopv2i64 addr:$src2)),
|
(bc_v4i32 (memopv2i64 addr:$src2)),
|
||||||
(i8 imm:$src3)))], IIC_SSE_INTMUL_P_RM>, TA,
|
(i8 imm:$src3)))], IIC_SSE_INTMUL_P_RM>, TA,
|
||||||
Sched<[WriteVecIMulLd, ReadAfterLd]>;
|
Sched<[WriteVecIMul.Folded, ReadAfterLd]>;
|
||||||
|
|
||||||
defm SHA1NEXTE : SHAI_binop<0xC8, "sha1nexte", int_x86_sha1nexte,
|
defm SHA1NEXTE : SHAI_binop<0xC8, "sha1nexte", int_x86_sha1nexte,
|
||||||
SSE_INTMUL_ITINS_P>;
|
WriteVecIMul>;
|
||||||
defm SHA1MSG1 : SHAI_binop<0xC9, "sha1msg1", int_x86_sha1msg1,
|
defm SHA1MSG1 : SHAI_binop<0xC9, "sha1msg1", int_x86_sha1msg1,
|
||||||
SSE_INTMUL_ITINS_P>;
|
WriteVecIMul>;
|
||||||
defm SHA1MSG2 : SHAI_binop<0xCA, "sha1msg2", int_x86_sha1msg2,
|
defm SHA1MSG2 : SHAI_binop<0xCA, "sha1msg2", int_x86_sha1msg2,
|
||||||
SSE_INTMUL_ITINS_P>;
|
WriteVecIMul>;
|
||||||
|
|
||||||
let Uses=[XMM0] in
|
let Uses=[XMM0] in
|
||||||
defm SHA256RNDS2 : SHAI_binop<0xCB, "sha256rnds2", int_x86_sha256rnds2,
|
defm SHA256RNDS2 : SHAI_binop<0xCB, "sha256rnds2", int_x86_sha256rnds2,
|
||||||
SSE_INTMUL_ITINS_P, 1>;
|
WriteVecIMul, 1>;
|
||||||
|
|
||||||
defm SHA256MSG1 : SHAI_binop<0xCC, "sha256msg1", int_x86_sha256msg1,
|
defm SHA256MSG1 : SHAI_binop<0xCC, "sha256msg1", int_x86_sha256msg1,
|
||||||
SSE_INTMUL_ITINS_P>;
|
WriteVecIMul>;
|
||||||
defm SHA256MSG2 : SHAI_binop<0xCD, "sha256msg2", int_x86_sha256msg2,
|
defm SHA256MSG2 : SHAI_binop<0xCD, "sha256msg2", int_x86_sha256msg2,
|
||||||
SSE_INTMUL_ITINS_P>;
|
WriteVecIMul>;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Aliases with explicit %xmm0
|
// Aliases with explicit %xmm0
|
||||||
|
@ -7309,16 +7295,16 @@ let Predicates = [NoAVX, HasPCLMUL] in {
|
||||||
(ins VR128:$src1, VR128:$src2, u8imm:$src3),
|
(ins VR128:$src1, VR128:$src2, u8imm:$src3),
|
||||||
"pclmulqdq\t{$src3, $src2, $dst|$dst, $src2, $src3}",
|
"pclmulqdq\t{$src3, $src2, $dst|$dst, $src2, $src3}",
|
||||||
[(set VR128:$dst,
|
[(set VR128:$dst,
|
||||||
(int_x86_pclmulqdq VR128:$src1, VR128:$src2, imm:$src3))],
|
(int_x86_pclmulqdq VR128:$src1, VR128:$src2, imm:$src3))]>,
|
||||||
IIC_SSE_PCLMULQDQ_RR>, Sched<[WriteCLMul]>;
|
Sched<[WriteCLMul]>;
|
||||||
|
|
||||||
def PCLMULQDQrm : PCLMULIi8<0x44, MRMSrcMem, (outs VR128:$dst),
|
def PCLMULQDQrm : PCLMULIi8<0x44, MRMSrcMem, (outs VR128:$dst),
|
||||||
(ins VR128:$src1, i128mem:$src2, u8imm:$src3),
|
(ins VR128:$src1, i128mem:$src2, u8imm:$src3),
|
||||||
"pclmulqdq\t{$src3, $src2, $dst|$dst, $src2, $src3}",
|
"pclmulqdq\t{$src3, $src2, $dst|$dst, $src2, $src3}",
|
||||||
[(set VR128:$dst,
|
[(set VR128:$dst,
|
||||||
(int_x86_pclmulqdq VR128:$src1, (memopv2i64 addr:$src2),
|
(int_x86_pclmulqdq VR128:$src1, (memopv2i64 addr:$src2),
|
||||||
imm:$src3))],
|
imm:$src3))]>,
|
||||||
IIC_SSE_PCLMULQDQ_RM>, Sched<[WriteCLMulLd, ReadAfterLd]>;
|
Sched<[WriteCLMulLd, ReadAfterLd]>;
|
||||||
} // Constraints = "$src1 = $dst"
|
} // Constraints = "$src1 = $dst"
|
||||||
|
|
||||||
def : Pat<(int_x86_pclmulqdq (memopv2i64 addr:$src2), VR128:$src1,
|
def : Pat<(int_x86_pclmulqdq (memopv2i64 addr:$src2), VR128:$src1,
|
||||||
|
@ -7346,15 +7332,15 @@ multiclass vpclmulqdq<RegisterClass RC, X86MemOperand MemOp,
|
||||||
(ins RC:$src1, RC:$src2, u8imm:$src3),
|
(ins RC:$src1, RC:$src2, u8imm:$src3),
|
||||||
"vpclmulqdq\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
|
"vpclmulqdq\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
|
||||||
[(set RC:$dst,
|
[(set RC:$dst,
|
||||||
(IntId RC:$src1, RC:$src2, imm:$src3))], IIC_SSE_PCLMULQDQ_RR>,
|
(IntId RC:$src1, RC:$src2, imm:$src3))]>,
|
||||||
Sched<[WriteCLMul]>;
|
Sched<[WriteCLMul]>;
|
||||||
|
|
||||||
def rm : PCLMULIi8<0x44, MRMSrcMem, (outs RC:$dst),
|
def rm : PCLMULIi8<0x44, MRMSrcMem, (outs RC:$dst),
|
||||||
(ins RC:$src1, MemOp:$src2, u8imm:$src3),
|
(ins RC:$src1, MemOp:$src2, u8imm:$src3),
|
||||||
"vpclmulqdq\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
|
"vpclmulqdq\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
|
||||||
[(set RC:$dst,
|
[(set RC:$dst,
|
||||||
(IntId RC:$src1, (LdFrag addr:$src2), imm:$src3))],
|
(IntId RC:$src1, (LdFrag addr:$src2), imm:$src3))]>,
|
||||||
IIC_SSE_PCLMULQDQ_RM>, Sched<[WriteCLMulLd, ReadAfterLd]>;
|
Sched<[WriteCLMulLd, ReadAfterLd]>;
|
||||||
|
|
||||||
// We can commute a load in the first operand by swapping the sources and
|
// We can commute a load in the first operand by swapping the sources and
|
||||||
// rotating the immediate.
|
// rotating the immediate.
|
||||||
|
@ -7433,10 +7419,10 @@ def INSERTQ : I<0x79, MRMSrcReg, (outs VR128:$dst),
|
||||||
let AddedComplexity = 400 in { // Prefer non-temporal versions
|
let AddedComplexity = 400 in { // Prefer non-temporal versions
|
||||||
let hasSideEffects = 0, mayStore = 1, SchedRW = [WriteStore] in {
|
let hasSideEffects = 0, mayStore = 1, SchedRW = [WriteStore] in {
|
||||||
def MOVNTSS : I<0x2B, MRMDestMem, (outs), (ins f32mem:$dst, VR128:$src),
|
def MOVNTSS : I<0x2B, MRMDestMem, (outs), (ins f32mem:$dst, VR128:$src),
|
||||||
"movntss\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVNT>, XS;
|
"movntss\t{$src, $dst|$dst, $src}", []>, XS;
|
||||||
|
|
||||||
def MOVNTSD : I<0x2B, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
|
def MOVNTSD : I<0x2B, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
|
||||||
"movntsd\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVNT>, XD;
|
"movntsd\t{$src, $dst|$dst, $src}", []>, XD;
|
||||||
} // SchedRW
|
} // SchedRW
|
||||||
|
|
||||||
def : Pat<(nontemporalstore FR32:$src, addr:$dst),
|
def : Pat<(nontemporalstore FR32:$src, addr:$dst),
|
||||||
|
@ -7737,6 +7723,7 @@ let ExeDomain = SSEPackedDouble in {
|
||||||
//===----------------------------------------------------------------------===//
|
//===----------------------------------------------------------------------===//
|
||||||
// VPERM2F128 - Permute Floating-Point Values in 128-bit chunks
|
// VPERM2F128 - Permute Floating-Point Values in 128-bit chunks
|
||||||
//
|
//
|
||||||
|
|
||||||
let ExeDomain = SSEPackedSingle in {
|
let ExeDomain = SSEPackedSingle in {
|
||||||
let isCommutable = 1 in
|
let isCommutable = 1 in
|
||||||
def VPERM2F128rr : AVXAIi8<0x06, MRMSrcReg, (outs VR256:$dst),
|
def VPERM2F128rr : AVXAIi8<0x06, MRMSrcReg, (outs VR256:$dst),
|
||||||
|
@ -7779,26 +7766,28 @@ def : Pat<(v4i64 (X86VPerm2x128 (loadv4i64 addr:$src2),
|
||||||
|
|
||||||
//===----------------------------------------------------------------------===//
|
//===----------------------------------------------------------------------===//
|
||||||
// VZERO - Zero YMM registers
|
// VZERO - Zero YMM registers
|
||||||
|
// Note: These instructions do not affect YMM16-YMM31.
|
||||||
//
|
//
|
||||||
// Note, these instruction do not affect the YMM16-YMM31.
|
|
||||||
let SchedRW = [WriteSystem] in {
|
let SchedRW = [WriteSystem] in {
|
||||||
let Defs = [YMM0, YMM1, YMM2, YMM3, YMM4, YMM5, YMM6, YMM7,
|
let Defs = [YMM0, YMM1, YMM2, YMM3, YMM4, YMM5, YMM6, YMM7,
|
||||||
YMM8, YMM9, YMM10, YMM11, YMM12, YMM13, YMM14, YMM15] in {
|
YMM8, YMM9, YMM10, YMM11, YMM12, YMM13, YMM14, YMM15] in {
|
||||||
// Zero All YMM registers
|
// Zero All YMM registers
|
||||||
def VZEROALL : I<0x77, RawFrm, (outs), (ins), "vzeroall",
|
def VZEROALL : I<0x77, RawFrm, (outs), (ins), "vzeroall",
|
||||||
[(int_x86_avx_vzeroall)], IIC_AVX_ZERO>, PS, VEX, VEX_L,
|
[(int_x86_avx_vzeroall)]>, PS, VEX, VEX_L,
|
||||||
Requires<[HasAVX]>, VEX_WIG;
|
Requires<[HasAVX]>, VEX_WIG;
|
||||||
|
|
||||||
// Zero Upper bits of YMM registers
|
// Zero Upper bits of YMM registers
|
||||||
def VZEROUPPER : I<0x77, RawFrm, (outs), (ins), "vzeroupper",
|
def VZEROUPPER : I<0x77, RawFrm, (outs), (ins), "vzeroupper",
|
||||||
[(int_x86_avx_vzeroupper)], IIC_AVX_ZERO>, PS, VEX,
|
[(int_x86_avx_vzeroupper)]>, PS, VEX,
|
||||||
Requires<[HasAVX]>, VEX_WIG;
|
Requires<[HasAVX]>, VEX_WIG;
|
||||||
} // Defs
|
} // Defs
|
||||||
} // SchedRW
|
} // SchedRW
|
||||||
|
|
||||||
//===----------------------------------------------------------------------===//
|
//===----------------------------------------------------------------------===//
|
||||||
// Half precision conversion instructions
|
// Half precision conversion instructions
|
||||||
//===----------------------------------------------------------------------===//
|
//
|
||||||
|
|
||||||
multiclass f16c_ph2ps<RegisterClass RC, X86MemOperand x86memop> {
|
multiclass f16c_ph2ps<RegisterClass RC, X86MemOperand x86memop> {
|
||||||
def rr : I<0x13, MRMSrcReg, (outs RC:$dst), (ins VR128:$src),
|
def rr : I<0x13, MRMSrcReg, (outs RC:$dst), (ins VR128:$src),
|
||||||
"vcvtph2ps\t{$src, $dst|$dst, $src}",
|
"vcvtph2ps\t{$src, $dst|$dst, $src}",
|
||||||
|
|
|
@ -254,9 +254,6 @@ def IIC_MOVZX : InstrItinClass;
|
||||||
def IIC_MOVZX_R16_R8 : InstrItinClass;
|
def IIC_MOVZX_R16_R8 : InstrItinClass;
|
||||||
def IIC_MOVZX_R16_M8 : InstrItinClass;
|
def IIC_MOVZX_R16_M8 : InstrItinClass;
|
||||||
|
|
||||||
def IIC_REP_MOVS : InstrItinClass;
|
|
||||||
def IIC_REP_STOS : InstrItinClass;
|
|
||||||
|
|
||||||
// SSE scalar/parallel binary operations
|
// SSE scalar/parallel binary operations
|
||||||
def IIC_SSE_ALU_F32S_RR : InstrItinClass;
|
def IIC_SSE_ALU_F32S_RR : InstrItinClass;
|
||||||
def IIC_SSE_ALU_F32S_RM : InstrItinClass;
|
def IIC_SSE_ALU_F32S_RM : InstrItinClass;
|
||||||
|
@ -359,10 +356,6 @@ def IIC_SSE_MOVQ_RR : InstrItinClass;
|
||||||
|
|
||||||
def IIC_SSE_MOV_LH : InstrItinClass;
|
def IIC_SSE_MOV_LH : InstrItinClass;
|
||||||
|
|
||||||
def IIC_SSE_LDDQU : InstrItinClass;
|
|
||||||
|
|
||||||
def IIC_SSE_MOVNT : InstrItinClass;
|
|
||||||
|
|
||||||
def IIC_SSE_PHADDSUBD_RR : InstrItinClass;
|
def IIC_SSE_PHADDSUBD_RR : InstrItinClass;
|
||||||
def IIC_SSE_PHADDSUBD_RM : InstrItinClass;
|
def IIC_SSE_PHADDSUBD_RM : InstrItinClass;
|
||||||
def IIC_SSE_PHADDSUBSW_RR : InstrItinClass;
|
def IIC_SSE_PHADDSUBSW_RR : InstrItinClass;
|
||||||
|
@ -391,14 +384,7 @@ def IIC_SSE_CVT_SS2SI64_RR : InstrItinClass;
|
||||||
def IIC_SSE_CVT_SD2SI_RM : InstrItinClass;
|
def IIC_SSE_CVT_SD2SI_RM : InstrItinClass;
|
||||||
def IIC_SSE_CVT_SD2SI_RR : InstrItinClass;
|
def IIC_SSE_CVT_SD2SI_RR : InstrItinClass;
|
||||||
|
|
||||||
def IIC_AVX_ZERO : InstrItinClass;
|
|
||||||
|
|
||||||
def IIC_AES : InstrItinClass;
|
|
||||||
def IIC_BLEND_MEM : InstrItinClass;
|
|
||||||
def IIC_BLEND_NOMEM : InstrItinClass;
|
|
||||||
def IIC_CBW : InstrItinClass;
|
def IIC_CBW : InstrItinClass;
|
||||||
def IIC_CRC32_REG : InstrItinClass;
|
|
||||||
def IIC_CRC32_MEM : InstrItinClass;
|
|
||||||
def IIC_SSE_DPPD_RR : InstrItinClass;
|
def IIC_SSE_DPPD_RR : InstrItinClass;
|
||||||
def IIC_SSE_DPPD_RM : InstrItinClass;
|
def IIC_SSE_DPPD_RM : InstrItinClass;
|
||||||
def IIC_SSE_DPPS_RR : InstrItinClass;
|
def IIC_SSE_DPPS_RR : InstrItinClass;
|
||||||
|
@ -415,10 +401,6 @@ def IIC_SSE_ROUNDPS_REG : InstrItinClass;
|
||||||
def IIC_SSE_ROUNDPS_MEM : InstrItinClass;
|
def IIC_SSE_ROUNDPS_MEM : InstrItinClass;
|
||||||
def IIC_SSE_ROUNDPD_REG : InstrItinClass;
|
def IIC_SSE_ROUNDPD_REG : InstrItinClass;
|
||||||
def IIC_SSE_ROUNDPD_MEM : InstrItinClass;
|
def IIC_SSE_ROUNDPD_MEM : InstrItinClass;
|
||||||
def IIC_SSE_POPCNT_RR : InstrItinClass;
|
|
||||||
def IIC_SSE_POPCNT_RM : InstrItinClass;
|
|
||||||
def IIC_SSE_PCLMULQDQ_RR : InstrItinClass;
|
|
||||||
def IIC_SSE_PCLMULQDQ_RM : InstrItinClass;
|
|
||||||
|
|
||||||
//===----------------------------------------------------------------------===//
|
//===----------------------------------------------------------------------===//
|
||||||
// Processor instruction itineraries.
|
// Processor instruction itineraries.
|
||||||
|
|
Loading…
Reference in New Issue