[X86] Add ReadAfterLds to some 3 src instructions

Sometimes the operand comes after the memory operand so we need 5 ReadDefaults first.

I suspect we also need to do something for the mask operand for masked avx512 instructions? I'm not sure if the mask should be ReadAfterLd or not since it can mask faults. If it shouldn't be ReadAfterLd then we're probably wrong for zero masking instructions already.

Differential Revision: https://reviews.llvm.org/D44726

llvm-svn: 328834
This commit is contained in:
Craig Topper 2018-03-29 22:03:05 +00:00
parent dd4baff48d
commit ee3c19fd7f
4 changed files with 64 additions and 23 deletions

View File

@ -51,7 +51,7 @@ multiclass fma3p_rm_213<bits<8> opc, string OpcodeStr, RegisterClass RC,
"\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
[(set RC:$dst, (VT (Op RC:$src2, RC:$src1,
(MemFrag addr:$src3))))]>,
Sched<[WriteFMALd, ReadAfterLd]>;
Sched<[WriteFMALd, ReadAfterLd, ReadAfterLd]>;
}
multiclass fma3p_rm_231<bits<8> opc, string OpcodeStr, RegisterClass RC,
@ -70,7 +70,8 @@ multiclass fma3p_rm_231<bits<8> opc, string OpcodeStr, RegisterClass RC,
!strconcat(OpcodeStr,
"\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
[(set RC:$dst, (VT (Op RC:$src2, (MemFrag addr:$src3),
RC:$src1)))]>, Sched<[WriteFMALd, ReadAfterLd]>;
RC:$src1)))]>,
Sched<[WriteFMALd, ReadAfterLd, ReadAfterLd]>;
}
multiclass fma3p_rm_132<bits<8> opc, string OpcodeStr, RegisterClass RC,
@ -91,7 +92,8 @@ multiclass fma3p_rm_132<bits<8> opc, string OpcodeStr, RegisterClass RC,
!strconcat(OpcodeStr,
"\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
[(set RC:$dst, (VT (Op (MemFrag addr:$src3), RC:$src1,
RC:$src2)))]>, Sched<[WriteFMALd, ReadAfterLd]>;
RC:$src2)))]>,
Sched<[WriteFMALd, ReadAfterLd, ReadAfterLd]>;
}
let Constraints = "$src1 = $dst", hasSideEffects = 0, isCommutable = 1 in
@ -184,7 +186,7 @@ multiclass fma3s_rm_213<bits<8> opc, string OpcodeStr,
"\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
[(set RC:$dst,
(OpNode RC:$src2, RC:$src1, (load addr:$src3)))]>,
Sched<[WriteFMALd, ReadAfterLd]>;
Sched<[WriteFMALd, ReadAfterLd, ReadAfterLd]>;
}
multiclass fma3s_rm_231<bits<8> opc, string OpcodeStr,
@ -204,7 +206,7 @@ multiclass fma3s_rm_231<bits<8> opc, string OpcodeStr,
"\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
[(set RC:$dst,
(OpNode RC:$src2, (load addr:$src3), RC:$src1))]>,
Sched<[WriteFMALd, ReadAfterLd]>;
Sched<[WriteFMALd, ReadAfterLd, ReadAfterLd]>;
}
multiclass fma3s_rm_132<bits<8> opc, string OpcodeStr,
@ -226,7 +228,7 @@ multiclass fma3s_rm_132<bits<8> opc, string OpcodeStr,
"\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
[(set RC:$dst,
(OpNode (load addr:$src3), RC:$src1, RC:$src2))]>,
Sched<[WriteFMALd, ReadAfterLd]>;
Sched<[WriteFMALd, ReadAfterLd, ReadAfterLd]>;
}
let Constraints = "$src1 = $dst", isCommutable = 1, hasSideEffects = 0 in
@ -270,7 +272,7 @@ multiclass fma3s_rm_int<bits<8> opc, string OpcodeStr,
(ins RC:$src1, RC:$src2, memopr:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
[]>, Sched<[WriteFMALd, ReadAfterLd]>;
[]>, Sched<[WriteFMALd, ReadAfterLd, ReadAfterLd]>;
}
// The FMA 213 form is created for lowering of scalar FMA intrinscis
@ -374,14 +376,19 @@ multiclass fma4s<bits<8> opc, string OpcodeStr, RegisterClass RC,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
[(set RC:$dst, (OpNode RC:$src1, RC:$src2,
(mem_frag addr:$src3)))]>, VEX_W, VEX_LIG,
Sched<[WriteFMALd, ReadAfterLd]>;
Sched<[WriteFMALd, ReadAfterLd, ReadAfterLd]>;
def mr : FMA4S<opc, MRMSrcMem, (outs RC:$dst),
(ins RC:$src1, x86memop:$src2, RC:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
[(set RC:$dst,
(OpNode RC:$src1, (mem_frag addr:$src2), RC:$src3))]>, VEX_LIG,
Sched<[WriteFMALd, ReadAfterLd]>;
Sched<[WriteFMALd, ReadAfterLd,
// x86memop:$src2
ReadDefault, ReadDefault, ReadDefault, ReadDefault,
ReadDefault,
// RC:$src3
ReadAfterLd]>;
// For disassembler
let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0 in
def rr_REV : FMA4S<opc, MRMSrcReg, (outs RC:$dst),
@ -407,14 +414,19 @@ let isCodeGenOnly = 1 in {
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
[(set VR128:$dst, (VT (OpNode VR128:$src1, VR128:$src2,
mem_cpat:$src3)))]>, VEX_W, VEX_LIG,
Sched<[WriteFMALd, ReadAfterLd]>;
Sched<[WriteFMALd, ReadAfterLd, ReadAfterLd]>;
def mr_Int : FMA4S_Int<opc, MRMSrcMem, (outs VR128:$dst),
(ins VR128:$src1, memop:$src2, VR128:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
[(set VR128:$dst,
(VT (OpNode VR128:$src1, mem_cpat:$src2, VR128:$src3)))]>,
VEX_LIG, Sched<[WriteFMALd, ReadAfterLd]>;
VEX_LIG, Sched<[WriteFMALd, ReadAfterLd,
// memop:$src2
ReadDefault, ReadDefault, ReadDefault,
ReadDefault, ReadDefault,
// VR128::$src3
ReadAfterLd]>;
let hasSideEffects = 0 in
def rr_Int_REV : FMA4S_Int<opc, MRMSrcReg, (outs VR128:$dst),
(ins VR128:$src1, VR128:$src2, VR128:$src3),
@ -441,14 +453,19 @@ multiclass fma4p<bits<8> opc, string OpcodeStr, SDNode OpNode,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
[(set VR128:$dst, (OpNode VR128:$src1, VR128:$src2,
(ld_frag128 addr:$src3)))]>, VEX_W,
Sched<[WriteFMALd, ReadAfterLd]>;
Sched<[WriteFMALd, ReadAfterLd, ReadAfterLd]>;
def mr : FMA4<opc, MRMSrcMem, (outs VR128:$dst),
(ins VR128:$src1, f128mem:$src2, VR128:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
[(set VR128:$dst,
(OpNode VR128:$src1, (ld_frag128 addr:$src2), VR128:$src3))]>,
Sched<[WriteFMALd, ReadAfterLd]>;
Sched<[WriteFMALd, ReadAfterLd,
// f128mem:$src2
ReadDefault, ReadDefault, ReadDefault, ReadDefault,
ReadDefault,
// VR128::$src3
ReadAfterLd]>;
let isCommutable = 1 in
def Yrr : FMA4<opc, MRMSrcRegOp4, (outs VR256:$dst),
(ins VR256:$src1, VR256:$src2, VR256:$src3),
@ -463,14 +480,19 @@ multiclass fma4p<bits<8> opc, string OpcodeStr, SDNode OpNode,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
[(set VR256:$dst, (OpNode VR256:$src1, VR256:$src2,
(ld_frag256 addr:$src3)))]>, VEX_W, VEX_L,
Sched<[WriteFMALd, ReadAfterLd]>;
Sched<[WriteFMALd, ReadAfterLd, ReadAfterLd]>;
def Ymr : FMA4<opc, MRMSrcMem, (outs VR256:$dst),
(ins VR256:$src1, f256mem:$src2, VR256:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
[(set VR256:$dst, (OpNode VR256:$src1,
(ld_frag256 addr:$src2), VR256:$src3))]>, VEX_L,
Sched<[WriteFMALd, ReadAfterLd]>;
Sched<[WriteFMALd, ReadAfterLd,
// f256mem:$src2
ReadDefault, ReadDefault, ReadDefault, ReadDefault,
ReadDefault,
// VR256::$src3
ReadAfterLd]>;
// For disassembler
let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0 in {
def rr_REV : FMA4<opc, MRMSrcReg, (outs VR128:$dst),

View File

@ -6645,7 +6645,12 @@ multiclass SS41I_quaternary_int_avx<bits<8> opc, string OpcodeStr,
(IntId RC:$src1, (bitconvert (mem_frag addr:$src2)),
RC:$src3))],
itins.rm, SSEPackedInt>, TAPD, VEX_4V,
Sched<[itins.Sched.Folded, ReadAfterLd]>;
Sched<[itins.Sched.Folded, ReadAfterLd,
// x86memop:$src2
ReadDefault, ReadDefault, ReadDefault, ReadDefault,
ReadDefault,
// RC::$src3
ReadAfterLd]>;
}
let Predicates = [HasAVX] in {

View File

@ -897,7 +897,7 @@ let hasSideEffects = 0 in {
// x86memop:$src1
ReadDefault, ReadDefault, ReadDefault, ReadDefault,
ReadDefault,
// RC:$src1
// RC:$src2
ReadAfterLd]>;
}
}

View File

@ -287,7 +287,7 @@ multiclass xop4op<bits<8> opc, string OpcodeStr, SDNode OpNode,
[(set VR128:$dst,
(vt128 (OpNode (vt128 VR128:$src1), (vt128 VR128:$src2),
(vt128 (bitconvert (loadv2i64 addr:$src3))))))]>,
XOP_4V, VEX_W, Sched<[WriteShuffleLd, ReadAfterLd]>;
XOP_4V, VEX_W, Sched<[WriteShuffleLd, ReadAfterLd, ReadAfterLd]>;
def rmr : IXOPi8Reg<opc, MRMSrcMem, (outs VR128:$dst),
(ins VR128:$src1, i128mem:$src2, VR128:$src3),
!strconcat(OpcodeStr,
@ -295,7 +295,12 @@ multiclass xop4op<bits<8> opc, string OpcodeStr, SDNode OpNode,
[(set VR128:$dst,
(v16i8 (OpNode (vt128 VR128:$src1), (vt128 (bitconvert (loadv2i64 addr:$src2))),
(vt128 VR128:$src3))))]>,
XOP_4V, Sched<[WriteShuffleLd, ReadAfterLd]>;
XOP_4V, Sched<[WriteShuffleLd, ReadAfterLd,
// 128mem:$src2
ReadDefault, ReadDefault, ReadDefault, ReadDefault,
ReadDefault,
// VR128:$src3
ReadAfterLd]>;
// For disassembler
let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0 in
def rrr_REV : IXOPi8Reg<opc, MRMSrcRegOp4, (outs VR128:$dst),
@ -325,14 +330,19 @@ multiclass xop4op_int<bits<8> opc, string OpcodeStr, RegisterClass RC,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
[(set RC:$dst, (VT (or (and (load addr:$src3), RC:$src1),
(X86andnp (load addr:$src3), RC:$src2))))]>,
XOP_4V, VEX_W, Sched<[WriteShuffleLd, ReadAfterLd]>;
XOP_4V, VEX_W, Sched<[WriteShuffleLd, ReadAfterLd, ReadAfterLd]>;
def rmr : IXOPi8Reg<opc, MRMSrcMem, (outs RC:$dst),
(ins RC:$src1, x86memop:$src2, RC:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
[(set RC:$dst, (VT (or (and RC:$src3, RC:$src1),
(X86andnp RC:$src3, (load addr:$src2)))))]>,
XOP_4V, Sched<[WriteShuffleLd, ReadAfterLd]>;
XOP_4V, Sched<[WriteShuffleLd, ReadAfterLd,
// x86memop:$src2
ReadDefault, ReadDefault, ReadDefault, ReadDefault,
ReadDefault,
// RC::$src3
ReadAfterLd]>;
// For disassembler
let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0 in
def rrr_REV : IXOPi8Reg<opc, MRMSrcRegOp4, (outs RC:$dst),
@ -366,7 +376,7 @@ multiclass xop_vpermil2<bits<8> Opc, string OpcodeStr, RegisterClass RC,
(VT (X86vpermil2 RC:$src1, RC:$src2,
(bitconvert (IntLdFrag addr:$src3)),
(i8 imm:$src4))))]>, VEX_W,
Sched<[WriteFShuffleLd, ReadAfterLd]>;
Sched<[WriteFShuffleLd, ReadAfterLd, ReadAfterLd]>;
def mr : IXOP5<Opc, MRMSrcMem, (outs RC:$dst),
(ins RC:$src1, fpmemop:$src2, RC:$src3, u8imm:$src4),
!strconcat(OpcodeStr,
@ -374,7 +384,11 @@ multiclass xop_vpermil2<bits<8> Opc, string OpcodeStr, RegisterClass RC,
[(set RC:$dst,
(VT (X86vpermil2 RC:$src1, (FPLdFrag addr:$src2),
RC:$src3, (i8 imm:$src4))))]>,
Sched<[WriteFShuffleLd, ReadAfterLd]>;
Sched<[WriteFShuffleLd, ReadAfterLd,
// fpmemop:$src2
ReadDefault, ReadDefault, ReadDefault, ReadDefault, ReadDefault,
// RC:$src3
ReadAfterLd]>;
// For disassembler
let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0 in
def rr_REV : IXOP5<Opc, MRMSrcRegOp4, (outs RC:$dst),