diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td index e087b4e7fab5..0979c9658a0e 100644 --- a/llvm/lib/Target/X86/X86InstrAVX512.td +++ b/llvm/lib/Target/X86/X86InstrAVX512.td @@ -6914,6 +6914,8 @@ multiclass avx512_fma3s_all opc213, bits<8> opc231, bits<8> opc132, (set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src2, (_.ScalarLdFrag addr:$src3), _.FRC:$src1))), 1>; + // One pattern is 312 order so that the load is in a different place from the + // 213 and 231 patterns; this helps tablegen's duplicate pattern detection. defm NAME#132#SUFF#Z: avx512_fma3s_common opc213, bits<8> opc231, bits<8> opc132, (null_frag), (set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src1, _.FRC:$src3, _.FRC:$src2))), - (set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src1, - (_.ScalarLdFrag addr:$src3), _.FRC:$src2))), 1>; + (set _.FRC:$dst, (_.EltVT (OpNode (_.ScalarLdFrag addr:$src3), + _.FRC:$src1, _.FRC:$src2))), 1>; } } diff --git a/llvm/lib/Target/X86/X86InstrFMA.td b/llvm/lib/Target/X86/X86InstrFMA.td index 163fe5db76ef..453dcd83df1f 100644 --- a/llvm/lib/Target/X86/X86InstrFMA.td +++ b/llvm/lib/Target/X86/X86InstrFMA.td @@ -15,8 +15,8 @@ // FMA3 - Intel 3 operand Fused Multiply-Add instructions //===----------------------------------------------------------------------===// -// For all FMA opcodes declared in fma3p_rm and fma3s_rm milticlasses defined -// below, both the register and memory variants are commutable. +// For all FMA opcodes declared in fma3p_rm_* and fma3s_rm_* multiclasses +// defined below, both the register and memory variants are commutable. // For the register form the commutable operands are 1, 2 and 3. // For the memory variant the folded operand must be in 3. Thus, // in that case, only the operands 1 and 2 can be swapped. @@ -34,56 +34,85 @@ // operands 1 and 3 (register forms only): *231* --> *213*; // operands 2 and 3 (register forms only): *231* --> *231*(no changes). 
-let Constraints = "$src1 = $dst", hasSideEffects = 0, isCommutable = 1 in -multiclass fma3p_rm opc, string OpcodeStr, - PatFrag MemFrag128, PatFrag MemFrag256, - ValueType OpVT128, ValueType OpVT256, - SDPatternOperator Op = null_frag> { - def r : FMA3 opc, string OpcodeStr, RegisterClass RC, + ValueType VT, X86MemOperand x86memop, PatFrag MemFrag, + SDNode Op> { + def r : FMA3; + [(set RC:$dst, (VT (Op RC:$src2, RC:$src1, RC:$src3)))]>; let mayLoad = 1 in - def m : FMA3; - - def Yr : FMA3, VEX_L; - - let mayLoad = 1 in - def Ym : FMA3, VEX_L; + [(set RC:$dst, (VT (Op RC:$src2, RC:$src1, + (MemFrag addr:$src3))))]>; } +multiclass fma3p_rm_231 opc, string OpcodeStr, RegisterClass RC, + ValueType VT, X86MemOperand x86memop, PatFrag MemFrag, + SDNode Op> { + let hasSideEffects = 0 in + def r : FMA3; + + let mayLoad = 1 in + def m : FMA3; +} + +multiclass fma3p_rm_132 opc, string OpcodeStr, RegisterClass RC, + ValueType VT, X86MemOperand x86memop, PatFrag MemFrag, + SDNode Op> { + let hasSideEffects = 0 in + def r : FMA3; + + // Pattern is 312 order so that the load is in a different place from the + // 213 and 231 patterns; this helps tablegen's duplicate pattern detection. 
+ let mayLoad = 1 in + def m : FMA3; +} + +let Constraints = "$src1 = $dst", hasSideEffects = 0, isCommutable = 1 in multiclass fma3p_forms opc132, bits<8> opc213, bits<8> opc231, string OpcodeStr, string PackTy, string Suff, PatFrag MemFrag128, PatFrag MemFrag256, SDNode Op, ValueType OpTy128, ValueType OpTy256> { - defm NAME#213#Suff : fma3p_rm; - defm NAME#132#Suff : fma3p_rm; - defm NAME#231#Suff : fma3p_rm; + defm NAME#213#Suff : fma3p_rm_213; + defm NAME#231#Suff : fma3p_rm_231; + defm NAME#132#Suff : fma3p_rm_132; + + defm NAME#213#Suff#Y : fma3p_rm_213, + VEX_L; + defm NAME#231#Suff#Y : fma3p_rm_231, + VEX_L; + defm NAME#132#Suff#Y : fma3p_rm_132, + VEX_L; } // Fused Multiply-Add @@ -93,11 +122,9 @@ let ExeDomain = SSEPackedSingle in { defm VFMSUB : fma3p_forms<0x9A, 0xAA, 0xBA, "vfmsub", "ps", "PS", loadv4f32, loadv8f32, X86Fmsub, v4f32, v8f32>; defm VFMADDSUB : fma3p_forms<0x96, 0xA6, 0xB6, "vfmaddsub", "ps", "PS", - loadv4f32, loadv8f32, X86Fmaddsub, - v4f32, v8f32>; + loadv4f32, loadv8f32, X86Fmaddsub, v4f32, v8f32>; defm VFMSUBADD : fma3p_forms<0x97, 0xA7, 0xB7, "vfmsubadd", "ps", "PS", - loadv4f32, loadv8f32, X86Fmsubadd, - v4f32, v8f32>; + loadv4f32, loadv8f32, X86Fmsubadd, v4f32, v8f32>; } let ExeDomain = SSEPackedDouble in { @@ -138,23 +165,77 @@ let ExeDomain = SSEPackedDouble in { // FMA*231* reg2, reg1, reg3; // reg1 * reg3 + reg2; // Please see more detailed comment at the very beginning of the section // defining FMA3 opcodes above. 
-let Constraints = "$src1 = $dst", isCommutable = 1, hasSideEffects = 0 in -multiclass fma3s_rm opc, string OpcodeStr, - X86MemOperand x86memop, RegisterClass RC, - SDPatternOperator OpNode = null_frag> { - def r : FMA3; +multiclass fma3s_rm_213 opc, string OpcodeStr, + X86MemOperand x86memop, RegisterClass RC, + SDPatternOperator OpNode> { + def r : FMA3; let mayLoad = 1 in - def m : FMA3; + def m : FMA3; +} + +multiclass fma3s_rm_231 opc, string OpcodeStr, + X86MemOperand x86memop, RegisterClass RC, + SDPatternOperator OpNode> { + let hasSideEffects = 0 in + def r : FMA3; + + let mayLoad = 1 in + def m : FMA3; +} + +multiclass fma3s_rm_132 opc, string OpcodeStr, + X86MemOperand x86memop, RegisterClass RC, + SDPatternOperator OpNode> { + let hasSideEffects = 0 in + def r : FMA3; + + // Pattern is 312 order so that the load is in a different place from the + // 213 and 231 patterns; this helps tablegen's duplicate pattern detection. + let mayLoad = 1 in + def m : FMA3; +} + +let Constraints = "$src1 = $dst", isCommutable = 1, hasSideEffects = 0 in +multiclass fma3s_forms opc132, bits<8> opc213, bits<8> opc231, + string OpStr, string PackTy, string Suff, + SDNode OpNode, RegisterClass RC, + X86MemOperand x86memop> { + let Predicates = [HasFMA, NoAVX512] in { + defm NAME#213#Suff : fma3s_rm_213; + defm NAME#231#Suff : fma3s_rm_231; + defm NAME#132#Suff : fma3s_rm_132; + } } // These FMA*_Int instructions are defined specially for being used when @@ -188,20 +269,6 @@ multiclass fma3s_rm_int opc, string OpcodeStr, []>; } -multiclass fma3s_forms opc132, bits<8> opc213, bits<8> opc231, - string OpStr, string PackTy, string Suff, - SDNode OpNode, RegisterClass RC, - X86MemOperand x86memop> { - let Predicates = [HasFMA, NoAVX512] in { - defm NAME#132#Suff : fma3s_rm; - defm NAME#213#Suff : fma3s_rm; - defm NAME#231#Suff : fma3s_rm; - } -} - // The FMA 213 form is created for lowering of scalar FMA intrinscis // to machine instructions. 
// The FMA 132 form can trivially be get by commuting the 2nd and 3rd operands