[SelectionDAG][ARM][AArch64][Hexagon][RISCV][X86] Add SDNPCommutative to fma and fmad nodes in tablegen. Remove explicit commuted patterns from targets.
X86 was already specially marking fma as commutable, which allowed tablegen to autogenerate the commuted patterns. This moves the flag to the target-independent definition and fixes up the targets to remove the now-unneeded patterns. Unfortunately, the tests change because the commuted versions of the patterns generate operands in a different order than the explicit patterns did.

Differential Revision: https://reviews.llvm.org/D91842
commit 4252f7773a
parent e0e334a9c1
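The mechanism, in brief: SDNPCommutative tells TableGen that a node's first two operands may be swapped, so each source pattern automatically matches its commuted form as well. A minimal sketch of the effect, using the Hexagon patterns touched below:

    // With fma declared commutative in the target-independent definition ...
    def fma : SDNode<"ISD::FMA", SDTFPTernaryOp, [SDNPCommutative]>;

    // ... this single pattern also covers (fma F32:$Rs, (fneg F32:$Rt), F32:$Rx),
    // so the explicitly commuted duplicate pattern can be deleted.
    def: Pat<(fma (fneg F32:$Rs), F32:$Rt, F32:$Rx),
             (F2_sffms F32:$Rx, F32:$Rs, F32:$Rt)>;
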
@@ -440,8 +440,8 @@ def fsub : SDNode<"ISD::FSUB" , SDTFPBinOp>;
 def fmul       : SDNode<"ISD::FMUL"       , SDTFPBinOp, [SDNPCommutative]>;
 def fdiv       : SDNode<"ISD::FDIV"       , SDTFPBinOp>;
 def frem       : SDNode<"ISD::FREM"       , SDTFPBinOp>;
-def fma        : SDNode<"ISD::FMA"        , SDTFPTernaryOp>;
-def fmad       : SDNode<"ISD::FMAD"       , SDTFPTernaryOp>;
+def fma        : SDNode<"ISD::FMA"        , SDTFPTernaryOp, [SDNPCommutative]>;
+def fmad       : SDNode<"ISD::FMAD"       , SDTFPTernaryOp, [SDNPCommutative]>;
 def fabs       : SDNode<"ISD::FABS"       , SDTFPUnaryOp>;
 def fminnum    : SDNode<"ISD::FMINNUM"    , SDTFPBinOp,
                                   [SDNPCommutative, SDNPAssociative]>;
@@ -498,7 +498,7 @@ def strict_fdiv : SDNode<"ISD::STRICT_FDIV",
 def strict_frem : SDNode<"ISD::STRICT_FREM",
                          SDTFPBinOp, [SDNPHasChain]>;
 def strict_fma : SDNode<"ISD::STRICT_FMA",
-                        SDTFPTernaryOp, [SDNPHasChain]>;
+                        SDTFPTernaryOp, [SDNPHasChain, SDNPCommutative]>;
 def strict_fsqrt : SDNode<"ISD::STRICT_FSQRT",
                           SDTFPUnaryOp, [SDNPHasChain]>;
 def strict_fsin : SDNode<"ISD::STRICT_FSIN",

@@ -3738,18 +3738,6 @@ def : Pat<(f32 (fma (fneg FPR32:$Rn), FPR32:$Rm, (fneg FPR32:$Ra))),
 def : Pat<(f64 (fma (fneg FPR64:$Rn), FPR64:$Rm, (fneg FPR64:$Ra))),
           (FNMADDDrrr FPR64:$Rn, FPR64:$Rm, FPR64:$Ra)>;
-
-// And here "(-a) + b*(-c)"
-
-let Predicates = [HasNEON, HasFullFP16] in
-def : Pat<(f16 (fma FPR16:$Rn, (fneg FPR16:$Rm), (fneg FPR16:$Ra))),
-          (FNMADDHrrr FPR16:$Rn, FPR16:$Rm, FPR16:$Ra)>;
-
-def : Pat<(f32 (fma FPR32:$Rn, (fneg FPR32:$Rm), (fneg FPR32:$Ra))),
-          (FNMADDSrrr FPR32:$Rn, FPR32:$Rm, FPR32:$Ra)>;
-
-def : Pat<(f64 (fma FPR64:$Rn, (fneg FPR64:$Rm), (fneg FPR64:$Ra))),
-          (FNMADDDrrr FPR64:$Rn, FPR64:$Rm, FPR64:$Ra)>;

 //===----------------------------------------------------------------------===//
 // Floating point comparison instructions.
 //===----------------------------------------------------------------------===//
@@ -4067,17 +4055,6 @@ defm FMLA : SIMDThreeSameVectorFPTied<0, 0, 0b001, "fmla",
 defm FMLS : SIMDThreeSameVectorFPTied<0, 1, 0b001, "fmls",
            TriOpFrag<(fma node:$MHS, (fneg node:$RHS), node:$LHS)> >;
-
-// The following def pats catch the case where the LHS of an FMA is negated.
-// The TriOpFrag above catches the case where the middle operand is negated.
-def : Pat<(v2f32 (fma (fneg V64:$Rn), V64:$Rm, V64:$Rd)),
-          (FMLSv2f32 V64:$Rd, V64:$Rn, V64:$Rm)>;
-
-def : Pat<(v4f32 (fma (fneg V128:$Rn), V128:$Rm, V128:$Rd)),
-          (FMLSv4f32 V128:$Rd, V128:$Rn, V128:$Rm)>;
-
-def : Pat<(v2f64 (fma (fneg V128:$Rn), V128:$Rm, V128:$Rd)),
-          (FMLSv2f64 V128:$Rd, V128:$Rn, V128:$Rm)>;

 defm FMULX : SIMDThreeSameVectorFP<0,0,0b011,"fmulx", int_aarch64_neon_fmulx>;
 defm FMUL : SIMDThreeSameVectorFP<1,0,0b011,"fmul", fmul>;
 defm FRECPS : SIMDThreeSameVectorFP<0,0,0b111,"frecps", int_aarch64_neon_frecps>;

@@ -3684,16 +3684,10 @@ multiclass MVE_VFMA_fp_multi<string iname, bit fms, MVEVectorVTInfo VTI> {
   if fms then {
     def : Pat<(VTI.Vec (fma (fneg m1), m2, add)),
               (Inst $add, $m1, $m2)>;
-    def : Pat<(VTI.Vec (fma m1, (fneg m2), add)),
-              (Inst $add, $m1, $m2)>;
     def : Pat<(VTI.Vec (vselect (VTI.Pred VCCR:$pred),
                                 (VTI.Vec (fma (fneg m1), m2, add)),
                                 add)),
               (Inst $add, $m1, $m2, ARMVCCThen, $pred)>;
-    def : Pat<(VTI.Vec (vselect (VTI.Pred VCCR:$pred),
-                                (VTI.Vec (fma m1, (fneg m2), add)),
-                                add)),
-              (Inst $add, $m1, $m2, ARMVCCThen, $pred)>;
     def : Pat<(VTI.Vec (pred_int (fneg m1), m2, add, pred)),
               (Inst $add, $m1, $m2, ARMVCCThen, $pred)>;
     def : Pat<(VTI.Vec (pred_int m1, (fneg m2), add, pred)),

@@ -2264,16 +2264,6 @@ def : Pat<(f32 (fma (fneg SPR:$Sn), SPR:$Sm, SPR:$Sdin)),
 def : Pat<(f16 (fma (fneg (f16 HPR:$Sn)), (f16 HPR:$Sm), (f16 HPR:$Sdin))),
           (VFMSH (f16 HPR:$Sdin), (f16 HPR:$Sn), (f16 HPR:$Sm))>,
       Requires<[HasFullFP16]>;
-// (fma x, (fneg y), z) -> (vfms z, x, y)
-def : Pat<(f64 (fma DPR:$Dn, (fneg DPR:$Dm), DPR:$Ddin)),
-          (VFMSD DPR:$Ddin, DPR:$Dn, DPR:$Dm)>,
-      Requires<[HasVFP4,HasDPVFP]>;
-def : Pat<(f32 (fma SPR:$Sn, (fneg SPR:$Sm), SPR:$Sdin)),
-          (VFMSS SPR:$Sdin, SPR:$Sn, SPR:$Sm)>,
-      Requires<[HasVFP4]>;
-def : Pat<(f16 (fma (f16 HPR:$Sn), (fneg (f16 HPR:$Sm)), (f16 HPR:$Sdin))),
-          (VFMSH (f16 HPR:$Sdin), (f16 HPR:$Sn), (f16 HPR:$Sm))>,
-      Requires<[HasFullFP16]>;

 def VFNMAD : ADbI<0b11101, 0b01, 1, 0,
                   (outs DPR:$Dd), (ins DPR:$Ddin, DPR:$Dn, DPR:$Dm),
@@ -2391,16 +2381,6 @@ def : Pat<(fneg (f32 (fma (fneg SPR:$Sn), SPR:$Sm, SPR:$Sdin))),
 def : Pat<(fneg (f16 (fma (fneg (f16 HPR:$Sn)), (f16 HPR:$Sm), (f16 HPR:$Sdin)))),
           (VFNMSH (f16 HPR:$Sdin), (f16 HPR:$Sn), (f16 HPR:$Sm))>,
       Requires<[HasFullFP16]>;
-// (fneg (fma x, (fneg y), z) -> (vfnms z, x, y)
-def : Pat<(fneg (f64 (fma DPR:$Dn, (fneg DPR:$Dm), DPR:$Ddin))),
-          (VFNMSD DPR:$Ddin, DPR:$Dn, DPR:$Dm)>,
-      Requires<[HasVFP4,HasDPVFP]>;
-def : Pat<(fneg (f32 (fma SPR:$Sn, (fneg SPR:$Sm), SPR:$Sdin))),
-          (VFNMSS SPR:$Sdin, SPR:$Sn, SPR:$Sm)>,
-      Requires<[HasVFP4]>;
-def : Pat<(fneg (f16 (fma (f16 HPR:$Sn), (fneg (f16 HPR:$Sm)), (f16 HPR:$Sdin)))),
-          (VFNMSH (f16 HPR:$Sdin), (f16 HPR:$Sn), (f16 HPR:$Sm))>,
-      Requires<[HasFullFP16]>;

 //===----------------------------------------------------------------------===//
 // FP Conditional moves.

@@ -1708,8 +1708,6 @@ def: Pat<(fma F32:$Rs, F32:$Rt, F32:$Rx),
         (F2_sffma F32:$Rx, F32:$Rs, F32:$Rt)>;
 def: Pat<(fma (fneg F32:$Rs), F32:$Rt, F32:$Rx),
         (F2_sffms F32:$Rx, F32:$Rs, F32:$Rt)>;
-def: Pat<(fma F32:$Rs, (fneg F32:$Rt), F32:$Rx),
-        (F2_sffms F32:$Rx, F32:$Rs, F32:$Rt)>;

 def: Pat<(mul V2I32:$Rs, V2I32:$Rt),
         (PS_vmulw V2I32:$Rs, V2I32:$Rt)>;

@@ -276,14 +276,10 @@ def : Pat<(fma FPR64:$rs1, FPR64:$rs2, (fneg FPR64:$rs3)),
 // fnmsub: -rs1 * rs2 + rs3
 def : Pat<(fma (fneg FPR64:$rs1), FPR64:$rs2, FPR64:$rs3),
           (FNMSUB_D FPR64:$rs1, FPR64:$rs2, FPR64:$rs3, 0b111)>;
-def : Pat<(fma FPR64:$rs1, (fneg FPR64:$rs2), FPR64:$rs3),
-          (FNMSUB_D FPR64:$rs1, FPR64:$rs2, FPR64:$rs3, 0b111)>;

 // fnmadd: -rs1 * rs2 - rs3
 def : Pat<(fma (fneg FPR64:$rs1), FPR64:$rs2, (fneg FPR64:$rs3)),
           (FNMADD_D FPR64:$rs1, FPR64:$rs2, FPR64:$rs3, 0b111)>;
-def : Pat<(fma FPR64:$rs1, (fneg FPR64:$rs2), (fneg FPR64:$rs3)),
-          (FNMADD_D FPR64:$rs1, FPR64:$rs2, FPR64:$rs3, 0b111)>;

 // The RISC-V 2.2 user-level ISA spec defines fmin and fmax as returning the
 // canonical NaN when giving a signaling NaN. This doesn't match the LLVM

@@ -332,14 +332,10 @@ def : Pat<(fma FPR32:$rs1, FPR32:$rs2, (fneg FPR32:$rs3)),
 // fnmsub: -rs1 * rs2 + rs3
 def : Pat<(fma (fneg FPR32:$rs1), FPR32:$rs2, FPR32:$rs3),
           (FNMSUB_S FPR32:$rs1, FPR32:$rs2, FPR32:$rs3, 0b111)>;
-def : Pat<(fma FPR32:$rs1, (fneg FPR32:$rs2), FPR32:$rs3),
-          (FNMSUB_S FPR32:$rs1, FPR32:$rs2, FPR32:$rs3, 0b111)>;

 // fnmadd: -rs1 * rs2 - rs3
 def : Pat<(fma (fneg FPR32:$rs1), FPR32:$rs2, (fneg FPR32:$rs3)),
           (FNMADD_S FPR32:$rs1, FPR32:$rs2, FPR32:$rs3, 0b111)>;
-def : Pat<(fma FPR32:$rs1, (fneg FPR32:$rs2), (fneg FPR32:$rs3)),
-          (FNMADD_S FPR32:$rs1, FPR32:$rs2, FPR32:$rs3, 0b111)>;

 // The RISC-V 2.2 user-level ISA spec defines fmin and fmax as returning the
 // canonical NaN when given a signaling NaN. This doesn't match the LLVM

@@ -6533,7 +6533,7 @@ multiclass avx512_fma3p_213_f<bits<8> opc, string OpcodeStr, SDNode OpNode,
                               avx512vl_f64_info, "PD">, VEX_W;
 }

-defm VFMADD213 : avx512_fma3p_213_f<0xA8, "vfmadd213", X86any_Fmadd,
+defm VFMADD213 : avx512_fma3p_213_f<0xA8, "vfmadd213", any_fma,
                                     X86Fmadd, X86FmaddRnd>;
 defm VFMSUB213 : avx512_fma3p_213_f<0xAA, "vfmsub213", X86any_Fmsub,
                                     X86Fmsub, X86FmsubRnd>;
@@ -6624,7 +6624,7 @@ multiclass avx512_fma3p_231_f<bits<8> opc, string OpcodeStr, SDNode OpNode,
                               avx512vl_f64_info, "PD">, VEX_W;
 }

-defm VFMADD231 : avx512_fma3p_231_f<0xB8, "vfmadd231", X86any_Fmadd,
+defm VFMADD231 : avx512_fma3p_231_f<0xB8, "vfmadd231", any_fma,
                                     X86Fmadd, X86FmaddRnd>;
 defm VFMSUB231 : avx512_fma3p_231_f<0xBA, "vfmsub231", X86any_Fmsub,
                                     X86Fmsub, X86FmsubRnd>;
@@ -6716,7 +6716,7 @@ multiclass avx512_fma3p_132_f<bits<8> opc, string OpcodeStr, SDNode OpNode,
                               avx512vl_f64_info, "PD">, VEX_W;
 }

-defm VFMADD132 : avx512_fma3p_132_f<0x98, "vfmadd132", X86any_Fmadd,
+defm VFMADD132 : avx512_fma3p_132_f<0x98, "vfmadd132", any_fma,
                                     X86Fmadd, X86FmaddRnd>;
 defm VFMSUB132 : avx512_fma3p_132_f<0x9A, "vfmsub132", X86any_Fmsub,
                                     X86Fmsub, X86FmsubRnd>;
@@ -6819,7 +6819,7 @@ multiclass avx512_fma3s<bits<8> opc213, bits<8> opc231, bits<8> opc132,
   }
 }

-defm VFMADD : avx512_fma3s<0xA9, 0xB9, 0x99, "vfmadd", X86any_Fmadd, X86FmaddRnd>;
+defm VFMADD : avx512_fma3s<0xA9, 0xB9, 0x99, "vfmadd", any_fma, X86FmaddRnd>;
 defm VFMSUB : avx512_fma3s<0xAB, 0xBB, 0x9B, "vfmsub", X86any_Fmsub, X86FmsubRnd>;
 defm VFNMADD : avx512_fma3s<0xAD, 0xBD, 0x9D, "vfnmadd", X86any_Fnmadd, X86FnmaddRnd>;
 defm VFNMSUB : avx512_fma3s<0xAF, 0xBF, 0x9F, "vfnmsub", X86any_Fnmsub, X86FnmsubRnd>;
@@ -7027,7 +7027,7 @@ multiclass avx512_scalar_fma_patterns<SDNode Op, SDNode MaskedOp,
   }
 }

-defm : avx512_scalar_fma_patterns<X86any_Fmadd, X86Fmadd, X86FmaddRnd, "VFMADD",
+defm : avx512_scalar_fma_patterns<any_fma, X86Fmadd, X86FmaddRnd, "VFMADD",
                                   "SS", X86Movss, v4f32x_info, fp32imm0>;
 defm : avx512_scalar_fma_patterns<X86any_Fmsub, X86Fmsub, X86FmsubRnd, "VFMSUB",
                                   "SS", X86Movss, v4f32x_info, fp32imm0>;
@@ -7036,7 +7036,7 @@ defm : avx512_scalar_fma_patterns<X86any_Fnmadd, X86Fnmadd, X86FnmaddRnd, "VFNMA
 defm : avx512_scalar_fma_patterns<X86any_Fnmsub, X86Fnmsub, X86FnmsubRnd, "VFNMSUB",
                                   "SS", X86Movss, v4f32x_info, fp32imm0>;

-defm : avx512_scalar_fma_patterns<X86any_Fmadd, X86Fmadd, X86FmaddRnd, "VFMADD",
+defm : avx512_scalar_fma_patterns<any_fma, X86Fmadd, X86FmaddRnd, "VFMADD",
                                   "SD", X86Movsd, v2f64x_info, fp64imm0>;
 defm : avx512_scalar_fma_patterns<X86any_Fmsub, X86Fmsub, X86FmsubRnd, "VFMSUB",
                                   "SD", X86Movsd, v2f64x_info, fp64imm0>;

@@ -123,7 +123,7 @@ multiclass fma3p_forms<bits<8> opc132, bits<8> opc213, bits<8> opc231,
 // Fused Multiply-Add
 let ExeDomain = SSEPackedSingle in {
   defm VFMADD : fma3p_forms<0x98, 0xA8, 0xB8, "vfmadd", "ps", "PS",
-                            loadv4f32, loadv8f32, X86any_Fmadd, v4f32, v8f32,
+                            loadv4f32, loadv8f32, any_fma, v4f32, v8f32,
                             SchedWriteFMA>;
   defm VFMSUB : fma3p_forms<0x9A, 0xAA, 0xBA, "vfmsub", "ps", "PS",
                             loadv4f32, loadv8f32, X86any_Fmsub, v4f32, v8f32,
@@ -138,7 +138,7 @@ let ExeDomain = SSEPackedSingle in {

 let ExeDomain = SSEPackedDouble in {
   defm VFMADD : fma3p_forms<0x98, 0xA8, 0xB8, "vfmadd", "pd", "PD",
-                            loadv2f64, loadv4f64, X86any_Fmadd, v2f64,
+                            loadv2f64, loadv4f64, any_fma, v2f64,
                             v4f64, SchedWriteFMA>, VEX_W;
   defm VFMSUB : fma3p_forms<0x9A, 0xAA, 0xBA, "vfmsub", "pd", "PD",
                             loadv2f64, loadv4f64, X86any_Fmsub, v2f64,
@@ -319,7 +319,7 @@ multiclass fma3s<bits<8> opc132, bits<8> opc213, bits<8> opc231,
                  VR128, sdmem, sched>, VEX_W;
 }

-defm VFMADD : fma3s<0x99, 0xA9, 0xB9, "vfmadd", X86any_Fmadd,
+defm VFMADD : fma3s<0x99, 0xA9, 0xB9, "vfmadd", any_fma,
                     SchedWriteFMA.Scl>, VEX_LIG;
 defm VFMSUB : fma3s<0x9B, 0xAB, 0xBB, "vfmsub", X86any_Fmsub,
                     SchedWriteFMA.Scl>, VEX_LIG;
@@ -372,12 +372,12 @@ multiclass scalar_fma_patterns<SDNode Op, string Prefix, string Suffix,
   }
 }

-defm : scalar_fma_patterns<X86any_Fmadd, "VFMADD", "SS", X86Movss, v4f32, f32, FR32, loadf32>;
+defm : scalar_fma_patterns<any_fma, "VFMADD", "SS", X86Movss, v4f32, f32, FR32, loadf32>;
 defm : scalar_fma_patterns<X86any_Fmsub, "VFMSUB", "SS", X86Movss, v4f32, f32, FR32, loadf32>;
 defm : scalar_fma_patterns<X86any_Fnmadd, "VFNMADD", "SS", X86Movss, v4f32, f32, FR32, loadf32>;
 defm : scalar_fma_patterns<X86any_Fnmsub, "VFNMSUB", "SS", X86Movss, v4f32, f32, FR32, loadf32>;

-defm : scalar_fma_patterns<X86any_Fmadd, "VFMADD", "SD", X86Movsd, v2f64, f64, FR64, loadf64>;
+defm : scalar_fma_patterns<any_fma, "VFMADD", "SD", X86Movsd, v2f64, f64, FR64, loadf64>;
 defm : scalar_fma_patterns<X86any_Fmsub, "VFMSUB", "SD", X86Movsd, v2f64, f64, FR64, loadf64>;
 defm : scalar_fma_patterns<X86any_Fnmadd, "VFNMADD", "SD", X86Movsd, v2f64, f64, FR64, loadf64>;
 defm : scalar_fma_patterns<X86any_Fnmsub, "VFNMSUB", "SD", X86Movsd, v2f64, f64, FR64, loadf64>;
@@ -538,7 +538,7 @@ let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0 in {

 let ExeDomain = SSEPackedSingle in {
   // Scalar Instructions
-  defm VFMADDSS4 : fma4s<0x6A, "vfmaddss", FR32, f32mem, f32, X86any_Fmadd, loadf32,
+  defm VFMADDSS4 : fma4s<0x6A, "vfmaddss", FR32, f32mem, f32, any_fma, loadf32,
                          SchedWriteFMA.Scl>,
                    fma4s_int<0x6A, "vfmaddss", ssmem, v4f32,
                              SchedWriteFMA.Scl>;
@@ -555,7 +555,7 @@ let ExeDomain = SSEPackedSingle in {
                    fma4s_int<0x7E, "vfnmsubss", ssmem, v4f32,
                              SchedWriteFMA.Scl>;
   // Packed Instructions
-  defm VFMADDPS4 : fma4p<0x68, "vfmaddps", X86any_Fmadd, v4f32, v8f32,
+  defm VFMADDPS4 : fma4p<0x68, "vfmaddps", any_fma, v4f32, v8f32,
                          loadv4f32, loadv8f32, SchedWriteFMA>;
   defm VFMSUBPS4 : fma4p<0x6C, "vfmsubps", X86any_Fmsub, v4f32, v8f32,
                          loadv4f32, loadv8f32, SchedWriteFMA>;
@@ -571,7 +571,7 @@ let ExeDomain = SSEPackedSingle in {

 let ExeDomain = SSEPackedDouble in {
   // Scalar Instructions
-  defm VFMADDSD4 : fma4s<0x6B, "vfmaddsd", FR64, f64mem, f64, X86any_Fmadd, loadf64,
+  defm VFMADDSD4 : fma4s<0x6B, "vfmaddsd", FR64, f64mem, f64, any_fma, loadf64,
                          SchedWriteFMA.Scl>,
                    fma4s_int<0x6B, "vfmaddsd", sdmem, v2f64,
                              SchedWriteFMA.Scl>;
@@ -588,7 +588,7 @@ let ExeDomain = SSEPackedDouble in {
                    fma4s_int<0x7F, "vfnmsubsd", sdmem, v2f64,
                              SchedWriteFMA.Scl>;
   // Packed Instructions
-  defm VFMADDPD4 : fma4p<0x69, "vfmaddpd", X86any_Fmadd, v2f64, v4f64,
+  defm VFMADDPD4 : fma4p<0x69, "vfmaddpd", any_fma, v2f64, v4f64,
                          loadv2f64, loadv4f64, SchedWriteFMA>;
   defm VFMSUBPD4 : fma4p<0x6D, "vfmsubpd", X86any_Fmsub, v2f64, v4f64,
                          loadv2f64, loadv4f64, SchedWriteFMA>;
@@ -629,12 +629,12 @@ multiclass scalar_fma4_patterns<SDNode Op, string Name,
   }
 }

-defm : scalar_fma4_patterns<X86any_Fmadd, "VFMADDSS4", v4f32, f32, FR32, loadf32>;
+defm : scalar_fma4_patterns<any_fma, "VFMADDSS4", v4f32, f32, FR32, loadf32>;
 defm : scalar_fma4_patterns<X86any_Fmsub, "VFMSUBSS4", v4f32, f32, FR32, loadf32>;
 defm : scalar_fma4_patterns<X86any_Fnmadd, "VFNMADDSS4", v4f32, f32, FR32, loadf32>;
 defm : scalar_fma4_patterns<X86any_Fnmsub, "VFNMSUBSS4", v4f32, f32, FR32, loadf32>;

-defm : scalar_fma4_patterns<X86any_Fmadd, "VFMADDSD4", v2f64, f64, FR64, loadf64>;
+defm : scalar_fma4_patterns<any_fma, "VFMADDSD4", v2f64, f64, FR64, loadf64>;
 defm : scalar_fma4_patterns<X86any_Fmsub, "VFMSUBSD4", v2f64, f64, FR64, loadf64>;
 defm : scalar_fma4_patterns<X86any_Fnmadd, "VFNMADDSD4", v2f64, f64, FR64, loadf64>;
 defm : scalar_fma4_patterns<X86any_Fnmsub, "VFNMSUBSD4", v2f64, f64, FR64, loadf64>;

@@ -540,9 +540,6 @@ def X86fgetexpSAEs : SDNode<"X86ISD::FGETEXPS_SAE", SDTFPBinOp>;

 def X86Fmadd : SDNode<"ISD::FMA", SDTFPTernaryOp, [SDNPCommutative]>;
 def X86strict_Fmadd : SDNode<"ISD::STRICT_FMA", SDTFPTernaryOp, [SDNPCommutative, SDNPHasChain]>;
-def X86any_Fmadd : PatFrags<(ops node:$src1, node:$src2, node:$src3),
-                            [(X86strict_Fmadd node:$src1, node:$src2, node:$src3),
-                             (X86Fmadd node:$src1, node:$src2, node:$src3)]>;
 def X86Fnmadd : SDNode<"X86ISD::FNMADD", SDTFPTernaryOp, [SDNPCommutative]>;
 def X86strict_Fnmadd : SDNode<"X86ISD::STRICT_FNMADD", SDTFPTernaryOp, [SDNPCommutative, SDNPHasChain]>;
 def X86any_Fnmadd : PatFrags<(ops node:$src1, node:$src2, node:$src3),

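For reference (not part of this change), the target-independent any_fma fragment that replaces X86any_Fmadd above is defined in TargetSelectionDAG.td along these lines, pairing the strict and non-strict nodes just as the deleted X86-local fragment did:

    def any_fma : PatFrags<(ops node:$src1, node:$src2, node:$src3),
                           [(strict_fma node:$src1, node:$src2, node:$src3),
                            (fma node:$src1, node:$src2, node:$src3)]>;
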
@@ -279,8 +279,8 @@ body: |
     ; CHECK: liveins: $x0
     ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0
     ; CHECK: [[LDRBui:%[0-9]+]]:fpr8 = LDRBui [[COPY]], 0 :: (load 1)
-    ; CHECK: [[COPY2:%[0-9]+]]:gpr32 = COPY [[LDRBui]]
-    ; CHECK: [[UBFMWri:%[0-9]+]]:gpr32 = UBFMWri [[COPY2]], 0, 0
+    ; CHECK: [[COPY1:%[0-9]+]]:gpr32 = COPY [[LDRBui]]
+    ; CHECK: [[UBFMWri:%[0-9]+]]:gpr32 = UBFMWri [[COPY1]], 0, 0
     ; CHECK: $noreg = PATCHABLE_RET [[UBFMWri]]
     %2:gpr(p0) = COPY $x0
     %0:fpr(s1) = G_LOAD %2(p0) :: (load 1)
@@ -544,7 +544,7 @@ body: |
     ; CHECK: [[COPY:%[0-9]+]]:fpr32 = COPY $s2
     ; CHECK: [[COPY1:%[0-9]+]]:fpr32 = COPY $s1
     ; CHECK: [[COPY2:%[0-9]+]]:fpr32 = COPY $s0
-    ; CHECK: [[FNMADDSrrr:%[0-9]+]]:fpr32 = FNMADDSrrr [[COPY2]], [[COPY1]], [[COPY]]
+    ; CHECK: [[FNMADDSrrr:%[0-9]+]]:fpr32 = FNMADDSrrr [[COPY1]], [[COPY2]], [[COPY]]
     ; CHECK: $noreg = PATCHABLE_RET [[FNMADDSrrr]]
     %5:fpr(s32) = COPY $s2
     %4:fpr(s32) = COPY $s1
@@ -581,7 +581,7 @@ body: |
     ; CHECK: [[COPY:%[0-9]+]]:fpr64 = COPY $d2
     ; CHECK: [[COPY1:%[0-9]+]]:fpr64 = COPY $d1
     ; CHECK: [[COPY2:%[0-9]+]]:fpr64 = COPY $d0
-    ; CHECK: [[FNMADDDrrr:%[0-9]+]]:fpr64 = FNMADDDrrr [[COPY2]], [[COPY1]], [[COPY]]
+    ; CHECK: [[FNMADDDrrr:%[0-9]+]]:fpr64 = FNMADDDrrr [[COPY1]], [[COPY2]], [[COPY]]
     ; CHECK: $noreg = PATCHABLE_RET [[FNMADDDrrr]]
     %5:fpr(s64) = COPY $d2
     %4:fpr(s64) = COPY $d1
@@ -1083,7 +1083,7 @@ body: |
     ; CHECK: [[COPY:%[0-9]+]]:fpr64 = COPY $d2
     ; CHECK: [[COPY1:%[0-9]+]]:fpr64 = COPY $d1
     ; CHECK: [[COPY2:%[0-9]+]]:fpr64 = COPY $d0
-    ; CHECK: [[FMLSv2f32_:%[0-9]+]]:fpr64 = FMLSv2f32 [[COPY1]], [[COPY]], [[COPY2]]
+    ; CHECK: [[FMLSv2f32_:%[0-9]+]]:fpr64 = FMLSv2f32 [[COPY1]], [[COPY2]], [[COPY]]
     ; CHECK: $noreg = PATCHABLE_RET [[FMLSv2f32_]]
     %4:fpr(<2 x s32>) = COPY $d2
     %3:fpr(<2 x s32>) = COPY $d1
@@ -1118,7 +1118,7 @@ body: |
     ; CHECK: [[COPY:%[0-9]+]]:fpr128 = COPY $q2
     ; CHECK: [[COPY1:%[0-9]+]]:fpr128 = COPY $q1
     ; CHECK: [[COPY2:%[0-9]+]]:fpr128 = COPY $q0
-    ; CHECK: [[FMLSv4f32_:%[0-9]+]]:fpr128 = FMLSv4f32 [[COPY1]], [[COPY]], [[COPY2]]
+    ; CHECK: [[FMLSv4f32_:%[0-9]+]]:fpr128 = FMLSv4f32 [[COPY1]], [[COPY2]], [[COPY]]
     ; CHECK: $noreg = PATCHABLE_RET [[FMLSv4f32_]]
     %4:fpr(<4 x s32>) = COPY $q2
     %3:fpr(<4 x s32>) = COPY $q1
@@ -1153,7 +1153,7 @@ body: |
     ; CHECK: [[COPY:%[0-9]+]]:fpr128 = COPY $q2
     ; CHECK: [[COPY1:%[0-9]+]]:fpr128 = COPY $q1
     ; CHECK: [[COPY2:%[0-9]+]]:fpr128 = COPY $q0
-    ; CHECK: [[FMLSv2f64_:%[0-9]+]]:fpr128 = FMLSv2f64 [[COPY1]], [[COPY]], [[COPY2]]
+    ; CHECK: [[FMLSv2f64_:%[0-9]+]]:fpr128 = FMLSv2f64 [[COPY1]], [[COPY2]], [[COPY]]
     ; CHECK: $noreg = PATCHABLE_RET [[FMLSv2f64_]]
     %4:fpr(<2 x s64>) = COPY $q2
     %3:fpr(<2 x s64>) = COPY $q1

@@ -725,7 +725,7 @@ define <2 x float> @fmls_commuted_neg_2s(<2 x float>* %A, <2 x float>* %B, <2 x
 ; CHECK-NEXT:    ldr d1, [x0]
 ; CHECK-NEXT:    ldr d2, [x1]
 ; CHECK-NEXT:    ldr d0, [x2]
-; CHECK-NEXT:    fmls.2s v0, v2, v1
+; CHECK-NEXT:    fmls.2s v0, v1, v2
 ; CHECK-NEXT:    ret
   %tmp1 = load <2 x float>, <2 x float>* %A
   %tmp2 = load <2 x float>, <2 x float>* %B
@@ -741,7 +741,7 @@ define <4 x float> @fmls_commuted_neg_4s(<4 x float>* %A, <4 x float>* %B, <4 x
 ; CHECK-NEXT:    ldr q1, [x0]
 ; CHECK-NEXT:    ldr q2, [x1]
 ; CHECK-NEXT:    ldr q0, [x2]
-; CHECK-NEXT:    fmls.4s v0, v2, v1
+; CHECK-NEXT:    fmls.4s v0, v1, v2
 ; CHECK-NEXT:    ret
   %tmp1 = load <4 x float>, <4 x float>* %A
   %tmp2 = load <4 x float>, <4 x float>* %B
@@ -757,7 +757,7 @@ define <2 x double> @fmls_commuted_neg_2d(<2 x double>* %A, <2 x double>* %B, <2
 ; CHECK-NEXT:    ldr q1, [x0]
 ; CHECK-NEXT:    ldr q2, [x1]
 ; CHECK-NEXT:    ldr q0, [x2]
-; CHECK-NEXT:    fmls.2d v0, v2, v1
+; CHECK-NEXT:    fmls.2d v0, v1, v2
 ; CHECK-NEXT:    ret
   %tmp1 = load <2 x double>, <2 x double>* %A
   %tmp2 = load <2 x double>, <2 x double>* %B

@@ -640,7 +640,7 @@ body: |
     ; CHECK: [[COPY:%[0-9]+]]:dpr = COPY $d0
     ; CHECK: [[COPY1:%[0-9]+]]:dpr = COPY $d1
     ; CHECK: [[COPY2:%[0-9]+]]:dpr = COPY $d2
-    ; CHECK: [[VFMSD:%[0-9]+]]:dpr = VFMSD [[COPY2]], [[COPY]], [[COPY1]], 14 /* CC::al */, $noreg
+    ; CHECK: [[VFMSD:%[0-9]+]]:dpr = VFMSD [[COPY2]], [[COPY1]], [[COPY]], 14 /* CC::al */, $noreg
     ; CHECK: $d0 = COPY [[VFMSD]]
     ; CHECK: BX_RET 14 /* CC::al */, $noreg, implicit $d0
     %0(s64) = COPY $d0

@@ -31,7 +31,7 @@ body: |
     ; CHECK: [[COPY:%[0-9]+]]:dpr = COPY $d0
     ; CHECK: [[COPY1:%[0-9]+]]:dpr = COPY $d1
     ; CHECK: [[COPY2:%[0-9]+]]:dpr = COPY $d2
-    ; CHECK: [[VFNMSD:%[0-9]+]]:dpr = VFNMSD [[COPY2]], [[COPY]], [[COPY1]], 14 /* CC::al */, $noreg
+    ; CHECK: [[VFNMSD:%[0-9]+]]:dpr = VFNMSD [[COPY2]], [[COPY1]], [[COPY]], 14 /* CC::al */, $noreg
     ; CHECK: $d0 = COPY [[VFNMSD]]
     ; CHECK: MOVPCLR 14 /* CC::al */, $noreg, implicit $d0
     %0:fprb(s64) = COPY $d0

@@ -257,8 +257,8 @@ define arm_aapcs_vfpcc void @fms1(half *%a1, half *%a2, half *%a3) {
 define arm_aapcs_vfpcc void @fms2(half *%a1, half *%a2, half *%a3) {
 ; CHECK-LABEL: fms2:
 ; CHECK:       @ %bb.0:
-; CHECK-NEXT:    vldr.16 s0, [r0]
-; CHECK-NEXT:    vldr.16 s2, [r1]
+; CHECK-NEXT:    vldr.16 s0, [r1]
+; CHECK-NEXT:    vldr.16 s2, [r0]
 ; CHECK-NEXT:    vldr.16 s4, [r2]
 ; CHECK-NEXT:    vfms.f16 s4, s2, s0
 ; CHECK-NEXT:    vstr.16 s4, [r0]
@@ -266,8 +266,8 @@ define arm_aapcs_vfpcc void @fms2(half *%a1, half *%a2, half *%a3) {
 ;
 ; DONT-FUSE-LABEL: fms2:
 ; DONT-FUSE:       @ %bb.0:
-; DONT-FUSE-NEXT:    vldr.16 s0, [r0]
-; DONT-FUSE-NEXT:    vldr.16 s2, [r1]
+; DONT-FUSE-NEXT:    vldr.16 s0, [r1]
+; DONT-FUSE-NEXT:    vldr.16 s2, [r0]
 ; DONT-FUSE-NEXT:    vldr.16 s4, [r2]
 ; DONT-FUSE-NEXT:    vfms.f16 s4, s2, s0
 ; DONT-FUSE-NEXT:    vstr.16 s4, [r0]
@@ -399,8 +399,8 @@ define arm_aapcs_vfpcc void @fnms2(half *%a1, half *%a2, half *%a3) {
 define arm_aapcs_vfpcc void @fnms3(half *%a1, half *%a2, half *%a3) {
 ; CHECK-LABEL: fnms3:
 ; CHECK:       @ %bb.0:
-; CHECK-NEXT:    vldr.16 s0, [r1]
-; CHECK-NEXT:    vldr.16 s2, [r0]
+; CHECK-NEXT:    vldr.16 s0, [r0]
+; CHECK-NEXT:    vldr.16 s2, [r1]
 ; CHECK-NEXT:    vldr.16 s4, [r2]
 ; CHECK-NEXT:    vfnms.f16 s4, s2, s0
 ; CHECK-NEXT:    vstr.16 s4, [r0]
@@ -408,8 +408,8 @@ define arm_aapcs_vfpcc void @fnms3(half *%a1, half *%a2, half *%a3) {
 ;
 ; DONT-FUSE-LABEL: fnms3:
 ; DONT-FUSE:       @ %bb.0:
-; DONT-FUSE-NEXT:    vldr.16 s0, [r1]
-; DONT-FUSE-NEXT:    vldr.16 s2, [r0]
+; DONT-FUSE-NEXT:    vldr.16 s0, [r0]
+; DONT-FUSE-NEXT:    vldr.16 s2, [r1]
 ; DONT-FUSE-NEXT:    vldr.16 s4, [r2]
 ; DONT-FUSE-NEXT:    vfnms.f16 s4, s2, s0
 ; DONT-FUSE-NEXT:    vstr.16 s4, [r0]

@@ -543,7 +543,7 @@ define double @fnmadd_d_2(double %a, double %b, double %c) nounwind {
 ; RV32IFD-NEXT:    fcvt.d.w ft3, zero
 ; RV32IFD-NEXT:    fadd.d ft2, ft2, ft3
 ; RV32IFD-NEXT:    fadd.d ft1, ft1, ft3
-; RV32IFD-NEXT:    fnmadd.d ft0, ft0, ft2, ft1
+; RV32IFD-NEXT:    fnmadd.d ft0, ft2, ft0, ft1
 ; RV32IFD-NEXT:    fsd ft0, 8(sp)
 ; RV32IFD-NEXT:    lw a0, 8(sp)
 ; RV32IFD-NEXT:    lw a1, 12(sp)
@@ -558,7 +558,7 @@ define double @fnmadd_d_2(double %a, double %b, double %c) nounwind {
 ; RV64IFD-NEXT:    fmv.d.x ft3, zero
 ; RV64IFD-NEXT:    fadd.d ft2, ft2, ft3
 ; RV64IFD-NEXT:    fadd.d ft1, ft1, ft3
-; RV64IFD-NEXT:    fnmadd.d ft0, ft0, ft2, ft1
+; RV64IFD-NEXT:    fnmadd.d ft0, ft2, ft0, ft1
 ; RV64IFD-NEXT:    fmv.x.d a0, ft0
 ; RV64IFD-NEXT:    ret
   %b_ = fadd double 0.0, %b
@@ -622,7 +622,7 @@ define double @fnmsub_d_2(double %a, double %b, double %c) nounwind {
 ; RV32IFD-NEXT:    fld ft2, 8(sp)
 ; RV32IFD-NEXT:    fcvt.d.w ft3, zero
 ; RV32IFD-NEXT:    fadd.d ft2, ft2, ft3
-; RV32IFD-NEXT:    fnmsub.d ft0, ft1, ft2, ft0
+; RV32IFD-NEXT:    fnmsub.d ft0, ft2, ft1, ft0
 ; RV32IFD-NEXT:    fsd ft0, 8(sp)
 ; RV32IFD-NEXT:    lw a0, 8(sp)
 ; RV32IFD-NEXT:    lw a1, 12(sp)
@@ -636,7 +636,7 @@ define double @fnmsub_d_2(double %a, double %b, double %c) nounwind {
 ; RV64IFD-NEXT:    fmv.d.x ft2, a1
 ; RV64IFD-NEXT:    fmv.d.x ft3, zero
 ; RV64IFD-NEXT:    fadd.d ft2, ft2, ft3
-; RV64IFD-NEXT:    fnmsub.d ft0, ft1, ft2, ft0
+; RV64IFD-NEXT:    fnmsub.d ft0, ft2, ft1, ft0
 ; RV64IFD-NEXT:    fmv.x.d a0, ft0
 ; RV64IFD-NEXT:    ret
   %b_ = fadd double 0.0, %b

@@ -405,7 +405,7 @@ define float @fnmadd_s_2(float %a, float %b, float %c) nounwind {
 ; RV32IF-NEXT:    fmv.w.x ft3, zero
 ; RV32IF-NEXT:    fadd.s ft2, ft2, ft3
 ; RV32IF-NEXT:    fadd.s ft1, ft1, ft3
-; RV32IF-NEXT:    fnmadd.s ft0, ft0, ft2, ft1
+; RV32IF-NEXT:    fnmadd.s ft0, ft2, ft0, ft1
 ; RV32IF-NEXT:    fmv.x.w a0, ft0
 ; RV32IF-NEXT:    ret
 ;
@@ -417,7 +417,7 @@ define float @fnmadd_s_2(float %a, float %b, float %c) nounwind {
 ; RV64IF-NEXT:    fmv.w.x ft3, zero
 ; RV64IF-NEXT:    fadd.s ft2, ft2, ft3
 ; RV64IF-NEXT:    fadd.s ft1, ft1, ft3
-; RV64IF-NEXT:    fnmadd.s ft0, ft0, ft2, ft1
+; RV64IF-NEXT:    fnmadd.s ft0, ft2, ft0, ft1
 ; RV64IF-NEXT:    fmv.x.w a0, ft0
 ; RV64IF-NEXT:    ret
   %b_ = fadd float 0.0, %b
@@ -464,7 +464,7 @@ define float @fnmsub_s_2(float %a, float %b, float %c) nounwind {
 ; RV32IF-NEXT:    fmv.w.x ft2, a1
 ; RV32IF-NEXT:    fmv.w.x ft3, zero
 ; RV32IF-NEXT:    fadd.s ft2, ft2, ft3
-; RV32IF-NEXT:    fnmsub.s ft0, ft1, ft2, ft0
+; RV32IF-NEXT:    fnmsub.s ft0, ft2, ft1, ft0
 ; RV32IF-NEXT:    fmv.x.w a0, ft0
 ; RV32IF-NEXT:    ret
 ;
@@ -475,7 +475,7 @@ define float @fnmsub_s_2(float %a, float %b, float %c) nounwind {
 ; RV64IF-NEXT:    fmv.w.x ft2, a1
 ; RV64IF-NEXT:    fmv.w.x ft3, zero
 ; RV64IF-NEXT:    fadd.s ft2, ft2, ft3
-; RV64IF-NEXT:    fnmsub.s ft0, ft1, ft2, ft0
+; RV64IF-NEXT:    fnmsub.s ft0, ft2, ft1, ft0
 ; RV64IF-NEXT:    fmv.x.w a0, ft0
 ; RV64IF-NEXT:    ret
   %b_ = fadd float 0.0, %b

@@ -1592,7 +1592,7 @@ define arm_aapcs_vfpcc void @fms(float* nocapture readonly %pSrc1, float* nocapt
 ; CHECK-NEXT:    @ => This Inner Loop Header: Depth=2
 ; CHECK-NEXT:    vldrw.u32 q1, [r1], #16
 ; CHECK-NEXT:    vldrw.u32 q2, [r0], #16
-; CHECK-NEXT:    vfms.f32 q2, q0, q1
+; CHECK-NEXT:    vfms.f32 q2, q1, q0
 ; CHECK-NEXT:    vstrb.8 q2, [r3], #16
 ; CHECK-NEXT:    le lr, .LBB18_3
 ; CHECK-NEXT:  @ %bb.4: @ %while.end

@@ -403,8 +403,8 @@ define arm_aapcs_vfpcc void @fmss3(float* nocapture readonly %x, float* nocaptur
 ; CHECK-NEXT:    @ =>This Inner Loop Header: Depth=1
 ; CHECK-NEXT:    add.w r12, r12, #4
 ; CHECK-NEXT:    vmov q3, q0
-; CHECK-NEXT:    vldrw.u32 q1, [r1], #16
-; CHECK-NEXT:    vldrw.u32 q2, [r0], #16
+; CHECK-NEXT:    vldrw.u32 q1, [r0], #16
+; CHECK-NEXT:    vldrw.u32 q2, [r1], #16
 ; CHECK-NEXT:    vfms.f32 q3, q2, q1
 ; CHECK-NEXT:    vstrw.32 q3, [r2], #16
 ; CHECK-NEXT:    letp lr, .LBB6_2

@@ -82,7 +82,7 @@ entry:
 define arm_aapcs_vfpcc <8 x half> @test_vfmsq_f16(<8 x half> %a, <8 x half> %b, <8 x half> %c) {
 ; CHECK-LABEL: test_vfmsq_f16:
 ; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    vfms.f16 q0, q1, q2
+; CHECK-NEXT:    vfms.f16 q0, q2, q1
 ; CHECK-NEXT:    bx lr
 entry:
   %0 = fneg <8 x half> %c
@@ -93,7 +93,7 @@ entry:
 define arm_aapcs_vfpcc <4 x float> @test_vfmsq_f32(<4 x float> %a, <4 x float> %b, <4 x float> %c) {
 ; CHECK-LABEL: test_vfmsq_f32:
 ; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    vfms.f32 q0, q1, q2
+; CHECK-NEXT:    vfms.f32 q0, q2, q1
 ; CHECK-NEXT:    bx lr
 entry:
   %0 = fneg <4 x float> %c