diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 299b4716a405..c9f0e2444cc7 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -799,6 +799,23 @@ static char isNegatibleForFree(SDValue Op, bool LegalOperations, TLI.isFPImmLegal(neg(cast(Op)->getValueAPF()), VT, ForCodeSize); } + case ISD::BUILD_VECTOR: { + // Only permit BUILD_VECTOR of constants. + if (llvm::any_of(Op->op_values(), [&](SDValue N) { + return !N.isUndef() && !isa(N); + })) + return 0; + if (!LegalOperations) + return 1; + if (TLI.isOperationLegal(ISD::ConstantFP, VT) && + TLI.isOperationLegal(ISD::BUILD_VECTOR, VT)) + return 1; + return llvm::all_of(Op->op_values(), [&](SDValue N) { + return N.isUndef() || + TLI.isFPImmLegal(neg(cast(N)->getValueAPF()), VT, + ForCodeSize); + }); + } case ISD::FADD: if (!Options->UnsafeFPMath && !Flags.hasNoSignedZeros()) return 0; @@ -859,27 +876,41 @@ static SDValue GetNegatedExpression(SDValue Op, SelectionDAG &DAG, V.changeSign(); return DAG.getConstantFP(V, SDLoc(Op), Op.getValueType()); } + case ISD::BUILD_VECTOR: { + SmallVector Ops; + for (SDValue C : Op->op_values()) { + if (C.isUndef()) { + Ops.push_back(C); + continue; + } + APFloat V = cast(C)->getValueAPF(); + V.changeSign(); + Ops.push_back(DAG.getConstantFP(V, SDLoc(Op), C.getValueType())); + } + return DAG.getBuildVector(Op.getValueType(), SDLoc(Op), Ops); + } case ISD::FADD: assert(Options.UnsafeFPMath || Flags.hasNoSignedZeros()); // fold (fneg (fadd A, B)) -> (fsub (fneg A), B) if (isNegatibleForFree(Op.getOperand(0), LegalOperations, DAG.getTargetLoweringInfo(), &Options, ForCodeSize, - Depth+1)) + Depth + 1)) return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(), GetNegatedExpression(Op.getOperand(0), DAG, LegalOperations, ForCodeSize, - Depth+1), + Depth + 1), Op.getOperand(1), Flags); // fold (fneg (fadd A, B)) -> (fsub (fneg B), A) return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(), GetNegatedExpression(Op.getOperand(1), DAG, LegalOperations, ForCodeSize, - Depth+1), + Depth + 1), Op.getOperand(0), Flags); case ISD::FSUB: // fold (fneg (fsub 0, B)) -> B - if (auto *N0CFP = dyn_cast(Op.getOperand(0))) + if (ConstantFPSDNode *N0CFP = + isConstOrConstSplatFP(Op.getOperand(0), /*AllowUndefs*/ true)) if (N0CFP->isZero()) return Op.getOperand(1); @@ -892,11 +923,11 @@ static SDValue GetNegatedExpression(SDValue Op, SelectionDAG &DAG, // fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y) if (isNegatibleForFree(Op.getOperand(0), LegalOperations, DAG.getTargetLoweringInfo(), &Options, ForCodeSize, - Depth+1)) + Depth + 1)) return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(), GetNegatedExpression(Op.getOperand(0), DAG, LegalOperations, ForCodeSize, - Depth+1), + Depth + 1), Op.getOperand(1), Flags); // fold (fneg (fmul X, Y)) -> (fmul X, (fneg Y)) @@ -904,19 +935,19 @@ static SDValue GetNegatedExpression(SDValue Op, SelectionDAG &DAG, Op.getOperand(0), GetNegatedExpression(Op.getOperand(1), DAG, LegalOperations, ForCodeSize, - Depth+1), Flags); + Depth + 1), Flags); case ISD::FP_EXTEND: case ISD::FSIN: return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(), GetNegatedExpression(Op.getOperand(0), DAG, LegalOperations, ForCodeSize, - Depth+1)); + Depth + 1)); case ISD::FP_ROUND: return DAG.getNode(ISD::FP_ROUND, SDLoc(Op), Op.getValueType(), GetNegatedExpression(Op.getOperand(0), DAG, LegalOperations, ForCodeSize, - Depth+1), + Depth + 1), Op.getOperand(1)); } } diff --git a/llvm/test/CodeGen/AMDGPU/fneg-fabs.f16.ll b/llvm/test/CodeGen/AMDGPU/fneg-fabs.f16.ll index f43c2f8a6b8b..aa52c3d0b4e9 100644 --- a/llvm/test/CodeGen/AMDGPU/fneg-fabs.f16.ll +++ b/llvm/test/CodeGen/AMDGPU/fneg-fabs.f16.ll @@ -110,18 +110,17 @@ define amdgpu_kernel void @fneg_fabs_v4f16(<4 x half> addrspace(1)* %out, <4 x h ; GCN-LABEL: {{^}}fold_user_fneg_fabs_v2f16: ; CI: s_load_dword [[IN:s[0-9]+]] -; CI: s_or_b32 [[FNEG_FABS:s[0-9]+]], [[IN]], 0x80008000 ; CI: s_lshr_b32 -; CI: v_cvt_f32_f16_e32 v{{[0-9]+}}, s{{[0-9]+}} -; CI: v_cvt_f32_f16_e32 v{{[0-9]+}}, s{{[0-9]+}} -; CI: v_mul_f32_e32 v{{[0-9]+}}, 4.0, v{{[0-9]+}} -; CI: v_mul_f32_e32 v{{[0-9]+}}, 4.0, v{{[0-9]+}} +; CI: v_cvt_f32_f16_e64 v{{[0-9]+}}, |s{{[0-9]+}}| +; CI: v_cvt_f32_f16_e64 v{{[0-9]+}}, |s{{[0-9]+}}| +; CI: v_mul_f32_e32 v{{[0-9]+}}, -4.0, v{{[0-9]+}} +; CI: v_mul_f32_e32 v{{[0-9]+}}, -4.0, v{{[0-9]+}} -; VI: v_mul_f16_e64 v{{[0-9]+}}, -|s{{[0-9]+}}|, 4.0 -; VI: v_mul_f16_sdwa v{{[0-9]+}}, -|v{{[0-9]+}}|, v{{[0-9]+}} dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; VI: v_mul_f16_e64 v{{[0-9]+}}, |s{{[0-9]+}}|, -4.0 +; VI: v_mul_f16_sdwa v{{[0-9]+}}, |v{{[0-9]+}}|, v{{[0-9]+}} dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD ; GFX9: s_and_b32 [[ABS:s[0-9]+]], s{{[0-9]+}}, 0x7fff7fff -; GFX9: v_pk_mul_f16 v{{[0-9]+}}, [[ABS]], 4.0 op_sel_hi:[1,0] neg_lo:[1,0] neg_hi:[1,0] +; GFX9: v_pk_mul_f16 v{{[0-9]+}}, [[ABS]], -4.0 op_sel_hi:[1,0] define amdgpu_kernel void @fold_user_fneg_fabs_v2f16(<2 x half> addrspace(1)* %out, <2 x half> %in) #0 { %fabs = call <2 x half> @llvm.fabs.v2f16(<2 x half> %in) %fneg.fabs = fsub <2 x half> , %fabs @@ -147,7 +146,7 @@ define amdgpu_kernel void @s_fneg_multi_use_fabs_v2f16(<2 x half> addrspace(1)* ; GCN-LABEL: {{^}}s_fneg_multi_use_fabs_foldable_neg_v2f16: ; GFX9: s_and_b32 [[ABS:s[0-9]+]], s{{[0-9]+}}, 0x7fff7fff -; GFX9: v_pk_mul_f16 v{{[0-9]+}}, [[ABS]], 4.0 op_sel_hi:[1,0] neg_lo:[1,0] neg_hi:[1,0] +; GFX9: v_pk_mul_f16 v{{[0-9]+}}, [[ABS]], -4.0 op_sel_hi:[1,0] define amdgpu_kernel void @s_fneg_multi_use_fabs_foldable_neg_v2f16(<2 x half> addrspace(1)* %out0, <2 x half> addrspace(1)* %out1, <2 x half> %in) { %fabs = call <2 x half> @llvm.fabs.v2f16(<2 x half> %in) %fneg = fsub <2 x half> , %fabs diff --git a/llvm/test/CodeGen/X86/fma_patterns.ll b/llvm/test/CodeGen/X86/fma_patterns.ll index 74596ba16ed1..704d0a5ce866 100644 --- a/llvm/test/CodeGen/X86/fma_patterns.ll +++ b/llvm/test/CodeGen/X86/fma_patterns.ll @@ -1163,19 +1163,19 @@ define <4 x float> @test_v4f32_mul_y_sub_negone_x_undefs(<4 x float> %x, <4 x fl define <4 x float> @test_v4f32_mul_sub_x_one_y(<4 x float> %x, <4 x float> %y) { ; FMA-INFS-LABEL: test_v4f32_mul_sub_x_one_y: ; FMA-INFS: # %bb.0: -; FMA-INFS-NEXT: vsubps {{.*}}(%rip), %xmm0, %xmm0 +; FMA-INFS-NEXT: vaddps {{.*}}(%rip), %xmm0, %xmm0 ; FMA-INFS-NEXT: vmulps %xmm1, %xmm0, %xmm0 ; FMA-INFS-NEXT: retq ; ; FMA4-INFS-LABEL: test_v4f32_mul_sub_x_one_y: ; FMA4-INFS: # %bb.0: -; FMA4-INFS-NEXT: vsubps {{.*}}(%rip), %xmm0, %xmm0 +; FMA4-INFS-NEXT: vaddps {{.*}}(%rip), %xmm0, %xmm0 ; FMA4-INFS-NEXT: vmulps %xmm1, %xmm0, %xmm0 ; FMA4-INFS-NEXT: retq ; ; AVX512-INFS-LABEL: test_v4f32_mul_sub_x_one_y: ; AVX512-INFS: # %bb.0: -; AVX512-INFS-NEXT: vsubps {{.*}}(%rip){1to4}, %xmm0, %xmm0 +; AVX512-INFS-NEXT: vaddps {{.*}}(%rip){1to4}, %xmm0, %xmm0 ; AVX512-INFS-NEXT: vmulps %xmm1, %xmm0, %xmm0 ; AVX512-INFS-NEXT: retq ; @@ -1201,19 +1201,19 @@ define <4 x float> @test_v4f32_mul_sub_x_one_y(<4 x float> %x, <4 x float> %y) { define <4 x float> @test_v4f32_mul_y_sub_x_one(<4 x float> %x, <4 x float> %y) { ; FMA-INFS-LABEL: test_v4f32_mul_y_sub_x_one: ; FMA-INFS: # %bb.0: -; FMA-INFS-NEXT: vsubps {{.*}}(%rip), %xmm0, %xmm0 +; FMA-INFS-NEXT: vaddps {{.*}}(%rip), %xmm0, %xmm0 ; FMA-INFS-NEXT: vmulps %xmm0, %xmm1, %xmm0 ; FMA-INFS-NEXT: retq ; ; FMA4-INFS-LABEL: test_v4f32_mul_y_sub_x_one: ; FMA4-INFS: # %bb.0: -; FMA4-INFS-NEXT: vsubps {{.*}}(%rip), %xmm0, %xmm0 +; FMA4-INFS-NEXT: vaddps {{.*}}(%rip), %xmm0, %xmm0 ; FMA4-INFS-NEXT: vmulps %xmm0, %xmm1, %xmm0 ; FMA4-INFS-NEXT: retq ; ; AVX512-INFS-LABEL: test_v4f32_mul_y_sub_x_one: ; AVX512-INFS: # %bb.0: -; AVX512-INFS-NEXT: vsubps {{.*}}(%rip){1to4}, %xmm0, %xmm0 +; AVX512-INFS-NEXT: vaddps {{.*}}(%rip){1to4}, %xmm0, %xmm0 ; AVX512-INFS-NEXT: vmulps %xmm0, %xmm1, %xmm0 ; AVX512-INFS-NEXT: retq ; @@ -1239,19 +1239,19 @@ define <4 x float> @test_v4f32_mul_y_sub_x_one(<4 x float> %x, <4 x float> %y) { define <4 x float> @test_v4f32_mul_y_sub_x_one_undefs(<4 x float> %x, <4 x float> %y) { ; FMA-INFS-LABEL: test_v4f32_mul_y_sub_x_one_undefs: ; FMA-INFS: # %bb.0: -; FMA-INFS-NEXT: vsubps {{.*}}(%rip), %xmm0, %xmm0 +; FMA-INFS-NEXT: vaddps {{.*}}(%rip), %xmm0, %xmm0 ; FMA-INFS-NEXT: vmulps %xmm0, %xmm1, %xmm0 ; FMA-INFS-NEXT: retq ; ; FMA4-INFS-LABEL: test_v4f32_mul_y_sub_x_one_undefs: ; FMA4-INFS: # %bb.0: -; FMA4-INFS-NEXT: vsubps {{.*}}(%rip), %xmm0, %xmm0 +; FMA4-INFS-NEXT: vaddps {{.*}}(%rip), %xmm0, %xmm0 ; FMA4-INFS-NEXT: vmulps %xmm0, %xmm1, %xmm0 ; FMA4-INFS-NEXT: retq ; ; AVX512-INFS-LABEL: test_v4f32_mul_y_sub_x_one_undefs: ; AVX512-INFS: # %bb.0: -; AVX512-INFS-NEXT: vsubps {{.*}}(%rip){1to4}, %xmm0, %xmm0 +; AVX512-INFS-NEXT: vaddps {{.*}}(%rip){1to4}, %xmm0, %xmm0 ; AVX512-INFS-NEXT: vmulps %xmm0, %xmm1, %xmm0 ; AVX512-INFS-NEXT: retq ; @@ -1277,19 +1277,19 @@ define <4 x float> @test_v4f32_mul_y_sub_x_one_undefs(<4 x float> %x, <4 x float define <4 x float> @test_v4f32_mul_sub_x_negone_y(<4 x float> %x, <4 x float> %y) { ; FMA-INFS-LABEL: test_v4f32_mul_sub_x_negone_y: ; FMA-INFS: # %bb.0: -; FMA-INFS-NEXT: vsubps {{.*}}(%rip), %xmm0, %xmm0 +; FMA-INFS-NEXT: vaddps {{.*}}(%rip), %xmm0, %xmm0 ; FMA-INFS-NEXT: vmulps %xmm1, %xmm0, %xmm0 ; FMA-INFS-NEXT: retq ; ; FMA4-INFS-LABEL: test_v4f32_mul_sub_x_negone_y: ; FMA4-INFS: # %bb.0: -; FMA4-INFS-NEXT: vsubps {{.*}}(%rip), %xmm0, %xmm0 +; FMA4-INFS-NEXT: vaddps {{.*}}(%rip), %xmm0, %xmm0 ; FMA4-INFS-NEXT: vmulps %xmm1, %xmm0, %xmm0 ; FMA4-INFS-NEXT: retq ; ; AVX512-INFS-LABEL: test_v4f32_mul_sub_x_negone_y: ; AVX512-INFS: # %bb.0: -; AVX512-INFS-NEXT: vsubps {{.*}}(%rip){1to4}, %xmm0, %xmm0 +; AVX512-INFS-NEXT: vaddps {{.*}}(%rip){1to4}, %xmm0, %xmm0 ; AVX512-INFS-NEXT: vmulps %xmm1, %xmm0, %xmm0 ; AVX512-INFS-NEXT: retq ; @@ -1315,19 +1315,19 @@ define <4 x float> @test_v4f32_mul_sub_x_negone_y(<4 x float> %x, <4 x float> %y define <4 x float> @test_v4f32_mul_y_sub_x_negone(<4 x float> %x, <4 x float> %y) { ; FMA-INFS-LABEL: test_v4f32_mul_y_sub_x_negone: ; FMA-INFS: # %bb.0: -; FMA-INFS-NEXT: vsubps {{.*}}(%rip), %xmm0, %xmm0 +; FMA-INFS-NEXT: vaddps {{.*}}(%rip), %xmm0, %xmm0 ; FMA-INFS-NEXT: vmulps %xmm0, %xmm1, %xmm0 ; FMA-INFS-NEXT: retq ; ; FMA4-INFS-LABEL: test_v4f32_mul_y_sub_x_negone: ; FMA4-INFS: # %bb.0: -; FMA4-INFS-NEXT: vsubps {{.*}}(%rip), %xmm0, %xmm0 +; FMA4-INFS-NEXT: vaddps {{.*}}(%rip), %xmm0, %xmm0 ; FMA4-INFS-NEXT: vmulps %xmm0, %xmm1, %xmm0 ; FMA4-INFS-NEXT: retq ; ; AVX512-INFS-LABEL: test_v4f32_mul_y_sub_x_negone: ; AVX512-INFS: # %bb.0: -; AVX512-INFS-NEXT: vsubps {{.*}}(%rip){1to4}, %xmm0, %xmm0 +; AVX512-INFS-NEXT: vaddps {{.*}}(%rip){1to4}, %xmm0, %xmm0 ; AVX512-INFS-NEXT: vmulps %xmm0, %xmm1, %xmm0 ; AVX512-INFS-NEXT: retq ; @@ -1353,19 +1353,19 @@ define <4 x float> @test_v4f32_mul_y_sub_x_negone(<4 x float> %x, <4 x float> %y define <4 x float> @test_v4f32_mul_y_sub_x_negone_undefs(<4 x float> %x, <4 x float> %y) { ; FMA-INFS-LABEL: test_v4f32_mul_y_sub_x_negone_undefs: ; FMA-INFS: # %bb.0: -; FMA-INFS-NEXT: vsubps {{.*}}(%rip), %xmm0, %xmm0 +; FMA-INFS-NEXT: vaddps {{.*}}(%rip), %xmm0, %xmm0 ; FMA-INFS-NEXT: vmulps %xmm0, %xmm1, %xmm0 ; FMA-INFS-NEXT: retq ; ; FMA4-INFS-LABEL: test_v4f32_mul_y_sub_x_negone_undefs: ; FMA4-INFS: # %bb.0: -; FMA4-INFS-NEXT: vsubps {{.*}}(%rip), %xmm0, %xmm0 +; FMA4-INFS-NEXT: vaddps {{.*}}(%rip), %xmm0, %xmm0 ; FMA4-INFS-NEXT: vmulps %xmm0, %xmm1, %xmm0 ; FMA4-INFS-NEXT: retq ; ; AVX512-INFS-LABEL: test_v4f32_mul_y_sub_x_negone_undefs: ; AVX512-INFS: # %bb.0: -; AVX512-INFS-NEXT: vsubps {{.*}}(%rip){1to4}, %xmm0, %xmm0 +; AVX512-INFS-NEXT: vaddps {{.*}}(%rip){1to4}, %xmm0, %xmm0 ; AVX512-INFS-NEXT: vmulps %xmm0, %xmm1, %xmm0 ; AVX512-INFS-NEXT: retq ; diff --git a/llvm/test/CodeGen/X86/fma_patterns_wide.ll b/llvm/test/CodeGen/X86/fma_patterns_wide.ll index fad06c977e80..c261d9958abf 100644 --- a/llvm/test/CodeGen/X86/fma_patterns_wide.ll +++ b/llvm/test/CodeGen/X86/fma_patterns_wide.ll @@ -677,25 +677,25 @@ define <8 x double> @test_v8f64_mul_y_sub_negone_x(<8 x double> %x, <8 x double> define <16 x float> @test_v16f32_mul_sub_x_one_y(<16 x float> %x, <16 x float> %y) { ; FMA-INFS-LABEL: test_v16f32_mul_sub_x_one_y: ; FMA-INFS: # %bb.0: -; FMA-INFS-NEXT: vmovaps {{.*#+}} ymm4 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] -; FMA-INFS-NEXT: vsubps %ymm4, %ymm1, %ymm1 -; FMA-INFS-NEXT: vsubps %ymm4, %ymm0, %ymm0 +; FMA-INFS-NEXT: vmovaps {{.*#+}} ymm4 = [-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0] +; FMA-INFS-NEXT: vaddps %ymm4, %ymm1, %ymm1 +; FMA-INFS-NEXT: vaddps %ymm4, %ymm0, %ymm0 ; FMA-INFS-NEXT: vmulps %ymm2, %ymm0, %ymm0 ; FMA-INFS-NEXT: vmulps %ymm3, %ymm1, %ymm1 ; FMA-INFS-NEXT: retq ; ; FMA4-INFS-LABEL: test_v16f32_mul_sub_x_one_y: ; FMA4-INFS: # %bb.0: -; FMA4-INFS-NEXT: vmovaps {{.*#+}} ymm4 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] -; FMA4-INFS-NEXT: vsubps %ymm4, %ymm1, %ymm1 -; FMA4-INFS-NEXT: vsubps %ymm4, %ymm0, %ymm0 +; FMA4-INFS-NEXT: vmovaps {{.*#+}} ymm4 = [-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0] +; FMA4-INFS-NEXT: vaddps %ymm4, %ymm1, %ymm1 +; FMA4-INFS-NEXT: vaddps %ymm4, %ymm0, %ymm0 ; FMA4-INFS-NEXT: vmulps %ymm2, %ymm0, %ymm0 ; FMA4-INFS-NEXT: vmulps %ymm3, %ymm1, %ymm1 ; FMA4-INFS-NEXT: retq ; ; AVX512-INFS-LABEL: test_v16f32_mul_sub_x_one_y: ; AVX512-INFS: # %bb.0: -; AVX512-INFS-NEXT: vsubps {{.*}}(%rip){1to16}, %zmm0, %zmm0 +; AVX512-INFS-NEXT: vaddps {{.*}}(%rip){1to16}, %zmm0, %zmm0 ; AVX512-INFS-NEXT: vmulps %zmm1, %zmm0, %zmm0 ; AVX512-INFS-NEXT: retq ; @@ -723,25 +723,25 @@ define <16 x float> @test_v16f32_mul_sub_x_one_y(<16 x float> %x, <16 x float> % define <8 x double> @test_v8f64_mul_y_sub_x_one(<8 x double> %x, <8 x double> %y) { ; FMA-INFS-LABEL: test_v8f64_mul_y_sub_x_one: ; FMA-INFS: # %bb.0: -; FMA-INFS-NEXT: vmovapd {{.*#+}} ymm4 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0] -; FMA-INFS-NEXT: vsubpd %ymm4, %ymm1, %ymm1 -; FMA-INFS-NEXT: vsubpd %ymm4, %ymm0, %ymm0 +; FMA-INFS-NEXT: vmovapd {{.*#+}} ymm4 = [-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0] +; FMA-INFS-NEXT: vaddpd %ymm4, %ymm1, %ymm1 +; FMA-INFS-NEXT: vaddpd %ymm4, %ymm0, %ymm0 ; FMA-INFS-NEXT: vmulpd %ymm0, %ymm2, %ymm0 ; FMA-INFS-NEXT: vmulpd %ymm1, %ymm3, %ymm1 ; FMA-INFS-NEXT: retq ; ; FMA4-INFS-LABEL: test_v8f64_mul_y_sub_x_one: ; FMA4-INFS: # %bb.0: -; FMA4-INFS-NEXT: vmovapd {{.*#+}} ymm4 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0] -; FMA4-INFS-NEXT: vsubpd %ymm4, %ymm1, %ymm1 -; FMA4-INFS-NEXT: vsubpd %ymm4, %ymm0, %ymm0 +; FMA4-INFS-NEXT: vmovapd {{.*#+}} ymm4 = [-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0] +; FMA4-INFS-NEXT: vaddpd %ymm4, %ymm1, %ymm1 +; FMA4-INFS-NEXT: vaddpd %ymm4, %ymm0, %ymm0 ; FMA4-INFS-NEXT: vmulpd %ymm0, %ymm2, %ymm0 ; FMA4-INFS-NEXT: vmulpd %ymm1, %ymm3, %ymm1 ; FMA4-INFS-NEXT: retq ; ; AVX512-INFS-LABEL: test_v8f64_mul_y_sub_x_one: ; AVX512-INFS: # %bb.0: -; AVX512-INFS-NEXT: vsubpd {{.*}}(%rip){1to8}, %zmm0, %zmm0 +; AVX512-INFS-NEXT: vaddpd {{.*}}(%rip){1to8}, %zmm0, %zmm0 ; AVX512-INFS-NEXT: vmulpd %zmm0, %zmm1, %zmm0 ; AVX512-INFS-NEXT: retq ; @@ -769,25 +769,25 @@ define <8 x double> @test_v8f64_mul_y_sub_x_one(<8 x double> %x, <8 x double> %y define <16 x float> @test_v16f32_mul_sub_x_negone_y(<16 x float> %x, <16 x float> %y) { ; FMA-INFS-LABEL: test_v16f32_mul_sub_x_negone_y: ; FMA-INFS: # %bb.0: -; FMA-INFS-NEXT: vmovaps {{.*#+}} ymm4 = [-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0] -; FMA-INFS-NEXT: vsubps %ymm4, %ymm1, %ymm1 -; FMA-INFS-NEXT: vsubps %ymm4, %ymm0, %ymm0 +; FMA-INFS-NEXT: vmovaps {{.*#+}} ymm4 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] +; FMA-INFS-NEXT: vaddps %ymm4, %ymm1, %ymm1 +; FMA-INFS-NEXT: vaddps %ymm4, %ymm0, %ymm0 ; FMA-INFS-NEXT: vmulps %ymm2, %ymm0, %ymm0 ; FMA-INFS-NEXT: vmulps %ymm3, %ymm1, %ymm1 ; FMA-INFS-NEXT: retq ; ; FMA4-INFS-LABEL: test_v16f32_mul_sub_x_negone_y: ; FMA4-INFS: # %bb.0: -; FMA4-INFS-NEXT: vmovaps {{.*#+}} ymm4 = [-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0] -; FMA4-INFS-NEXT: vsubps %ymm4, %ymm1, %ymm1 -; FMA4-INFS-NEXT: vsubps %ymm4, %ymm0, %ymm0 +; FMA4-INFS-NEXT: vmovaps {{.*#+}} ymm4 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] +; FMA4-INFS-NEXT: vaddps %ymm4, %ymm1, %ymm1 +; FMA4-INFS-NEXT: vaddps %ymm4, %ymm0, %ymm0 ; FMA4-INFS-NEXT: vmulps %ymm2, %ymm0, %ymm0 ; FMA4-INFS-NEXT: vmulps %ymm3, %ymm1, %ymm1 ; FMA4-INFS-NEXT: retq ; ; AVX512-INFS-LABEL: test_v16f32_mul_sub_x_negone_y: ; AVX512-INFS: # %bb.0: -; AVX512-INFS-NEXT: vsubps {{.*}}(%rip){1to16}, %zmm0, %zmm0 +; AVX512-INFS-NEXT: vaddps {{.*}}(%rip){1to16}, %zmm0, %zmm0 ; AVX512-INFS-NEXT: vmulps %zmm1, %zmm0, %zmm0 ; AVX512-INFS-NEXT: retq ; @@ -815,25 +815,25 @@ define <16 x float> @test_v16f32_mul_sub_x_negone_y(<16 x float> %x, <16 x float define <8 x double> @test_v8f64_mul_y_sub_x_negone(<8 x double> %x, <8 x double> %y) { ; FMA-INFS-LABEL: test_v8f64_mul_y_sub_x_negone: ; FMA-INFS: # %bb.0: -; FMA-INFS-NEXT: vmovapd {{.*#+}} ymm4 = [-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0] -; FMA-INFS-NEXT: vsubpd %ymm4, %ymm1, %ymm1 -; FMA-INFS-NEXT: vsubpd %ymm4, %ymm0, %ymm0 +; FMA-INFS-NEXT: vmovapd {{.*#+}} ymm4 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0] +; FMA-INFS-NEXT: vaddpd %ymm4, %ymm1, %ymm1 +; FMA-INFS-NEXT: vaddpd %ymm4, %ymm0, %ymm0 ; FMA-INFS-NEXT: vmulpd %ymm0, %ymm2, %ymm0 ; FMA-INFS-NEXT: vmulpd %ymm1, %ymm3, %ymm1 ; FMA-INFS-NEXT: retq ; ; FMA4-INFS-LABEL: test_v8f64_mul_y_sub_x_negone: ; FMA4-INFS: # %bb.0: -; FMA4-INFS-NEXT: vmovapd {{.*#+}} ymm4 = [-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0] -; FMA4-INFS-NEXT: vsubpd %ymm4, %ymm1, %ymm1 -; FMA4-INFS-NEXT: vsubpd %ymm4, %ymm0, %ymm0 +; FMA4-INFS-NEXT: vmovapd {{.*#+}} ymm4 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0] +; FMA4-INFS-NEXT: vaddpd %ymm4, %ymm1, %ymm1 +; FMA4-INFS-NEXT: vaddpd %ymm4, %ymm0, %ymm0 ; FMA4-INFS-NEXT: vmulpd %ymm0, %ymm2, %ymm0 ; FMA4-INFS-NEXT: vmulpd %ymm1, %ymm3, %ymm1 ; FMA4-INFS-NEXT: retq ; ; AVX512-INFS-LABEL: test_v8f64_mul_y_sub_x_negone: ; AVX512-INFS: # %bb.0: -; AVX512-INFS-NEXT: vsubpd {{.*}}(%rip){1to8}, %zmm0, %zmm0 +; AVX512-INFS-NEXT: vaddpd {{.*}}(%rip){1to8}, %zmm0, %zmm0 ; AVX512-INFS-NEXT: vmulpd %zmm0, %zmm1, %zmm0 ; AVX512-INFS-NEXT: retq ; diff --git a/llvm/test/CodeGen/X86/fp-fold.ll b/llvm/test/CodeGen/X86/fp-fold.ll index 45cfa58703aa..60f29eb02b44 100644 --- a/llvm/test/CodeGen/X86/fp-fold.ll +++ b/llvm/test/CodeGen/X86/fp-fold.ll @@ -124,7 +124,6 @@ define <4 x float> @fsub_neg_y_vector(<4 x float> %x, <4 x float> %y) { ; ANY-LABEL: fsub_neg_y_vector: ; ANY: # %bb.0: ; ANY-NEXT: mulps {{.*}}(%rip), %xmm0 -; ANY-NEXT: xorps {{.*}}(%rip), %xmm0 ; ANY-NEXT: retq %mul = fmul <4 x float> %x, %add = fadd <4 x float> %mul, %y @@ -136,7 +135,6 @@ define <4 x float> @fsub_neg_y_vector_nonuniform(<4 x float> %x, <4 x float> %y) ; ANY-LABEL: fsub_neg_y_vector_nonuniform: ; ANY: # %bb.0: ; ANY-NEXT: mulps {{.*}}(%rip), %xmm0 -; ANY-NEXT: xorps {{.*}}(%rip), %xmm0 ; ANY-NEXT: retq %mul = fmul <4 x float> %x, %add = fadd <4 x float> %mul, %y @@ -159,7 +157,6 @@ define <4 x float> @fsub_neg_y_commute_vector(<4 x float> %x, <4 x float> %y) { ; ANY-LABEL: fsub_neg_y_commute_vector: ; ANY: # %bb.0: ; ANY-NEXT: mulps {{.*}}(%rip), %xmm0 -; ANY-NEXT: xorps {{.*}}(%rip), %xmm0 ; ANY-NEXT: retq %mul = fmul <4 x float> %x, %add = fadd <4 x float> %y, %mul @@ -233,7 +230,8 @@ define float @fsub_negzero(float %x) { define <4 x float> @fsub_negzero_vector(<4 x float> %x) { ; STRICT-LABEL: fsub_negzero_vector: ; STRICT: # %bb.0: -; STRICT-NEXT: subps {{.*}}(%rip), %xmm0 +; STRICT-NEXT: xorps %xmm1, %xmm1 +; STRICT-NEXT: addps %xmm1, %xmm0 ; STRICT-NEXT: retq ; ; UNSAFE-LABEL: fsub_negzero_vector: