forked from OSchip/llvm-project
[DAGCombine] GetNegatedExpression - constant float vector support (PR42105)
Add support for negation of constant build vectors.

Differential Revision: https://reviews.llvm.org/D62963
llvm-svn: 363040
This commit is contained in:
parent
8c865cacda
commit
287e78c82b
|
@ -799,6 +799,23 @@ static char isNegatibleForFree(SDValue Op, bool LegalOperations,
|
|||
TLI.isFPImmLegal(neg(cast<ConstantFPSDNode>(Op)->getValueAPF()), VT,
|
||||
ForCodeSize);
|
||||
}
|
||||
case ISD::BUILD_VECTOR: {
|
||||
// Only permit BUILD_VECTOR of constants.
|
||||
if (llvm::any_of(Op->op_values(), [&](SDValue N) {
|
||||
return !N.isUndef() && !isa<ConstantFPSDNode>(N);
|
||||
}))
|
||||
return 0;
|
||||
if (!LegalOperations)
|
||||
return 1;
|
||||
if (TLI.isOperationLegal(ISD::ConstantFP, VT) &&
|
||||
TLI.isOperationLegal(ISD::BUILD_VECTOR, VT))
|
||||
return 1;
|
||||
return llvm::all_of(Op->op_values(), [&](SDValue N) {
|
||||
return N.isUndef() ||
|
||||
TLI.isFPImmLegal(neg(cast<ConstantFPSDNode>(N)->getValueAPF()), VT,
|
||||
ForCodeSize);
|
||||
});
|
||||
}
|
||||
case ISD::FADD:
|
||||
if (!Options->UnsafeFPMath && !Flags.hasNoSignedZeros())
|
||||
return 0;
|
||||
|
@ -859,27 +876,41 @@ static SDValue GetNegatedExpression(SDValue Op, SelectionDAG &DAG,
|
|||
V.changeSign();
|
||||
return DAG.getConstantFP(V, SDLoc(Op), Op.getValueType());
|
||||
}
|
||||
case ISD::BUILD_VECTOR: {
|
||||
SmallVector<SDValue, 4> Ops;
|
||||
for (SDValue C : Op->op_values()) {
|
||||
if (C.isUndef()) {
|
||||
Ops.push_back(C);
|
||||
continue;
|
||||
}
|
||||
APFloat V = cast<ConstantFPSDNode>(C)->getValueAPF();
|
||||
V.changeSign();
|
||||
Ops.push_back(DAG.getConstantFP(V, SDLoc(Op), C.getValueType()));
|
||||
}
|
||||
return DAG.getBuildVector(Op.getValueType(), SDLoc(Op), Ops);
|
||||
}
|
||||
case ISD::FADD:
|
||||
assert(Options.UnsafeFPMath || Flags.hasNoSignedZeros());
|
||||
|
||||
// fold (fneg (fadd A, B)) -> (fsub (fneg A), B)
|
||||
if (isNegatibleForFree(Op.getOperand(0), LegalOperations,
|
||||
DAG.getTargetLoweringInfo(), &Options, ForCodeSize,
|
||||
Depth+1))
|
||||
Depth + 1))
|
||||
return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(),
|
||||
GetNegatedExpression(Op.getOperand(0), DAG,
|
||||
LegalOperations, ForCodeSize,
|
||||
Depth+1),
|
||||
Depth + 1),
|
||||
Op.getOperand(1), Flags);
|
||||
// fold (fneg (fadd A, B)) -> (fsub (fneg B), A)
|
||||
return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(),
|
||||
GetNegatedExpression(Op.getOperand(1), DAG,
|
||||
LegalOperations, ForCodeSize,
|
||||
Depth+1),
|
||||
Depth + 1),
|
||||
Op.getOperand(0), Flags);
|
||||
case ISD::FSUB:
|
||||
// fold (fneg (fsub 0, B)) -> B
|
||||
if (auto *N0CFP = dyn_cast<ConstantFPSDNode>(Op.getOperand(0)))
|
||||
if (ConstantFPSDNode *N0CFP =
|
||||
isConstOrConstSplatFP(Op.getOperand(0), /*AllowUndefs*/ true))
|
||||
if (N0CFP->isZero())
|
||||
return Op.getOperand(1);
|
||||
|
||||
|
@ -892,11 +923,11 @@ static SDValue GetNegatedExpression(SDValue Op, SelectionDAG &DAG,
|
|||
// fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y)
|
||||
if (isNegatibleForFree(Op.getOperand(0), LegalOperations,
|
||||
DAG.getTargetLoweringInfo(), &Options, ForCodeSize,
|
||||
Depth+1))
|
||||
Depth + 1))
|
||||
return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(),
|
||||
GetNegatedExpression(Op.getOperand(0), DAG,
|
||||
LegalOperations, ForCodeSize,
|
||||
Depth+1),
|
||||
Depth + 1),
|
||||
Op.getOperand(1), Flags);
|
||||
|
||||
// fold (fneg (fmul X, Y)) -> (fmul X, (fneg Y))
|
||||
|
@ -904,19 +935,19 @@ static SDValue GetNegatedExpression(SDValue Op, SelectionDAG &DAG,
|
|||
Op.getOperand(0),
|
||||
GetNegatedExpression(Op.getOperand(1), DAG,
|
||||
LegalOperations, ForCodeSize,
|
||||
Depth+1), Flags);
|
||||
Depth + 1), Flags);
|
||||
|
||||
case ISD::FP_EXTEND:
|
||||
case ISD::FSIN:
|
||||
return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(),
|
||||
GetNegatedExpression(Op.getOperand(0), DAG,
|
||||
LegalOperations, ForCodeSize,
|
||||
Depth+1));
|
||||
Depth + 1));
|
||||
case ISD::FP_ROUND:
|
||||
return DAG.getNode(ISD::FP_ROUND, SDLoc(Op), Op.getValueType(),
|
||||
GetNegatedExpression(Op.getOperand(0), DAG,
|
||||
LegalOperations, ForCodeSize,
|
||||
Depth+1),
|
||||
Depth + 1),
|
||||
Op.getOperand(1));
|
||||
}
|
||||
}
|
||||
|
|
|
@ -110,18 +110,17 @@ define amdgpu_kernel void @fneg_fabs_v4f16(<4 x half> addrspace(1)* %out, <4 x h
|
|||
|
||||
; GCN-LABEL: {{^}}fold_user_fneg_fabs_v2f16:
|
||||
; CI: s_load_dword [[IN:s[0-9]+]]
|
||||
; CI: s_or_b32 [[FNEG_FABS:s[0-9]+]], [[IN]], 0x80008000
|
||||
; CI: s_lshr_b32
|
||||
; CI: v_cvt_f32_f16_e32 v{{[0-9]+}}, s{{[0-9]+}}
|
||||
; CI: v_cvt_f32_f16_e32 v{{[0-9]+}}, s{{[0-9]+}}
|
||||
; CI: v_mul_f32_e32 v{{[0-9]+}}, 4.0, v{{[0-9]+}}
|
||||
; CI: v_mul_f32_e32 v{{[0-9]+}}, 4.0, v{{[0-9]+}}
|
||||
; CI: v_cvt_f32_f16_e64 v{{[0-9]+}}, |s{{[0-9]+}}|
|
||||
; CI: v_cvt_f32_f16_e64 v{{[0-9]+}}, |s{{[0-9]+}}|
|
||||
; CI: v_mul_f32_e32 v{{[0-9]+}}, -4.0, v{{[0-9]+}}
|
||||
; CI: v_mul_f32_e32 v{{[0-9]+}}, -4.0, v{{[0-9]+}}
|
||||
|
||||
; VI: v_mul_f16_e64 v{{[0-9]+}}, -|s{{[0-9]+}}|, 4.0
|
||||
; VI: v_mul_f16_sdwa v{{[0-9]+}}, -|v{{[0-9]+}}|, v{{[0-9]+}} dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
|
||||
; VI: v_mul_f16_e64 v{{[0-9]+}}, |s{{[0-9]+}}|, -4.0
|
||||
; VI: v_mul_f16_sdwa v{{[0-9]+}}, |v{{[0-9]+}}|, v{{[0-9]+}} dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
|
||||
|
||||
; GFX9: s_and_b32 [[ABS:s[0-9]+]], s{{[0-9]+}}, 0x7fff7fff
|
||||
; GFX9: v_pk_mul_f16 v{{[0-9]+}}, [[ABS]], 4.0 op_sel_hi:[1,0] neg_lo:[1,0] neg_hi:[1,0]
|
||||
; GFX9: v_pk_mul_f16 v{{[0-9]+}}, [[ABS]], -4.0 op_sel_hi:[1,0]
|
||||
define amdgpu_kernel void @fold_user_fneg_fabs_v2f16(<2 x half> addrspace(1)* %out, <2 x half> %in) #0 {
|
||||
%fabs = call <2 x half> @llvm.fabs.v2f16(<2 x half> %in)
|
||||
%fneg.fabs = fsub <2 x half> <half -0.0, half -0.0>, %fabs
|
||||
|
@ -147,7 +146,7 @@ define amdgpu_kernel void @s_fneg_multi_use_fabs_v2f16(<2 x half> addrspace(1)*
|
|||
|
||||
; GCN-LABEL: {{^}}s_fneg_multi_use_fabs_foldable_neg_v2f16:
|
||||
; GFX9: s_and_b32 [[ABS:s[0-9]+]], s{{[0-9]+}}, 0x7fff7fff
|
||||
; GFX9: v_pk_mul_f16 v{{[0-9]+}}, [[ABS]], 4.0 op_sel_hi:[1,0] neg_lo:[1,0] neg_hi:[1,0]
|
||||
; GFX9: v_pk_mul_f16 v{{[0-9]+}}, [[ABS]], -4.0 op_sel_hi:[1,0]
|
||||
define amdgpu_kernel void @s_fneg_multi_use_fabs_foldable_neg_v2f16(<2 x half> addrspace(1)* %out0, <2 x half> addrspace(1)* %out1, <2 x half> %in) {
|
||||
%fabs = call <2 x half> @llvm.fabs.v2f16(<2 x half> %in)
|
||||
%fneg = fsub <2 x half> <half -0.0, half -0.0>, %fabs
|
||||
|
|
|
@ -1163,19 +1163,19 @@ define <4 x float> @test_v4f32_mul_y_sub_negone_x_undefs(<4 x float> %x, <4 x fl
|
|||
define <4 x float> @test_v4f32_mul_sub_x_one_y(<4 x float> %x, <4 x float> %y) {
|
||||
; FMA-INFS-LABEL: test_v4f32_mul_sub_x_one_y:
|
||||
; FMA-INFS: # %bb.0:
|
||||
; FMA-INFS-NEXT: vsubps {{.*}}(%rip), %xmm0, %xmm0
|
||||
; FMA-INFS-NEXT: vaddps {{.*}}(%rip), %xmm0, %xmm0
|
||||
; FMA-INFS-NEXT: vmulps %xmm1, %xmm0, %xmm0
|
||||
; FMA-INFS-NEXT: retq
|
||||
;
|
||||
; FMA4-INFS-LABEL: test_v4f32_mul_sub_x_one_y:
|
||||
; FMA4-INFS: # %bb.0:
|
||||
; FMA4-INFS-NEXT: vsubps {{.*}}(%rip), %xmm0, %xmm0
|
||||
; FMA4-INFS-NEXT: vaddps {{.*}}(%rip), %xmm0, %xmm0
|
||||
; FMA4-INFS-NEXT: vmulps %xmm1, %xmm0, %xmm0
|
||||
; FMA4-INFS-NEXT: retq
|
||||
;
|
||||
; AVX512-INFS-LABEL: test_v4f32_mul_sub_x_one_y:
|
||||
; AVX512-INFS: # %bb.0:
|
||||
; AVX512-INFS-NEXT: vsubps {{.*}}(%rip){1to4}, %xmm0, %xmm0
|
||||
; AVX512-INFS-NEXT: vaddps {{.*}}(%rip){1to4}, %xmm0, %xmm0
|
||||
; AVX512-INFS-NEXT: vmulps %xmm1, %xmm0, %xmm0
|
||||
; AVX512-INFS-NEXT: retq
|
||||
;
|
||||
|
@ -1201,19 +1201,19 @@ define <4 x float> @test_v4f32_mul_sub_x_one_y(<4 x float> %x, <4 x float> %y) {
|
|||
define <4 x float> @test_v4f32_mul_y_sub_x_one(<4 x float> %x, <4 x float> %y) {
|
||||
; FMA-INFS-LABEL: test_v4f32_mul_y_sub_x_one:
|
||||
; FMA-INFS: # %bb.0:
|
||||
; FMA-INFS-NEXT: vsubps {{.*}}(%rip), %xmm0, %xmm0
|
||||
; FMA-INFS-NEXT: vaddps {{.*}}(%rip), %xmm0, %xmm0
|
||||
; FMA-INFS-NEXT: vmulps %xmm0, %xmm1, %xmm0
|
||||
; FMA-INFS-NEXT: retq
|
||||
;
|
||||
; FMA4-INFS-LABEL: test_v4f32_mul_y_sub_x_one:
|
||||
; FMA4-INFS: # %bb.0:
|
||||
; FMA4-INFS-NEXT: vsubps {{.*}}(%rip), %xmm0, %xmm0
|
||||
; FMA4-INFS-NEXT: vaddps {{.*}}(%rip), %xmm0, %xmm0
|
||||
; FMA4-INFS-NEXT: vmulps %xmm0, %xmm1, %xmm0
|
||||
; FMA4-INFS-NEXT: retq
|
||||
;
|
||||
; AVX512-INFS-LABEL: test_v4f32_mul_y_sub_x_one:
|
||||
; AVX512-INFS: # %bb.0:
|
||||
; AVX512-INFS-NEXT: vsubps {{.*}}(%rip){1to4}, %xmm0, %xmm0
|
||||
; AVX512-INFS-NEXT: vaddps {{.*}}(%rip){1to4}, %xmm0, %xmm0
|
||||
; AVX512-INFS-NEXT: vmulps %xmm0, %xmm1, %xmm0
|
||||
; AVX512-INFS-NEXT: retq
|
||||
;
|
||||
|
@ -1239,19 +1239,19 @@ define <4 x float> @test_v4f32_mul_y_sub_x_one(<4 x float> %x, <4 x float> %y) {
|
|||
define <4 x float> @test_v4f32_mul_y_sub_x_one_undefs(<4 x float> %x, <4 x float> %y) {
|
||||
; FMA-INFS-LABEL: test_v4f32_mul_y_sub_x_one_undefs:
|
||||
; FMA-INFS: # %bb.0:
|
||||
; FMA-INFS-NEXT: vsubps {{.*}}(%rip), %xmm0, %xmm0
|
||||
; FMA-INFS-NEXT: vaddps {{.*}}(%rip), %xmm0, %xmm0
|
||||
; FMA-INFS-NEXT: vmulps %xmm0, %xmm1, %xmm0
|
||||
; FMA-INFS-NEXT: retq
|
||||
;
|
||||
; FMA4-INFS-LABEL: test_v4f32_mul_y_sub_x_one_undefs:
|
||||
; FMA4-INFS: # %bb.0:
|
||||
; FMA4-INFS-NEXT: vsubps {{.*}}(%rip), %xmm0, %xmm0
|
||||
; FMA4-INFS-NEXT: vaddps {{.*}}(%rip), %xmm0, %xmm0
|
||||
; FMA4-INFS-NEXT: vmulps %xmm0, %xmm1, %xmm0
|
||||
; FMA4-INFS-NEXT: retq
|
||||
;
|
||||
; AVX512-INFS-LABEL: test_v4f32_mul_y_sub_x_one_undefs:
|
||||
; AVX512-INFS: # %bb.0:
|
||||
; AVX512-INFS-NEXT: vsubps {{.*}}(%rip){1to4}, %xmm0, %xmm0
|
||||
; AVX512-INFS-NEXT: vaddps {{.*}}(%rip){1to4}, %xmm0, %xmm0
|
||||
; AVX512-INFS-NEXT: vmulps %xmm0, %xmm1, %xmm0
|
||||
; AVX512-INFS-NEXT: retq
|
||||
;
|
||||
|
@ -1277,19 +1277,19 @@ define <4 x float> @test_v4f32_mul_y_sub_x_one_undefs(<4 x float> %x, <4 x float
|
|||
define <4 x float> @test_v4f32_mul_sub_x_negone_y(<4 x float> %x, <4 x float> %y) {
|
||||
; FMA-INFS-LABEL: test_v4f32_mul_sub_x_negone_y:
|
||||
; FMA-INFS: # %bb.0:
|
||||
; FMA-INFS-NEXT: vsubps {{.*}}(%rip), %xmm0, %xmm0
|
||||
; FMA-INFS-NEXT: vaddps {{.*}}(%rip), %xmm0, %xmm0
|
||||
; FMA-INFS-NEXT: vmulps %xmm1, %xmm0, %xmm0
|
||||
; FMA-INFS-NEXT: retq
|
||||
;
|
||||
; FMA4-INFS-LABEL: test_v4f32_mul_sub_x_negone_y:
|
||||
; FMA4-INFS: # %bb.0:
|
||||
; FMA4-INFS-NEXT: vsubps {{.*}}(%rip), %xmm0, %xmm0
|
||||
; FMA4-INFS-NEXT: vaddps {{.*}}(%rip), %xmm0, %xmm0
|
||||
; FMA4-INFS-NEXT: vmulps %xmm1, %xmm0, %xmm0
|
||||
; FMA4-INFS-NEXT: retq
|
||||
;
|
||||
; AVX512-INFS-LABEL: test_v4f32_mul_sub_x_negone_y:
|
||||
; AVX512-INFS: # %bb.0:
|
||||
; AVX512-INFS-NEXT: vsubps {{.*}}(%rip){1to4}, %xmm0, %xmm0
|
||||
; AVX512-INFS-NEXT: vaddps {{.*}}(%rip){1to4}, %xmm0, %xmm0
|
||||
; AVX512-INFS-NEXT: vmulps %xmm1, %xmm0, %xmm0
|
||||
; AVX512-INFS-NEXT: retq
|
||||
;
|
||||
|
@ -1315,19 +1315,19 @@ define <4 x float> @test_v4f32_mul_sub_x_negone_y(<4 x float> %x, <4 x float> %y
|
|||
define <4 x float> @test_v4f32_mul_y_sub_x_negone(<4 x float> %x, <4 x float> %y) {
|
||||
; FMA-INFS-LABEL: test_v4f32_mul_y_sub_x_negone:
|
||||
; FMA-INFS: # %bb.0:
|
||||
; FMA-INFS-NEXT: vsubps {{.*}}(%rip), %xmm0, %xmm0
|
||||
; FMA-INFS-NEXT: vaddps {{.*}}(%rip), %xmm0, %xmm0
|
||||
; FMA-INFS-NEXT: vmulps %xmm0, %xmm1, %xmm0
|
||||
; FMA-INFS-NEXT: retq
|
||||
;
|
||||
; FMA4-INFS-LABEL: test_v4f32_mul_y_sub_x_negone:
|
||||
; FMA4-INFS: # %bb.0:
|
||||
; FMA4-INFS-NEXT: vsubps {{.*}}(%rip), %xmm0, %xmm0
|
||||
; FMA4-INFS-NEXT: vaddps {{.*}}(%rip), %xmm0, %xmm0
|
||||
; FMA4-INFS-NEXT: vmulps %xmm0, %xmm1, %xmm0
|
||||
; FMA4-INFS-NEXT: retq
|
||||
;
|
||||
; AVX512-INFS-LABEL: test_v4f32_mul_y_sub_x_negone:
|
||||
; AVX512-INFS: # %bb.0:
|
||||
; AVX512-INFS-NEXT: vsubps {{.*}}(%rip){1to4}, %xmm0, %xmm0
|
||||
; AVX512-INFS-NEXT: vaddps {{.*}}(%rip){1to4}, %xmm0, %xmm0
|
||||
; AVX512-INFS-NEXT: vmulps %xmm0, %xmm1, %xmm0
|
||||
; AVX512-INFS-NEXT: retq
|
||||
;
|
||||
|
@ -1353,19 +1353,19 @@ define <4 x float> @test_v4f32_mul_y_sub_x_negone(<4 x float> %x, <4 x float> %y
|
|||
define <4 x float> @test_v4f32_mul_y_sub_x_negone_undefs(<4 x float> %x, <4 x float> %y) {
|
||||
; FMA-INFS-LABEL: test_v4f32_mul_y_sub_x_negone_undefs:
|
||||
; FMA-INFS: # %bb.0:
|
||||
; FMA-INFS-NEXT: vsubps {{.*}}(%rip), %xmm0, %xmm0
|
||||
; FMA-INFS-NEXT: vaddps {{.*}}(%rip), %xmm0, %xmm0
|
||||
; FMA-INFS-NEXT: vmulps %xmm0, %xmm1, %xmm0
|
||||
; FMA-INFS-NEXT: retq
|
||||
;
|
||||
; FMA4-INFS-LABEL: test_v4f32_mul_y_sub_x_negone_undefs:
|
||||
; FMA4-INFS: # %bb.0:
|
||||
; FMA4-INFS-NEXT: vsubps {{.*}}(%rip), %xmm0, %xmm0
|
||||
; FMA4-INFS-NEXT: vaddps {{.*}}(%rip), %xmm0, %xmm0
|
||||
; FMA4-INFS-NEXT: vmulps %xmm0, %xmm1, %xmm0
|
||||
; FMA4-INFS-NEXT: retq
|
||||
;
|
||||
; AVX512-INFS-LABEL: test_v4f32_mul_y_sub_x_negone_undefs:
|
||||
; AVX512-INFS: # %bb.0:
|
||||
; AVX512-INFS-NEXT: vsubps {{.*}}(%rip){1to4}, %xmm0, %xmm0
|
||||
; AVX512-INFS-NEXT: vaddps {{.*}}(%rip){1to4}, %xmm0, %xmm0
|
||||
; AVX512-INFS-NEXT: vmulps %xmm0, %xmm1, %xmm0
|
||||
; AVX512-INFS-NEXT: retq
|
||||
;
|
||||
|
|
|
@ -677,25 +677,25 @@ define <8 x double> @test_v8f64_mul_y_sub_negone_x(<8 x double> %x, <8 x double>
|
|||
define <16 x float> @test_v16f32_mul_sub_x_one_y(<16 x float> %x, <16 x float> %y) {
|
||||
; FMA-INFS-LABEL: test_v16f32_mul_sub_x_one_y:
|
||||
; FMA-INFS: # %bb.0:
|
||||
; FMA-INFS-NEXT: vmovaps {{.*#+}} ymm4 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
|
||||
; FMA-INFS-NEXT: vsubps %ymm4, %ymm1, %ymm1
|
||||
; FMA-INFS-NEXT: vsubps %ymm4, %ymm0, %ymm0
|
||||
; FMA-INFS-NEXT: vmovaps {{.*#+}} ymm4 = [-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0]
|
||||
; FMA-INFS-NEXT: vaddps %ymm4, %ymm1, %ymm1
|
||||
; FMA-INFS-NEXT: vaddps %ymm4, %ymm0, %ymm0
|
||||
; FMA-INFS-NEXT: vmulps %ymm2, %ymm0, %ymm0
|
||||
; FMA-INFS-NEXT: vmulps %ymm3, %ymm1, %ymm1
|
||||
; FMA-INFS-NEXT: retq
|
||||
;
|
||||
; FMA4-INFS-LABEL: test_v16f32_mul_sub_x_one_y:
|
||||
; FMA4-INFS: # %bb.0:
|
||||
; FMA4-INFS-NEXT: vmovaps {{.*#+}} ymm4 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
|
||||
; FMA4-INFS-NEXT: vsubps %ymm4, %ymm1, %ymm1
|
||||
; FMA4-INFS-NEXT: vsubps %ymm4, %ymm0, %ymm0
|
||||
; FMA4-INFS-NEXT: vmovaps {{.*#+}} ymm4 = [-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0]
|
||||
; FMA4-INFS-NEXT: vaddps %ymm4, %ymm1, %ymm1
|
||||
; FMA4-INFS-NEXT: vaddps %ymm4, %ymm0, %ymm0
|
||||
; FMA4-INFS-NEXT: vmulps %ymm2, %ymm0, %ymm0
|
||||
; FMA4-INFS-NEXT: vmulps %ymm3, %ymm1, %ymm1
|
||||
; FMA4-INFS-NEXT: retq
|
||||
;
|
||||
; AVX512-INFS-LABEL: test_v16f32_mul_sub_x_one_y:
|
||||
; AVX512-INFS: # %bb.0:
|
||||
; AVX512-INFS-NEXT: vsubps {{.*}}(%rip){1to16}, %zmm0, %zmm0
|
||||
; AVX512-INFS-NEXT: vaddps {{.*}}(%rip){1to16}, %zmm0, %zmm0
|
||||
; AVX512-INFS-NEXT: vmulps %zmm1, %zmm0, %zmm0
|
||||
; AVX512-INFS-NEXT: retq
|
||||
;
|
||||
|
@ -723,25 +723,25 @@ define <16 x float> @test_v16f32_mul_sub_x_one_y(<16 x float> %x, <16 x float> %
|
|||
define <8 x double> @test_v8f64_mul_y_sub_x_one(<8 x double> %x, <8 x double> %y) {
|
||||
; FMA-INFS-LABEL: test_v8f64_mul_y_sub_x_one:
|
||||
; FMA-INFS: # %bb.0:
|
||||
; FMA-INFS-NEXT: vmovapd {{.*#+}} ymm4 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0]
|
||||
; FMA-INFS-NEXT: vsubpd %ymm4, %ymm1, %ymm1
|
||||
; FMA-INFS-NEXT: vsubpd %ymm4, %ymm0, %ymm0
|
||||
; FMA-INFS-NEXT: vmovapd {{.*#+}} ymm4 = [-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0]
|
||||
; FMA-INFS-NEXT: vaddpd %ymm4, %ymm1, %ymm1
|
||||
; FMA-INFS-NEXT: vaddpd %ymm4, %ymm0, %ymm0
|
||||
; FMA-INFS-NEXT: vmulpd %ymm0, %ymm2, %ymm0
|
||||
; FMA-INFS-NEXT: vmulpd %ymm1, %ymm3, %ymm1
|
||||
; FMA-INFS-NEXT: retq
|
||||
;
|
||||
; FMA4-INFS-LABEL: test_v8f64_mul_y_sub_x_one:
|
||||
; FMA4-INFS: # %bb.0:
|
||||
; FMA4-INFS-NEXT: vmovapd {{.*#+}} ymm4 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0]
|
||||
; FMA4-INFS-NEXT: vsubpd %ymm4, %ymm1, %ymm1
|
||||
; FMA4-INFS-NEXT: vsubpd %ymm4, %ymm0, %ymm0
|
||||
; FMA4-INFS-NEXT: vmovapd {{.*#+}} ymm4 = [-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0]
|
||||
; FMA4-INFS-NEXT: vaddpd %ymm4, %ymm1, %ymm1
|
||||
; FMA4-INFS-NEXT: vaddpd %ymm4, %ymm0, %ymm0
|
||||
; FMA4-INFS-NEXT: vmulpd %ymm0, %ymm2, %ymm0
|
||||
; FMA4-INFS-NEXT: vmulpd %ymm1, %ymm3, %ymm1
|
||||
; FMA4-INFS-NEXT: retq
|
||||
;
|
||||
; AVX512-INFS-LABEL: test_v8f64_mul_y_sub_x_one:
|
||||
; AVX512-INFS: # %bb.0:
|
||||
; AVX512-INFS-NEXT: vsubpd {{.*}}(%rip){1to8}, %zmm0, %zmm0
|
||||
; AVX512-INFS-NEXT: vaddpd {{.*}}(%rip){1to8}, %zmm0, %zmm0
|
||||
; AVX512-INFS-NEXT: vmulpd %zmm0, %zmm1, %zmm0
|
||||
; AVX512-INFS-NEXT: retq
|
||||
;
|
||||
|
@ -769,25 +769,25 @@ define <8 x double> @test_v8f64_mul_y_sub_x_one(<8 x double> %x, <8 x double> %y
|
|||
define <16 x float> @test_v16f32_mul_sub_x_negone_y(<16 x float> %x, <16 x float> %y) {
|
||||
; FMA-INFS-LABEL: test_v16f32_mul_sub_x_negone_y:
|
||||
; FMA-INFS: # %bb.0:
|
||||
; FMA-INFS-NEXT: vmovaps {{.*#+}} ymm4 = [-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0]
|
||||
; FMA-INFS-NEXT: vsubps %ymm4, %ymm1, %ymm1
|
||||
; FMA-INFS-NEXT: vsubps %ymm4, %ymm0, %ymm0
|
||||
; FMA-INFS-NEXT: vmovaps {{.*#+}} ymm4 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
|
||||
; FMA-INFS-NEXT: vaddps %ymm4, %ymm1, %ymm1
|
||||
; FMA-INFS-NEXT: vaddps %ymm4, %ymm0, %ymm0
|
||||
; FMA-INFS-NEXT: vmulps %ymm2, %ymm0, %ymm0
|
||||
; FMA-INFS-NEXT: vmulps %ymm3, %ymm1, %ymm1
|
||||
; FMA-INFS-NEXT: retq
|
||||
;
|
||||
; FMA4-INFS-LABEL: test_v16f32_mul_sub_x_negone_y:
|
||||
; FMA4-INFS: # %bb.0:
|
||||
; FMA4-INFS-NEXT: vmovaps {{.*#+}} ymm4 = [-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0]
|
||||
; FMA4-INFS-NEXT: vsubps %ymm4, %ymm1, %ymm1
|
||||
; FMA4-INFS-NEXT: vsubps %ymm4, %ymm0, %ymm0
|
||||
; FMA4-INFS-NEXT: vmovaps {{.*#+}} ymm4 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
|
||||
; FMA4-INFS-NEXT: vaddps %ymm4, %ymm1, %ymm1
|
||||
; FMA4-INFS-NEXT: vaddps %ymm4, %ymm0, %ymm0
|
||||
; FMA4-INFS-NEXT: vmulps %ymm2, %ymm0, %ymm0
|
||||
; FMA4-INFS-NEXT: vmulps %ymm3, %ymm1, %ymm1
|
||||
; FMA4-INFS-NEXT: retq
|
||||
;
|
||||
; AVX512-INFS-LABEL: test_v16f32_mul_sub_x_negone_y:
|
||||
; AVX512-INFS: # %bb.0:
|
||||
; AVX512-INFS-NEXT: vsubps {{.*}}(%rip){1to16}, %zmm0, %zmm0
|
||||
; AVX512-INFS-NEXT: vaddps {{.*}}(%rip){1to16}, %zmm0, %zmm0
|
||||
; AVX512-INFS-NEXT: vmulps %zmm1, %zmm0, %zmm0
|
||||
; AVX512-INFS-NEXT: retq
|
||||
;
|
||||
|
@ -815,25 +815,25 @@ define <16 x float> @test_v16f32_mul_sub_x_negone_y(<16 x float> %x, <16 x float
|
|||
define <8 x double> @test_v8f64_mul_y_sub_x_negone(<8 x double> %x, <8 x double> %y) {
|
||||
; FMA-INFS-LABEL: test_v8f64_mul_y_sub_x_negone:
|
||||
; FMA-INFS: # %bb.0:
|
||||
; FMA-INFS-NEXT: vmovapd {{.*#+}} ymm4 = [-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0]
|
||||
; FMA-INFS-NEXT: vsubpd %ymm4, %ymm1, %ymm1
|
||||
; FMA-INFS-NEXT: vsubpd %ymm4, %ymm0, %ymm0
|
||||
; FMA-INFS-NEXT: vmovapd {{.*#+}} ymm4 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0]
|
||||
; FMA-INFS-NEXT: vaddpd %ymm4, %ymm1, %ymm1
|
||||
; FMA-INFS-NEXT: vaddpd %ymm4, %ymm0, %ymm0
|
||||
; FMA-INFS-NEXT: vmulpd %ymm0, %ymm2, %ymm0
|
||||
; FMA-INFS-NEXT: vmulpd %ymm1, %ymm3, %ymm1
|
||||
; FMA-INFS-NEXT: retq
|
||||
;
|
||||
; FMA4-INFS-LABEL: test_v8f64_mul_y_sub_x_negone:
|
||||
; FMA4-INFS: # %bb.0:
|
||||
; FMA4-INFS-NEXT: vmovapd {{.*#+}} ymm4 = [-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0]
|
||||
; FMA4-INFS-NEXT: vsubpd %ymm4, %ymm1, %ymm1
|
||||
; FMA4-INFS-NEXT: vsubpd %ymm4, %ymm0, %ymm0
|
||||
; FMA4-INFS-NEXT: vmovapd {{.*#+}} ymm4 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0]
|
||||
; FMA4-INFS-NEXT: vaddpd %ymm4, %ymm1, %ymm1
|
||||
; FMA4-INFS-NEXT: vaddpd %ymm4, %ymm0, %ymm0
|
||||
; FMA4-INFS-NEXT: vmulpd %ymm0, %ymm2, %ymm0
|
||||
; FMA4-INFS-NEXT: vmulpd %ymm1, %ymm3, %ymm1
|
||||
; FMA4-INFS-NEXT: retq
|
||||
;
|
||||
; AVX512-INFS-LABEL: test_v8f64_mul_y_sub_x_negone:
|
||||
; AVX512-INFS: # %bb.0:
|
||||
; AVX512-INFS-NEXT: vsubpd {{.*}}(%rip){1to8}, %zmm0, %zmm0
|
||||
; AVX512-INFS-NEXT: vaddpd {{.*}}(%rip){1to8}, %zmm0, %zmm0
|
||||
; AVX512-INFS-NEXT: vmulpd %zmm0, %zmm1, %zmm0
|
||||
; AVX512-INFS-NEXT: retq
|
||||
;
|
||||
|
|
|
@ -124,7 +124,6 @@ define <4 x float> @fsub_neg_y_vector(<4 x float> %x, <4 x float> %y) {
|
|||
; ANY-LABEL: fsub_neg_y_vector:
|
||||
; ANY: # %bb.0:
|
||||
; ANY-NEXT: mulps {{.*}}(%rip), %xmm0
|
||||
; ANY-NEXT: xorps {{.*}}(%rip), %xmm0
|
||||
; ANY-NEXT: retq
|
||||
%mul = fmul <4 x float> %x, <float 5.0, float 5.0, float 5.0, float 5.0>
|
||||
%add = fadd <4 x float> %mul, %y
|
||||
|
@ -136,7 +135,6 @@ define <4 x float> @fsub_neg_y_vector_nonuniform(<4 x float> %x, <4 x float> %y)
|
|||
; ANY-LABEL: fsub_neg_y_vector_nonuniform:
|
||||
; ANY: # %bb.0:
|
||||
; ANY-NEXT: mulps {{.*}}(%rip), %xmm0
|
||||
; ANY-NEXT: xorps {{.*}}(%rip), %xmm0
|
||||
; ANY-NEXT: retq
|
||||
%mul = fmul <4 x float> %x, <float 5.0, float 6.0, float 7.0, float 8.0>
|
||||
%add = fadd <4 x float> %mul, %y
|
||||
|
@ -159,7 +157,6 @@ define <4 x float> @fsub_neg_y_commute_vector(<4 x float> %x, <4 x float> %y) {
|
|||
; ANY-LABEL: fsub_neg_y_commute_vector:
|
||||
; ANY: # %bb.0:
|
||||
; ANY-NEXT: mulps {{.*}}(%rip), %xmm0
|
||||
; ANY-NEXT: xorps {{.*}}(%rip), %xmm0
|
||||
; ANY-NEXT: retq
|
||||
%mul = fmul <4 x float> %x, <float 5.0, float 5.0, float 5.0, float 5.0>
|
||||
%add = fadd <4 x float> %y, %mul
|
||||
|
@ -233,7 +230,8 @@ define float @fsub_negzero(float %x) {
|
|||
define <4 x float> @fsub_negzero_vector(<4 x float> %x) {
|
||||
; STRICT-LABEL: fsub_negzero_vector:
|
||||
; STRICT: # %bb.0:
|
||||
; STRICT-NEXT: subps {{.*}}(%rip), %xmm0
|
||||
; STRICT-NEXT: xorps %xmm1, %xmm1
|
||||
; STRICT-NEXT: addps %xmm1, %xmm0
|
||||
; STRICT-NEXT: retq
|
||||
;
|
||||
; UNSAFE-LABEL: fsub_negzero_vector:
|
||||
|
|
Loading…
Reference in New Issue