[X86] Correct the implementation of the avx512 masked fmsubadd autoupgrade code to not leave the negate unconnected.

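This was causing us to generate fmaddsub instead of fmsubadd whenever the rounding control operand is not 4 (i.e. not the current-direction rounding mode), because the call to the rounding-aware intrinsic was built from the original call arguments rather than from the operand list containing the negated addend.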
2020-02-07 15:04:22 -08:00 · 2020-02-07 15:04:22 -08:00 · bb717d3f46
parent 598d9dd846
commit bb717d3f46
2 changed files with 3 additions and 4 deletions
--- a/llvm/lib/IR/AutoUpgrade.cpp
+++ b/llvm/lib/IR/AutoUpgrade.cpp
@@ -3259,8 +3259,7 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
          Ops[2] = Builder.CreateFNeg(Ops[2]);

        Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
-                                 {CI->getArgOperand(0), CI->getArgOperand(1),
-                                  CI->getArgOperand(2), CI->getArgOperand(4)});
+                                 Ops);
      } else {
        int NumElts = CI->getType()->getVectorNumElements();

--- a/llvm/test/CodeGen/X86/avx512-fma-intrinsics-upgrade.ll
+++ b/llvm/test/CodeGen/X86/avx512-fma-intrinsics-upgrade.ll
@@ -735,7 +735,7 @@ define <8 x double>@test_int_x86_avx512_mask3_vfmsubadd_pd_512_rne(<8 x double>
 ; CHECK-LABEL: test_int_x86_avx512_mask3_vfmsubadd_pd_512_rne:
 ; CHECK:       ## %bb.0:
 ; CHECK-NEXT:    kmovw %edi, %k1
-; CHECK-NEXT:    vfmaddsub231pd {rn-sae}, %zmm1, %zmm0, %zmm2 {%k1}
+; CHECK-NEXT:    vfmsubadd231pd {rn-sae}, %zmm1, %zmm0, %zmm2 {%k1}
 ; CHECK-NEXT:    vmovapd %zmm2, %zmm0
 ; CHECK-NEXT:    retq
  %res = call <8 x double> @llvm.x86.avx512.mask3.vfmsubadd.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3, i32 8)
@@ -746,7 +746,7 @@ define <16 x float>@test_int_x86_avx512_mask3_vfmsubadd_ps_512_rne(<16 x float>
 ; CHECK-LABEL: test_int_x86_avx512_mask3_vfmsubadd_ps_512_rne:
 ; CHECK:       ## %bb.0:
 ; CHECK-NEXT:    kmovw %edi, %k1
-; CHECK-NEXT:    vfmaddsub231ps {rn-sae}, %zmm1, %zmm0, %zmm2 {%k1}
+; CHECK-NEXT:    vfmsubadd231ps {rn-sae}, %zmm1, %zmm0, %zmm2 {%k1}
 ; CHECK-NEXT:    vmovaps %zmm2, %zmm0
 ; CHECK-NEXT:    retq
  %res = call <16 x float> @llvm.x86.avx512.mask3.vfmsubadd.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3, i32 8)