AVX-512: Fixed a bug in OR/XOR operations for 512-bit FP values on KNL.

KNL does not have VXORPS, VORPS for 512-bit values. I use integer VPXOR, VPOR that actually do the same. X86ISD::FXOR/FOR are generated as a result of FSUB combining. Differential Revision: http://reviews.llvm.org/D12753 llvm-svn: 247523
2015-09-13 08:15:15 +00:00 · 2015-09-13 08:15:15 +00:00 · 8671fcbbd6
parent 444d005615
commit 8671fcbbd6
2 changed files with 26 additions and 2 deletions
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@ -25152,7 +25152,8 @@ static SDValue PerformFSUBCombine(SDNode *N, SelectionDAG &DAG,
 }

 /// Do target-specific dag combines on X86ISD::FOR and X86ISD::FXOR nodes.
-static SDValue PerformFORCombine(SDNode *N, SelectionDAG &DAG) {
+static SDValue PerformFORCombine(SDNode *N, SelectionDAG &DAG,
+                                 const X86Subtarget *Subtarget) {
  assert(N->getOpcode() == X86ISD::FOR || N->getOpcode() == X86ISD::FXOR);

  // F[X]OR(0.0, x) -> x
@ -25164,6 +25165,19 @@ static SDValue PerformFORCombine(SDNode *N, SelectionDAG &DAG) {
  if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N->getOperand(1)))
    if (C->getValueAPF().isPosZero())
      return N->getOperand(0);
+
+  EVT VT = N->getValueType(0);
+  if (VT.is512BitVector() && !Subtarget->hasDQI()) {
+    SDLoc dl(N);
+    MVT IntScalar = MVT::getIntegerVT(VT.getScalarSizeInBits());
+    MVT IntVT = MVT::getVectorVT(IntScalar, VT.getVectorNumElements());
+
+    SDValue Op0 = DAG.getNode(ISD::BITCAST, dl, IntVT, N->getOperand(0));
+    SDValue Op1 = DAG.getNode(ISD::BITCAST, dl, IntVT, N->getOperand(1));
+    unsigned IntOpcode = (N->getOpcode() == X86ISD::FOR) ? ISD::OR : ISD::XOR;
+    SDValue IntOp = DAG.getNode(IntOpcode, dl, IntVT, Op0, Op1);
+    return  DAG.getNode(ISD::BITCAST, dl, VT, IntOp);
+  }
  return SDValue();
 }

@ -26027,7 +26041,7 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
  case ISD::FADD:           return PerformFADDCombine(N, DAG, Subtarget);
  case ISD::FSUB:           return PerformFSUBCombine(N, DAG, Subtarget);
  case X86ISD::FXOR:
-  case X86ISD::FOR:         return PerformFORCombine(N, DAG);
+  case X86ISD::FOR:         return PerformFORCombine(N, DAG, Subtarget);
  case X86ISD::FMIN:
  case X86ISD::FMAX:        return PerformFMinFMaxCombine(N, DAG);
  case X86ISD::FAND:        return PerformFANDCombine(N, DAG);
--- a/llvm/test/CodeGen/X86/avx512-arith.ll
+++ b/llvm/test/CodeGen/X86/avx512-arith.ll
@ -652,3 +652,13 @@ define <8 x double> @test_maskz_broadcast_vaddpd(<8 x double> %i, double* %j,
  %r = select <8 x i1> %mask, <8 x double> %x, <8 x double> zeroinitializer
  ret <8 x double> %r
 }
+
+; CHECK-LABEL: test_fxor
+; CHECK: vpxord
+; CHECK: ret
+define <16 x float>  @test_fxor(<16 x float> %a) {
+
+  %res = fsub <16 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %a
+  ret <16 x float>%res
+}
+