AVX-512: Fixed a bug in OR/XOR operations for 512-bit FP values on KNL.

KNL does not have VXORPS, VORPS for 512-bit values.
I use integer VPXOR, VPOR that actually do the same.

X86ISD::FXOR/FOR are generated as a result of FSUB combining.

Differential Revision: http://reviews.llvm.org/D12753

llvm-svn: 247523
This commit is contained in:
Elena Demikhovsky 2015-09-13 08:15:15 +00:00
parent 444d005615
commit 8671fcbbd6
2 changed files with 26 additions and 2 deletions

View File

@ -25152,7 +25152,8 @@ static SDValue PerformFSUBCombine(SDNode *N, SelectionDAG &DAG,
}
/// Do target-specific dag combines on X86ISD::FOR and X86ISD::FXOR nodes.
static SDValue PerformFORCombine(SDNode *N, SelectionDAG &DAG) {
static SDValue PerformFORCombine(SDNode *N, SelectionDAG &DAG,
const X86Subtarget *Subtarget) {
assert(N->getOpcode() == X86ISD::FOR || N->getOpcode() == X86ISD::FXOR);
// F[X]OR(0.0, x) -> x
@ -25164,6 +25165,19 @@ static SDValue PerformFORCombine(SDNode *N, SelectionDAG &DAG) {
if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N->getOperand(1)))
if (C->getValueAPF().isPosZero())
return N->getOperand(0);
EVT VT = N->getValueType(0);
if (VT.is512BitVector() && !Subtarget->hasDQI()) {
SDLoc dl(N);
MVT IntScalar = MVT::getIntegerVT(VT.getScalarSizeInBits());
MVT IntVT = MVT::getVectorVT(IntScalar, VT.getVectorNumElements());
SDValue Op0 = DAG.getNode(ISD::BITCAST, dl, IntVT, N->getOperand(0));
SDValue Op1 = DAG.getNode(ISD::BITCAST, dl, IntVT, N->getOperand(1));
unsigned IntOpcode = (N->getOpcode() == X86ISD::FOR) ? ISD::OR : ISD::XOR;
SDValue IntOp = DAG.getNode(IntOpcode, dl, IntVT, Op0, Op1);
return DAG.getNode(ISD::BITCAST, dl, VT, IntOp);
}
return SDValue();
}
@ -26027,7 +26041,7 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
case ISD::FADD: return PerformFADDCombine(N, DAG, Subtarget);
case ISD::FSUB: return PerformFSUBCombine(N, DAG, Subtarget);
case X86ISD::FXOR:
case X86ISD::FOR: return PerformFORCombine(N, DAG);
case X86ISD::FOR: return PerformFORCombine(N, DAG, Subtarget);
case X86ISD::FMIN:
case X86ISD::FMAX: return PerformFMinFMaxCombine(N, DAG);
case X86ISD::FAND: return PerformFANDCombine(N, DAG);

View File

@ -652,3 +652,13 @@ define <8 x double> @test_maskz_broadcast_vaddpd(<8 x double> %i, double* %j,
%r = select <8 x i1> %mask, <8 x double> %x, <8 x double> zeroinitializer
ret <8 x double> %r
}
; CHECK-LABEL: test_fxor
; CHECK: vpxord
; CHECK: ret
define <16 x float> @test_fxor(<16 x float> %a) {
%res = fsub <16 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %a
ret <16 x float>%res
}