forked from OSchip/llvm-project
AVX-512: Fixed a bug in OR/XOR operations for 512-bit FP values on KNL.
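KNL does not have VXORPS/VORPS for 512-bit values (the 512-bit forms are only available when the subtarget has AVX-512DQ), so the operands are bitcast to an integer vector type and the integer VPXOR/VPOR instructions are used instead; they perform the same bitwise operation. X86ISD::FXOR/FOR nodes are generated as a result of FSUB combining. Differential Revision: http://reviews.llvm.org/D12753 llvm-svn: 247523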
This commit is contained in:
parent
444d005615
commit
8671fcbbd6
|
@ -25152,7 +25152,8 @@ static SDValue PerformFSUBCombine(SDNode *N, SelectionDAG &DAG,
|
|||
}
|
||||
|
||||
/// Do target-specific dag combines on X86ISD::FOR and X86ISD::FXOR nodes.
|
||||
static SDValue PerformFORCombine(SDNode *N, SelectionDAG &DAG) {
|
||||
static SDValue PerformFORCombine(SDNode *N, SelectionDAG &DAG,
|
||||
const X86Subtarget *Subtarget) {
|
||||
assert(N->getOpcode() == X86ISD::FOR || N->getOpcode() == X86ISD::FXOR);
|
||||
|
||||
// F[X]OR(0.0, x) -> x
|
||||
|
@ -25164,6 +25165,19 @@ static SDValue PerformFORCombine(SDNode *N, SelectionDAG &DAG) {
|
|||
if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N->getOperand(1)))
|
||||
if (C->getValueAPF().isPosZero())
|
||||
return N->getOperand(0);
|
||||
|
||||
EVT VT = N->getValueType(0);
|
||||
if (VT.is512BitVector() && !Subtarget->hasDQI()) {
|
||||
SDLoc dl(N);
|
||||
MVT IntScalar = MVT::getIntegerVT(VT.getScalarSizeInBits());
|
||||
MVT IntVT = MVT::getVectorVT(IntScalar, VT.getVectorNumElements());
|
||||
|
||||
SDValue Op0 = DAG.getNode(ISD::BITCAST, dl, IntVT, N->getOperand(0));
|
||||
SDValue Op1 = DAG.getNode(ISD::BITCAST, dl, IntVT, N->getOperand(1));
|
||||
unsigned IntOpcode = (N->getOpcode() == X86ISD::FOR) ? ISD::OR : ISD::XOR;
|
||||
SDValue IntOp = DAG.getNode(IntOpcode, dl, IntVT, Op0, Op1);
|
||||
return DAG.getNode(ISD::BITCAST, dl, VT, IntOp);
|
||||
}
|
||||
return SDValue();
|
||||
}
|
||||
|
||||
|
@ -26027,7 +26041,7 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
|
|||
case ISD::FADD: return PerformFADDCombine(N, DAG, Subtarget);
|
||||
case ISD::FSUB: return PerformFSUBCombine(N, DAG, Subtarget);
|
||||
case X86ISD::FXOR:
|
||||
case X86ISD::FOR: return PerformFORCombine(N, DAG);
|
||||
case X86ISD::FOR: return PerformFORCombine(N, DAG, Subtarget);
|
||||
case X86ISD::FMIN:
|
||||
case X86ISD::FMAX: return PerformFMinFMaxCombine(N, DAG);
|
||||
case X86ISD::FAND: return PerformFANDCombine(N, DAG);
|
||||
|
|
|
@ -652,3 +652,13 @@ define <8 x double> @test_maskz_broadcast_vaddpd(<8 x double> %i, double* %j,
|
|||
%r = select <8 x i1> %mask, <8 x double> %x, <8 x double> zeroinitializer
|
||||
ret <8 x double> %r
|
||||
}
|
||||
|
||||
; CHECK-LABEL: test_fxor
|
||||
; CHECK: vpxord
|
||||
; CHECK: ret
|
||||
define <16 x float> @test_fxor(<16 x float> %a) {
|
||||
|
||||
%res = fsub <16 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %a
|
||||
ret <16 x float>%res
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in New Issue