forked from OSchip/llvm-project
[X86] Stop changing f128 fand/for/fxor to v2i64.
The additional patterns don't cost us much and it seems better than changing element widths.

llvm-svn: 345564
This commit is contained in:
parent
d8e14a5901
commit
676d7a7a43
|
@ -37745,10 +37745,12 @@ static SDValue lowerX86FPLogicOp(SDNode *N, SelectionDAG &DAG,
|
||||||
const X86Subtarget &Subtarget) {
|
const X86Subtarget &Subtarget) {
|
||||||
MVT VT = N->getSimpleValueType(0);
|
MVT VT = N->getSimpleValueType(0);
|
||||||
// If we have integer vector types available, use the integer opcodes.
|
// If we have integer vector types available, use the integer opcodes.
|
||||||
if ((VT.isVector() || VT == MVT::f128) && Subtarget.hasSSE2()) {
|
if (!VT.isVector() || !Subtarget.hasSSE2())
|
||||||
|
return SDValue();
|
||||||
|
|
||||||
SDLoc dl(N);
|
SDLoc dl(N);
|
||||||
|
|
||||||
unsigned IntBits = std::min(VT.getScalarSizeInBits(), 64U);
|
unsigned IntBits = VT.getScalarSizeInBits();
|
||||||
MVT IntSVT = MVT::getIntegerVT(IntBits);
|
MVT IntSVT = MVT::getIntegerVT(IntBits);
|
||||||
MVT IntVT = MVT::getVectorVT(IntSVT, VT.getSizeInBits() / IntBits);
|
MVT IntVT = MVT::getVectorVT(IntSVT, VT.getSizeInBits() / IntBits);
|
||||||
|
|
||||||
|
@ -37764,8 +37766,6 @@ static SDValue lowerX86FPLogicOp(SDNode *N, SelectionDAG &DAG,
|
||||||
}
|
}
|
||||||
SDValue IntOp = DAG.getNode(IntOpcode, dl, IntVT, Op0, Op1);
|
SDValue IntOp = DAG.getNode(IntOpcode, dl, IntVT, Op0, Op1);
|
||||||
return DAG.getBitcast(VT, IntOp);
|
return DAG.getBitcast(VT, IntOp);
|
||||||
}
|
|
||||||
return SDValue();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -466,8 +466,6 @@ def : Pat<(loadf128 addr:$src),
|
||||||
(VMOVUPSZ128rm addr:$src)>;
|
(VMOVUPSZ128rm addr:$src)>;
|
||||||
}
|
}
|
||||||
|
|
||||||
// With SSE2 the DAG combiner converts fp logic ops to integer logic ops to
|
|
||||||
// reduce patterns.
|
|
||||||
let Predicates = [UseSSE1] in {
|
let Predicates = [UseSSE1] in {
|
||||||
// andps is shorter than andpd or pand. andps is SSE and andpd/pand are in SSE2
|
// andps is shorter than andpd or pand. andps is SSE and andpd/pand are in SSE2
|
||||||
def : Pat<(f128 (X86fand VR128:$src1, (memopf128 addr:$src2))),
|
def : Pat<(f128 (X86fand VR128:$src1, (memopf128 addr:$src2))),
|
||||||
|
@ -489,4 +487,23 @@ def : Pat<(f128 (X86fxor VR128:$src1, VR128:$src2)),
|
||||||
(XORPSrr VR128:$src1, VR128:$src2)>;
|
(XORPSrr VR128:$src1, VR128:$src2)>;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
let Predicates = [HasAVX] in {
|
||||||
|
// andps is shorter than andpd or pand. andps is SSE and andpd/pand are in SSE2
|
||||||
|
def : Pat<(f128 (X86fand VR128:$src1, (loadf128 addr:$src2))),
|
||||||
|
(VANDPSrm VR128:$src1, f128mem:$src2)>;
|
||||||
|
|
||||||
|
def : Pat<(f128 (X86fand VR128:$src1, VR128:$src2)),
|
||||||
|
(VANDPSrr VR128:$src1, VR128:$src2)>;
|
||||||
|
|
||||||
|
def : Pat<(f128 (X86for VR128:$src1, (loadf128 addr:$src2))),
|
||||||
|
(VORPSrm VR128:$src1, f128mem:$src2)>;
|
||||||
|
|
||||||
|
def : Pat<(f128 (X86for VR128:$src1, VR128:$src2)),
|
||||||
|
(VORPSrr VR128:$src1, VR128:$src2)>;
|
||||||
|
|
||||||
|
def : Pat<(f128 (X86fxor VR128:$src1, (loadf128 addr:$src2))),
|
||||||
|
(VXORPSrm VR128:$src1, f128mem:$src2)>;
|
||||||
|
|
||||||
|
def : Pat<(f128 (X86fxor VR128:$src1, VR128:$src2)),
|
||||||
|
(VXORPSrr VR128:$src1, VR128:$src2)>;
|
||||||
|
}
|
||||||
|
|
Loading…
Reference in New Issue