forked from OSchip/llvm-project
X86: Shave off one shuffle from the pcmpeqq sequence for SSE2 by making use of the commutativity of AND.
llvm-svn: 171064
This commit is contained in:
parent
df4af41b9b
commit
81b5a8fd2e
|
@ -9173,7 +9173,7 @@ SDValue X86TargetLowering::LowerVSETCC(SDValue Op, SelectionDAG &DAG) const {
|
||||||
return SDValue();
|
return SDValue();
|
||||||
if (Opc == X86ISD::PCMPEQ && !Subtarget->hasSSE41()) {
|
if (Opc == X86ISD::PCMPEQ && !Subtarget->hasSSE41()) {
|
||||||
// If pcmpeqq is missing but pcmpeqd is available synthesize pcmpeqq with
|
// If pcmpeqq is missing but pcmpeqd is available synthesize pcmpeqq with
|
||||||
// pcmpeqd + 2 shuffles + pand.
|
// pcmpeqd + pshufd + pand.
|
||||||
assert(Subtarget->hasSSE2() && !FlipSigns && "Don't know how to lower!");
|
assert(Subtarget->hasSSE2() && !FlipSigns && "Don't know how to lower!");
|
||||||
|
|
||||||
// First cast everything to the right type,
|
// First cast everything to the right type,
|
||||||
|
@ -9184,11 +9184,9 @@ SDValue X86TargetLowering::LowerVSETCC(SDValue Op, SelectionDAG &DAG) const {
|
||||||
SDValue Result = DAG.getNode(Opc, dl, MVT::v4i32, Op0, Op1);
|
SDValue Result = DAG.getNode(Opc, dl, MVT::v4i32, Op0, Op1);
|
||||||
|
|
||||||
// Make sure the lower and upper halves are both all-ones.
|
// Make sure the lower and upper halves are both all-ones.
|
||||||
const int Mask1[] = { 0, 0, 2, 2 };
|
const int Mask[] = { 1, 0, 3, 2 };
|
||||||
SDValue S1 = DAG.getVectorShuffle(MVT::v4i32, dl, Result, Result, Mask1);
|
SDValue Shuf = DAG.getVectorShuffle(MVT::v4i32, dl, Result, Result, Mask);
|
||||||
const int Mask2[] = { 1, 1, 3, 3 };
|
Result = DAG.getNode(ISD::AND, dl, MVT::v4i32, Result, Shuf);
|
||||||
SDValue S2 = DAG.getVectorShuffle(MVT::v4i32, dl, Result, Result, Mask2);
|
|
||||||
Result = DAG.getNode(ISD::AND, dl, MVT::v4i32, S1, S2);
|
|
||||||
|
|
||||||
if (Invert)
|
if (Invert)
|
||||||
Result = DAG.getNOT(dl, Result, MVT::v4i32);
|
Result = DAG.getNOT(dl, Result, MVT::v4i32);
|
||||||
|
|
|
@ -45,8 +45,7 @@ define <4 x i32> @test4(<4 x i32> %A, <4 x i32> %B) nounwind {
|
||||||
define <2 x i64> @test5(<2 x i64> %A, <2 x i64> %B) nounwind {
|
define <2 x i64> @test5(<2 x i64> %A, <2 x i64> %B) nounwind {
|
||||||
; CHECK: test5:
|
; CHECK: test5:
|
||||||
; CHECK: pcmpeqd
|
; CHECK: pcmpeqd
|
||||||
; CHECK: pshufd $-11
|
; CHECK: pshufd $-79
|
||||||
; CHECK: pshufd $-96
|
|
||||||
; CHECK: pand
|
; CHECK: pand
|
||||||
; CHECK: ret
|
; CHECK: ret
|
||||||
%C = icmp eq <2 x i64> %A, %B
|
%C = icmp eq <2 x i64> %A, %B
|
||||||
|
@ -57,8 +56,7 @@ define <2 x i64> @test5(<2 x i64> %A, <2 x i64> %B) nounwind {
|
||||||
define <2 x i64> @test6(<2 x i64> %A, <2 x i64> %B) nounwind {
|
define <2 x i64> @test6(<2 x i64> %A, <2 x i64> %B) nounwind {
|
||||||
; CHECK: test6:
|
; CHECK: test6:
|
||||||
; CHECK: pcmpeqd
|
; CHECK: pcmpeqd
|
||||||
; CHECK: pshufd $-11
|
; CHECK: pshufd $-79
|
||||||
; CHECK: pshufd $-96
|
|
||||||
; CHECK: pand
|
; CHECK: pand
|
||||||
; CHECK: pcmpeqd
|
; CHECK: pcmpeqd
|
||||||
; CHECK: pxor
|
; CHECK: pxor
|
||||||
|
|
Loading…
Reference in New Issue