forked from OSchip/llvm-project
[X86][MMX] Improve MMX constant generation
Extend the MMX zero code to take any constant with zero'd upper 32-bits.

llvm-svn: 322553
This commit is contained in:
parent
760c0c9ed3
commit
85e6139633
|
@ -5396,6 +5396,12 @@ static bool getTargetConstantBitsFromNode(SDValue Op, unsigned EltSizeInBits,
|
||||||
SmallVector<APInt, 64> SrcEltBits(1, Cst->getAPIntValue());
|
SmallVector<APInt, 64> SrcEltBits(1, Cst->getAPIntValue());
|
||||||
return CastBitData(UndefSrcElts, SrcEltBits);
|
return CastBitData(UndefSrcElts, SrcEltBits);
|
||||||
}
|
}
|
||||||
|
if (auto *Cst = dyn_cast<ConstantFPSDNode>(Op)) {
|
||||||
|
APInt UndefSrcElts = APInt::getNullValue(1);
|
||||||
|
APInt RawBits = Cst->getValueAPF().bitcastToAPInt();
|
||||||
|
SmallVector<APInt, 64> SrcEltBits(1, RawBits);
|
||||||
|
return CastBitData(UndefSrcElts, SrcEltBits);
|
||||||
|
}
|
||||||
|
|
||||||
// Extract constant bits from build vector.
|
// Extract constant bits from build vector.
|
||||||
if (ISD::isBuildVectorOfConstantSDNodes(Op.getNode())) {
|
if (ISD::isBuildVectorOfConstantSDNodes(Op.getNode())) {
|
||||||
|
@ -30407,11 +30413,14 @@ static SDValue combineBitcast(SDNode *N, SelectionDAG &DAG,
|
||||||
// it's better to handle them early to be sure we emit efficient code by
|
// it's better to handle them early to be sure we emit efficient code by
|
||||||
// avoiding store-load conversions.
|
// avoiding store-load conversions.
|
||||||
if (VT == MVT::x86mmx) {
|
if (VT == MVT::x86mmx) {
|
||||||
// Detect zero MMX vectors.
|
// Detect zero-extended MMX constant vectors.
|
||||||
if (X86::isZeroNode(N0) || ISD::isBuildVectorAllZeros(N0.getNode())) {
|
APInt UndefElts;
|
||||||
|
SmallVector<APInt, 2> EltBits;
|
||||||
|
if (getTargetConstantBitsFromNode(N0, 32, UndefElts, EltBits) &&
|
||||||
|
EltBits[1] == 0) {
|
||||||
SDLoc DL(N0);
|
SDLoc DL(N0);
|
||||||
return DAG.getNode(X86ISD::MMX_MOVW2D, DL, VT,
|
return DAG.getNode(X86ISD::MMX_MOVW2D, DL, VT,
|
||||||
DAG.getConstant(0, DL, MVT::i32));
|
DAG.getConstant(EltBits[0], DL, MVT::i32));
|
||||||
}
|
}
|
||||||
|
|
||||||
// Detect bitcasts between i32 to x86mmx low word.
|
// Detect bitcasts between i32 to x86mmx low word.
|
||||||
|
|
|
@ -582,20 +582,15 @@ declare x86_mmx @llvm.x86.mmx.psrl.q(x86_mmx, x86_mmx)
|
||||||
define void @test_psrlq_by_volatile_shift_amount(x86_mmx* %t) nounwind {
|
define void @test_psrlq_by_volatile_shift_amount(x86_mmx* %t) nounwind {
|
||||||
; X86-LABEL: test_psrlq_by_volatile_shift_amount:
|
; X86-LABEL: test_psrlq_by_volatile_shift_amount:
|
||||||
; X86: # %bb.0: # %entry
|
; X86: # %bb.0: # %entry
|
||||||
; X86-NEXT: pushl %ebp
|
; X86-NEXT: pushl %eax
|
||||||
; X86-NEXT: movl %esp, %ebp
|
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||||
; X86-NEXT: andl $-8, %esp
|
; X86-NEXT: movl $1, (%esp)
|
||||||
; X86-NEXT: subl $16, %esp
|
; X86-NEXT: movd (%esp), %mm0
|
||||||
; X86-NEXT: movl 8(%ebp), %eax
|
; X86-NEXT: movl $255, %ecx
|
||||||
; X86-NEXT: movl $1, {{[0-9]+}}(%esp)
|
; X86-NEXT: movd %ecx, %mm1
|
||||||
; X86-NEXT: movd {{[0-9]+}}(%esp), %mm0
|
|
||||||
; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
|
|
||||||
; X86-NEXT: movl $255, {{[0-9]+}}(%esp)
|
|
||||||
; X86-NEXT: movq {{[0-9]+}}(%esp), %mm1
|
|
||||||
; X86-NEXT: psrlq %mm0, %mm1
|
; X86-NEXT: psrlq %mm0, %mm1
|
||||||
; X86-NEXT: movq %mm1, (%eax)
|
; X86-NEXT: movq %mm1, (%eax)
|
||||||
; X86-NEXT: movl %ebp, %esp
|
; X86-NEXT: popl %eax
|
||||||
; X86-NEXT: popl %ebp
|
|
||||||
; X86-NEXT: retl
|
; X86-NEXT: retl
|
||||||
;
|
;
|
||||||
; X64-LABEL: test_psrlq_by_volatile_shift_amount:
|
; X64-LABEL: test_psrlq_by_volatile_shift_amount:
|
||||||
|
@ -603,7 +598,7 @@ define void @test_psrlq_by_volatile_shift_amount(x86_mmx* %t) nounwind {
|
||||||
; X64-NEXT: movl $1, -{{[0-9]+}}(%rsp)
|
; X64-NEXT: movl $1, -{{[0-9]+}}(%rsp)
|
||||||
; X64-NEXT: movd -{{[0-9]+}}(%rsp), %mm0
|
; X64-NEXT: movd -{{[0-9]+}}(%rsp), %mm0
|
||||||
; X64-NEXT: movl $255, %eax
|
; X64-NEXT: movl $255, %eax
|
||||||
; X64-NEXT: movq %rax, %mm1
|
; X64-NEXT: movd %eax, %mm1
|
||||||
; X64-NEXT: psrlq %mm0, %mm1
|
; X64-NEXT: psrlq %mm0, %mm1
|
||||||
; X64-NEXT: movq %mm1, (%rdi)
|
; X64-NEXT: movq %mm1, (%rdi)
|
||||||
; X64-NEXT: retq
|
; X64-NEXT: retq
|
||||||
|
|
|
@ -95,36 +95,30 @@ entry:
|
||||||
define <4 x float> @pr35869() nounwind {
|
define <4 x float> @pr35869() nounwind {
|
||||||
; X32-LABEL: pr35869:
|
; X32-LABEL: pr35869:
|
||||||
; X32: ## %bb.0:
|
; X32: ## %bb.0:
|
||||||
; X32-NEXT: subl $12, %esp
|
|
||||||
; X32-NEXT: movl $64, %eax
|
; X32-NEXT: movl $64, %eax
|
||||||
; X32-NEXT: movd %eax, %xmm0
|
; X32-NEXT: movd %eax, %mm0
|
||||||
; X32-NEXT: movq %xmm0, (%esp)
|
|
||||||
; X32-NEXT: movq (%esp), %mm0
|
|
||||||
; X32-NEXT: pxor %mm1, %mm1
|
; X32-NEXT: pxor %mm1, %mm1
|
||||||
; X32-NEXT: punpcklbw %mm1, %mm0 ## mm0 = mm0[0],mm1[0],mm0[1],mm1[1],mm0[2],mm1[2],mm0[3],mm1[3]
|
; X32-NEXT: punpcklbw %mm1, %mm0 ## mm0 = mm0[0],mm1[0],mm0[1],mm1[1],mm0[2],mm1[2],mm0[3],mm1[3]
|
||||||
; X32-NEXT: pxor %xmm0, %xmm0
|
|
||||||
; X32-NEXT: pcmpgtw %mm0, %mm1
|
; X32-NEXT: pcmpgtw %mm0, %mm1
|
||||||
; X32-NEXT: movq %mm0, %mm2
|
; X32-NEXT: movq %mm0, %mm2
|
||||||
; X32-NEXT: punpckhwd %mm1, %mm2 ## mm2 = mm2[2],mm1[2],mm2[3],mm1[3]
|
; X32-NEXT: punpckhwd %mm1, %mm2 ## mm2 = mm2[2],mm1[2],mm2[3],mm1[3]
|
||||||
|
; X32-NEXT: xorps %xmm0, %xmm0
|
||||||
; X32-NEXT: cvtpi2ps %mm2, %xmm0
|
; X32-NEXT: cvtpi2ps %mm2, %xmm0
|
||||||
; X32-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0,0]
|
; X32-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0,0]
|
||||||
; X32-NEXT: punpcklwd %mm1, %mm0 ## mm0 = mm0[0],mm1[0],mm0[1],mm1[1]
|
; X32-NEXT: punpcklwd %mm1, %mm0 ## mm0 = mm0[0],mm1[0],mm0[1],mm1[1]
|
||||||
; X32-NEXT: cvtpi2ps %mm0, %xmm0
|
; X32-NEXT: cvtpi2ps %mm0, %xmm0
|
||||||
; X32-NEXT: addl $12, %esp
|
|
||||||
; X32-NEXT: retl
|
; X32-NEXT: retl
|
||||||
;
|
;
|
||||||
; X64-LABEL: pr35869:
|
; X64-LABEL: pr35869:
|
||||||
; X64: ## %bb.0:
|
; X64: ## %bb.0:
|
||||||
; X64-NEXT: movl $64, %eax
|
; X64-NEXT: movl $64, %eax
|
||||||
; X64-NEXT: movd %eax, %xmm0
|
; X64-NEXT: movd %eax, %mm0
|
||||||
; X64-NEXT: movq %xmm0, -{{[0-9]+}}(%rsp)
|
|
||||||
; X64-NEXT: movq -{{[0-9]+}}(%rsp), %mm0
|
|
||||||
; X64-NEXT: pxor %mm1, %mm1
|
; X64-NEXT: pxor %mm1, %mm1
|
||||||
; X64-NEXT: punpcklbw %mm1, %mm0 ## mm0 = mm0[0],mm1[0],mm0[1],mm1[1],mm0[2],mm1[2],mm0[3],mm1[3]
|
; X64-NEXT: punpcklbw %mm1, %mm0 ## mm0 = mm0[0],mm1[0],mm0[1],mm1[1],mm0[2],mm1[2],mm0[3],mm1[3]
|
||||||
; X64-NEXT: pxor %xmm0, %xmm0
|
|
||||||
; X64-NEXT: pcmpgtw %mm0, %mm1
|
; X64-NEXT: pcmpgtw %mm0, %mm1
|
||||||
; X64-NEXT: movq %mm0, %mm2
|
; X64-NEXT: movq %mm0, %mm2
|
||||||
; X64-NEXT: punpckhwd %mm1, %mm2 ## mm2 = mm2[2],mm1[2],mm2[3],mm1[3]
|
; X64-NEXT: punpckhwd %mm1, %mm2 ## mm2 = mm2[2],mm1[2],mm2[3],mm1[3]
|
||||||
|
; X64-NEXT: xorps %xmm0, %xmm0
|
||||||
; X64-NEXT: cvtpi2ps %mm2, %xmm0
|
; X64-NEXT: cvtpi2ps %mm2, %xmm0
|
||||||
; X64-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0,0]
|
; X64-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0,0]
|
||||||
; X64-NEXT: punpcklwd %mm1, %mm0 ## mm0 = mm0[0],mm1[0],mm0[1],mm1[1]
|
; X64-NEXT: punpcklwd %mm1, %mm0 ## mm0 = mm0[0],mm1[0],mm0[1],mm1[1]
|
||||||
|
|
Loading…
Reference in New Issue