forked from OSchip/llvm-project
[X86][MMX] Handle i32->mmx conversion using movd
Implement a BITCAST DAG combine that transforms i32->mmx conversion patterns into an X86-specific node (MMX_MOVW2D), guaranteeing that moves between i32 and x86mmx are handled better, i.e., without using a store-load sequence to do the conversion. llvm-svn: 228293
This commit is contained in:
parent
cc6089d2e0
commit
ab9ae87623
|
@ -1675,6 +1675,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
|
|||
// We have target-specific dag combine patterns for the following nodes:
|
||||
setTargetDAGCombine(ISD::VECTOR_SHUFFLE);
|
||||
setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT);
|
||||
setTargetDAGCombine(ISD::BITCAST);
|
||||
setTargetDAGCombine(ISD::VSELECT);
|
||||
setTargetDAGCombine(ISD::SELECT);
|
||||
setTargetDAGCombine(ISD::SHL);
|
||||
|
@ -22986,6 +22987,25 @@ static SDValue XFormVExtractWithShuffleIntoLoad(SDNode *N, SelectionDAG &DAG,
|
|||
EltNo);
|
||||
}
|
||||
|
||||
/// \brief Detect bitcasts between i32 to x86mmx low word. Since MMX types are
|
||||
/// special and don't usually play with other vector types, it's better to
|
||||
/// handle them early to be sure we emit efficient code by avoiding
|
||||
/// store-load conversions.
|
||||
static SDValue PerformBITCASTCombine(SDNode *N, SelectionDAG &DAG) {
|
||||
if (N->getValueType(0) != MVT::x86mmx ||
|
||||
N->getOperand(0)->getOpcode() != ISD::BUILD_VECTOR ||
|
||||
N->getOperand(0)->getValueType(0) != MVT::v2i32)
|
||||
return SDValue();
|
||||
|
||||
SDValue V = N->getOperand(0);
|
||||
ConstantSDNode *C = dyn_cast<ConstantSDNode>(V.getOperand(1));
|
||||
if (C && C->getZExtValue() == 0 && V.getOperand(0).getValueType() == MVT::i32)
|
||||
return DAG.getNode(X86ISD::MMX_MOVW2D, SDLoc(V.getOperand(0)),
|
||||
N->getValueType(0), V.getOperand(0));
|
||||
|
||||
return SDValue();
|
||||
}
|
||||
|
||||
/// PerformEXTRACT_VECTOR_ELTCombine - Detect vector gather/scatter index
|
||||
/// generation and convert it from being a bunch of shuffles and extracts
|
||||
/// into a somewhat faster sequence. For i686, the best sequence is apparently
|
||||
|
@ -26129,6 +26149,7 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
|
|||
case ISD::SELECT:
|
||||
case X86ISD::SHRUNKBLEND:
|
||||
return PerformSELECTCombine(N, DAG, DCI, Subtarget);
|
||||
case ISD::BITCAST: return PerformBITCASTCombine(N, DAG);
|
||||
case X86ISD::CMOV: return PerformCMOVCombine(N, DAG, DCI, Subtarget);
|
||||
case ISD::ADD: return PerformAddCombine(N, DAG, Subtarget);
|
||||
case ISD::SUB: return PerformSubCombine(N, DAG, Subtarget);
|
||||
|
|
|
@ -158,6 +158,10 @@ namespace llvm {
|
|||
/// vector to a GPR.
|
||||
MMX_MOVD2W,
|
||||
|
||||
/// MMX_MOVW2D - Copies a GPR into the low 32-bit word of an MMX vector
|
||||
/// and zeroes out the high word.
|
||||
MMX_MOVW2D,
|
||||
|
||||
/// PEXTRB - Extract an 8-bit value from a vector and zero extend it to
|
||||
/// i32, corresponds to X86::PEXTRB.
|
||||
PEXTRB,
|
||||
|
|
|
@ -18,6 +18,9 @@
|
|||
// Low word of MMX to GPR.
|
||||
def MMX_X86movd2w : SDNode<"X86ISD::MMX_MOVD2W", SDTypeProfile<1, 1,
|
||||
[SDTCisVT<0, i32>, SDTCisVT<1, x86mmx>]>>;
|
||||
// GPR to low word of MMX.
|
||||
def MMX_X86movw2d : SDNode<"X86ISD::MMX_MOVW2D", SDTypeProfile<1, 1,
|
||||
[SDTCisVT<0, x86mmx>, SDTCisVT<1, i32>]>>;
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// MMX Pattern Fragments
|
||||
|
|
|
@ -229,6 +229,16 @@ def MMX_MOVD64rm : MMXI<0x6E, MRMSrcMem, (outs VR64:$dst), (ins i32mem:$src),
|
|||
[(set VR64:$dst,
|
||||
(x86mmx (scalar_to_vector (loadi32 addr:$src))))],
|
||||
IIC_MMX_MOV_MM_RM>, Sched<[WriteLoad]>;
|
||||
|
||||
let Predicates = [HasMMX] in {
|
||||
let AddedComplexity = 15 in
|
||||
def : Pat<(x86mmx (MMX_X86movw2d GR32:$src)),
|
||||
(MMX_MOVD64rr GR32:$src)>;
|
||||
let AddedComplexity = 20 in
|
||||
def : Pat<(x86mmx (MMX_X86movw2d (loadi32 addr:$src))),
|
||||
(MMX_MOVD64rm addr:$src)>;
|
||||
}
|
||||
|
||||
let mayStore = 1 in
|
||||
def MMX_MOVD64mr : MMXI<0x7E, MRMDestMem, (outs), (ins i32mem:$dst, VR64:$src),
|
||||
"movd\t{$src, $dst|$dst, $src}", [], IIC_MMX_MOV_MM_RM>,
|
||||
|
|
|
@ -22,11 +22,10 @@ entry:
|
|||
define i64 @t1(i64 %x, i32 %n) {
|
||||
; CHECK-LABEL: t1:
|
||||
; CHECK: ## BB#0: ## %entry
|
||||
; CHECK-NEXT: movd %rdi, %mm0
|
||||
; CHECK-NEXT: movd %esi, %xmm0
|
||||
; CHECK-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp)
|
||||
; CHECK-NEXT: psllq -{{[0-9]+}}(%rsp), %mm0
|
||||
; CHECK-NEXT: movd %mm0, %rax
|
||||
; CHECK-NEXT: movd %esi, %mm0
|
||||
; CHECK-NEXT: movd %rdi, %mm1
|
||||
; CHECK-NEXT: psllq %mm0, %mm1
|
||||
; CHECK-NEXT: movd %mm1, %rax
|
||||
; CHECK-NEXT: retq
|
||||
entry:
|
||||
%0 = bitcast i64 %x to x86_mmx
|
||||
|
@ -38,15 +37,12 @@ entry:
|
|||
define i64 @t2(i64 %x, i32 %n, i32 %w) {
|
||||
; CHECK-LABEL: t2:
|
||||
; CHECK: ## BB#0: ## %entry
|
||||
; CHECK-NEXT: movd %edx, %xmm0
|
||||
; CHECK-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp)
|
||||
; CHECK-NEXT: movq -{{[0-9]+}}(%rsp), %mm0
|
||||
; CHECK-NEXT: movd %esi, %xmm0
|
||||
; CHECK-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp)
|
||||
; CHECK-NEXT: psllq -{{[0-9]+}}(%rsp), %mm0
|
||||
; CHECK-NEXT: movd %rdi, %mm1
|
||||
; CHECK-NEXT: por %mm0, %mm1
|
||||
; CHECK-NEXT: movd %mm1, %rax
|
||||
; CHECK-NEXT: movd %esi, %mm0
|
||||
; CHECK-NEXT: movd %edx, %mm1
|
||||
; CHECK-NEXT: psllq %mm0, %mm1
|
||||
; CHECK-NEXT: movd %rdi, %mm0
|
||||
; CHECK-NEXT: por %mm1, %mm0
|
||||
; CHECK-NEXT: movd %mm0, %rax
|
||||
; CHECK-NEXT: retq
|
||||
entry:
|
||||
%0 = insertelement <2 x i32> undef, i32 %w, i32 0
|
||||
|
@ -63,9 +59,8 @@ define i64 @t3(<1 x i64>* %y, i32* %n) {
|
|||
; CHECK-LABEL: t3:
|
||||
; CHECK: ## BB#0: ## %entry
|
||||
; CHECK-NEXT: movq (%rdi), %mm0
|
||||
; CHECK-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
|
||||
; CHECK-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp)
|
||||
; CHECK-NEXT: psllq -{{[0-9]+}}(%rsp), %mm0
|
||||
; CHECK-NEXT: movd (%rsi), %mm1
|
||||
; CHECK-NEXT: psllq %mm1, %mm0
|
||||
; CHECK-NEXT: movd %mm0, %rax
|
||||
; CHECK-NEXT: retq
|
||||
entry:
|
||||
|
|
|
@ -23,9 +23,8 @@ define i32 @test0(<1 x i64>* %v4) {
|
|||
define i32 @test1(i32* nocapture readonly %ptr) {
|
||||
; CHECK-LABEL: test1:
|
||||
; CHECK: ## BB#0: ## %entry
|
||||
; CHECK-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
|
||||
; CHECK-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp)
|
||||
; CHECK-NEXT: pshufw $232, -{{[0-9]+}}(%rsp), %mm0
|
||||
; CHECK-NEXT: movd (%rdi), %mm0
|
||||
; CHECK-NEXT: pshufw $232, %mm0, %mm0
|
||||
; CHECK-NEXT: movd %mm0, %eax
|
||||
; CHECK-NEXT: emms
|
||||
; CHECK-NEXT: retq
|
||||
|
|
Loading…
Reference in New Issue