forked from OSchip/llvm-project
X86 MMX: optimize transfer from MMX to i32
Previously we generated a store (movq) followed by a load to move the low 32 bits of an MMX register into a GPR. Now we emit a single register-to-register movd. rdar://9946746 llvm-svn: 167056
This commit is contained in:
parent
47a299dcc9
commit
acb8becc73
|
@ -14399,6 +14399,14 @@ static SDValue PerformEXTRACT_VECTOR_ELTCombine(SDNode *N, SelectionDAG &DAG,
|
||||||
return NewOp;
|
return NewOp;
|
||||||
|
|
||||||
SDValue InputVector = N->getOperand(0);
|
SDValue InputVector = N->getOperand(0);
|
||||||
|
// Detect whether we are trying to convert from mmx to i32 and the bitcast
|
||||||
|
// from mmx to v2i32 has a single use.
|
||||||
|
if (InputVector.getNode()->getOpcode() == llvm::ISD::BITCAST &&
|
||||||
|
InputVector.getNode()->getOperand(0).getValueType() == MVT::x86mmx &&
|
||||||
|
InputVector.hasOneUse() && N->getValueType(0) == MVT::i32)
|
||||||
|
return DAG.getNode(X86ISD::MMX_MOVD2W, InputVector.getDebugLoc(),
|
||||||
|
N->getValueType(0),
|
||||||
|
InputVector.getNode()->getOperand(0));
|
||||||
|
|
||||||
// Only operate on vectors of 4 elements, where the alternative shuffling
|
// Only operate on vectors of 4 elements, where the alternative shuffling
|
||||||
// gets to be more expensive.
|
// gets to be more expensive.
|
||||||
|
|
|
@ -142,6 +142,10 @@ namespace llvm {
|
||||||
/// mnemonic, so do I; blame Intel.
|
/// mnemonic, so do I; blame Intel.
|
||||||
MOVDQ2Q,
|
MOVDQ2Q,
|
||||||
|
|
||||||
|
/// MMX_MOVD2W - Copies a 32-bit value from the low word of an MMX
|
||||||
|
/// vector to a GPR.
|
||||||
|
MMX_MOVD2W,
|
||||||
|
|
||||||
/// PEXTRB - Extract an 8-bit value from a vector and zero extend it to
|
/// PEXTRB - Extract an 8-bit value from a vector and zero extend it to
|
||||||
/// i32, corresponds to X86::PEXTRB.
|
/// i32, corresponds to X86::PEXTRB.
|
||||||
PEXTRB,
|
PEXTRB,
|
||||||
|
|
|
@ -207,8 +207,14 @@ def MMX_MOVD64rm : MMXI<0x6E, MRMSrcMem, (outs VR64:$dst), (ins i32mem:$src),
|
||||||
let mayStore = 1 in
|
let mayStore = 1 in
|
||||||
def MMX_MOVD64mr : MMXI<0x7E, MRMDestMem, (outs), (ins i32mem:$dst, VR64:$src),
|
def MMX_MOVD64mr : MMXI<0x7E, MRMDestMem, (outs), (ins i32mem:$dst, VR64:$src),
|
||||||
"movd\t{$src, $dst|$dst, $src}", [], IIC_MMX_MOV_MM_RM>;
|
"movd\t{$src, $dst|$dst, $src}", [], IIC_MMX_MOV_MM_RM>;
|
||||||
def MMX_MOVD64grr : MMXI<0x7E, MRMDestReg, (outs), (ins GR32:$dst, VR64:$src),
|
|
||||||
"movd\t{$src, $dst|$dst, $src}", [], IIC_MMX_MOV_REG_MM>;
|
// Low word of MMX to GPR.
|
||||||
|
def MMX_X86movd2w : SDNode<"X86ISD::MMX_MOVD2W", SDTypeProfile<1, 1,
|
||||||
|
[SDTCisVT<0, i32>, SDTCisVT<1, x86mmx>]>>;
|
||||||
|
def MMX_MOVD64grr : MMXI<0x7E, MRMDestReg, (outs GR32:$dst), (ins VR64:$src),
|
||||||
|
"movd\t{$src, $dst|$dst, $src}",
|
||||||
|
[(set GR32:$dst,
|
||||||
|
(MMX_X86movd2w (x86mmx VR64:$src)))], IIC_MMX_MOV_REG_MM>;
|
||||||
|
|
||||||
let neverHasSideEffects = 1 in
|
let neverHasSideEffects = 1 in
|
||||||
def MMX_MOVD64to64rr : MMXRI<0x6E, MRMSrcReg, (outs VR64:$dst), (ins GR64:$src),
|
def MMX_MOVD64to64rr : MMXRI<0x6E, MRMSrcReg, (outs VR64:$dst), (ins GR64:$src),
|
||||||
|
|
|
@ -1043,6 +1043,20 @@ entry:
|
||||||
ret i64 %5
|
ret i64 %5
|
||||||
}
|
}
|
||||||
|
|
||||||
|
define i32 @test21_2(<1 x i64> %a) nounwind readnone optsize ssp {
|
||||||
|
; CHECK: test21_2
|
||||||
|
; CHECK: pshufw
|
||||||
|
; CHECK: movd
|
||||||
|
entry:
|
||||||
|
%0 = bitcast <1 x i64> %a to <4 x i16>
|
||||||
|
%1 = bitcast <4 x i16> %0 to x86_mmx
|
||||||
|
%2 = tail call x86_mmx @llvm.x86.sse.pshuf.w(x86_mmx %1, i8 3) nounwind readnone
|
||||||
|
%3 = bitcast x86_mmx %2 to <4 x i16>
|
||||||
|
%4 = bitcast <4 x i16> %3 to <2 x i32>
|
||||||
|
%5 = extractelement <2 x i32> %4, i32 0
|
||||||
|
ret i32 %5
|
||||||
|
}
|
||||||
|
|
||||||
declare x86_mmx @llvm.x86.mmx.pmulu.dq(x86_mmx, x86_mmx) nounwind readnone
|
declare x86_mmx @llvm.x86.mmx.pmulu.dq(x86_mmx, x86_mmx) nounwind readnone
|
||||||
|
|
||||||
define i64 @test20(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
|
define i64 @test20(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
|
||||||
|
|
Loading…
Reference in New Issue