forked from OSchip/llvm-project
1. Use pxor instead of xorps / xorpd to clear FR32 / FR64 registers. This proves to be worth 20% on Ptrdist/ks. Might be related to dependency-breaking support.
2. Added FsMOVAPSrr and FsMOVAPDrr as aliases to MOVAPSrr and MOVAPDrr. These are used for FR32 / FR64 reg-to-reg copies.
3. Tell reg-allocator to generate MOVSSrm / MOVSDrm and MOVSSmr / MOVSDmr to spill / restore FsMOVAPSrr and FsMOVAPDrr.
llvm-svn: 26241
This commit is contained in:
parent
3f99628939
commit
24c461b51e
|
@ -29,6 +29,7 @@ bool X86InstrInfo::isMoveInstr(const MachineInstr& MI,
|
|||
MachineOpCode oc = MI.getOpcode();
|
||||
if (oc == X86::MOV8rr || oc == X86::MOV16rr || oc == X86::MOV32rr ||
|
||||
oc == X86::FpMOV || oc == X86::MOVSSrr || oc == X86::MOVSDrr ||
|
||||
oc == X86::FsMOVAPSrr || oc == X86::FsMOVAPDrr ||
|
||||
oc == X86::MOVAPSrr || oc == X86::MOVAPDrr) {
|
||||
assert(MI.getNumOperands() == 2 &&
|
||||
MI.getOperand(0).isRegister() &&
|
||||
|
|
|
@ -2487,13 +2487,13 @@ def UCOMISDrm: I<0x2E, MRMSrcMem, (ops FR64:$src1, f64mem:$src2),
|
|||
[(X86cmp FR64:$src1, (loadf64 addr:$src2))]>,
|
||||
Requires<[HasSSE2]>, TB, OpSize;
|
||||
|
||||
// Pseudo-instructions that map fld0 to xorps/xorpd for sse.
|
||||
// Pseudo-instructions that map fld0 to pxor for sse.
|
||||
// FIXME: remove when we can teach regalloc that xor reg, reg is ok.
|
||||
def FLD0SS : I<0x57, MRMInitReg, (ops FR32:$dst),
|
||||
"xorps $dst, $dst", [(set FR32:$dst, fp32imm0)]>,
|
||||
def FLD0SS : I<0xEF, MRMInitReg, (ops FR32:$dst),
|
||||
"pxor $dst, $dst", [(set FR32:$dst, fp32imm0)]>,
|
||||
Requires<[HasSSE1]>, TB;
|
||||
def FLD0SD : I<0x57, MRMInitReg, (ops FR64:$dst),
|
||||
"xorpd $dst, $dst", [(set FR64:$dst, fp64imm0)]>,
|
||||
def FLD0SD : I<0xEF, MRMInitReg, (ops FR64:$dst),
|
||||
"pxor $dst, $dst", [(set FR64:$dst, fp64imm0)]>,
|
||||
Requires<[HasSSE2]>, TB, OpSize;
|
||||
|
||||
let isTwoAddress = 1 in {
|
||||
|
@ -3033,13 +3033,22 @@ def MOVAPDmr : I<0x29, MRMDestMem, (ops f128mem:$dst, V2F8:$src),
|
|||
"movapd {$src, $dst|$dst, $src}",[]>,
|
||||
Requires<[HasSSE2]>, TB, OpSize;
|
||||
|
||||
// Pseudo-instructions to load FR32 / FR64 from f128mem using movaps / movapd.
|
||||
// Alias instructions to do FR32 / FR64 reg-to-reg copy using movaps / movapd.
|
||||
// Upper bits are disregarded.
|
||||
def MOVSAPSrm : I<0x28, MRMSrcMem, (ops FR32:$dst, f128mem:$src),
|
||||
"movaps {$src, $dst|$dst, $src}",
|
||||
[(set FR32:$dst, (X86loadpf32 addr:$src))]>,
|
||||
Requires<[HasSSE1]>, TB;
|
||||
def MOVSAPDrm : I<0x28, MRMSrcMem, (ops FR64:$dst, f128mem:$src),
|
||||
def FsMOVAPSrr : I<0x28, MRMSrcReg, (ops V4F4:$dst, V4F4:$src),
|
||||
"movaps {$src, $dst|$dst, $src}", []>,
|
||||
Requires<[HasSSE1]>, TB;
|
||||
def FsMOVAPDrr : I<0x28, MRMSrcReg, (ops V2F8:$dst, V2F8:$src),
|
||||
"movapd {$src, $dst|$dst, $src}", []>,
|
||||
Requires<[HasSSE2]>, TB, OpSize;
|
||||
|
||||
// Alias instructions to load FR32 / FR64 from f128mem using movaps / movapd.
|
||||
// Upper bits are disregarded.
|
||||
def FsMOVAPSrm : I<0x28, MRMSrcMem, (ops FR32:$dst, f128mem:$src),
|
||||
"movaps {$src, $dst|$dst, $src}",
|
||||
[(set FR32:$dst, (X86loadpf32 addr:$src))]>,
|
||||
Requires<[HasSSE1]>, TB;
|
||||
def FsMOVAPDrm : I<0x28, MRMSrcMem, (ops FR64:$dst, f128mem:$src),
|
||||
"movapd {$src, $dst|$dst, $src}",
|
||||
[(set FR64:$dst, (X86loadpf64 addr:$src))]>,
|
||||
Requires<[HasSSE2]>, TB, OpSize;
|
||||
|
|
|
@ -114,9 +114,9 @@ void X86RegisterInfo::copyRegToReg(MachineBasicBlock &MBB,
|
|||
} else if (RC == &X86::RFPRegClass || RC == &X86::RSTRegClass) {
|
||||
Opc = X86::FpMOV;
|
||||
} else if (RC == &X86::FR32RegClass || RC == &X86::V4F4RegClass) {
|
||||
Opc = X86::MOVAPSrr;
|
||||
Opc = X86::FsMOVAPSrr;
|
||||
} else if (RC == &X86::FR64RegClass || RC == &X86::V2F8RegClass) {
|
||||
Opc = X86::MOVAPDrr;
|
||||
Opc = X86::FsMOVAPDrr;
|
||||
} else {
|
||||
assert(0 && "Unknown regclass");
|
||||
abort();
|
||||
|
@ -313,6 +313,9 @@ MachineInstr* X86RegisterInfo::foldMemoryOperand(MachineInstr* MI,
|
|||
case X86::CMP8ri: return MakeMIInst(X86::CMP8mi , FrameIndex, MI);
|
||||
case X86::CMP16ri: return MakeMIInst(X86::CMP16mi, FrameIndex, MI);
|
||||
case X86::CMP32ri: return MakeMIInst(X86::CMP32mi, FrameIndex, MI);
|
||||
// Alias scalar SSE instructions
|
||||
case X86::FsMOVAPSrr: return MakeMRInst(X86::MOVSSmr, FrameIndex, MI);
|
||||
case X86::FsMOVAPDrr: return MakeMRInst(X86::MOVSDmr, FrameIndex, MI);
|
||||
// Scalar SSE instructions
|
||||
case X86::MOVSSrr: return MakeMRInst(X86::MOVSSmr, FrameIndex, MI);
|
||||
case X86::MOVSDrr: return MakeMRInst(X86::MOVSDmr, FrameIndex, MI);
|
||||
|
@ -393,6 +396,9 @@ MachineInstr* X86RegisterInfo::foldMemoryOperand(MachineInstr* MI,
|
|||
case X86::MOVZX16rr8:return MakeRMInst(X86::MOVZX16rm8 , FrameIndex, MI);
|
||||
case X86::MOVZX32rr8:return MakeRMInst(X86::MOVZX32rm8, FrameIndex, MI);
|
||||
case X86::MOVZX32rr16:return MakeRMInst(X86::MOVZX32rm16, FrameIndex, MI);
|
||||
// Alias scalar SSE instructions
|
||||
case X86::FsMOVAPSrr:return MakeRMInst(X86::MOVSSrm, FrameIndex, MI);
|
||||
case X86::FsMOVAPDrr:return MakeRMInst(X86::MOVSDrm, FrameIndex, MI);
|
||||
// Scalar SSE instructions
|
||||
case X86::MOVSSrr: return MakeRMInst(X86::MOVSSrm, FrameIndex, MI);
|
||||
case X86::MOVSDrr: return MakeRMInst(X86::MOVSDrm, FrameIndex, MI);
|
||||
|
|
Loading…
Reference in New Issue