forked from OSchip/llvm-project
- Allow XMM load (for scalar use) to be folded into ANDP* and XORP*.
- Use XORP* to implement fneg. llvm-svn: 25857
This commit is contained in:
parent
a91eb48547
commit
72d5c256c9
|
@ -59,7 +59,7 @@ struct X86ATTAsmPrinter : public X86SharedAsmPrinter {
|
|||
void printf64mem(const MachineInstr *MI, unsigned OpNo) {
|
||||
printMemReference(MI, OpNo);
|
||||
}
|
||||
void printf80mem(const MachineInstr *MI, unsigned OpNo) {
|
||||
void printf128mem(const MachineInstr *MI, unsigned OpNo) {
|
||||
printMemReference(MI, OpNo);
|
||||
}
|
||||
|
||||
|
|
|
@ -253,7 +253,8 @@ bool X86DAGToDAGISel::MatchAddress(SDOperand N, X86ISelAddressMode &AM) {
|
|||
if (AM.BaseType == X86ISelAddressMode::RegBase && AM.Base.Reg.Val == 0) {
|
||||
if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(N)) {
|
||||
AM.BaseType = X86ISelAddressMode::ConstantPoolBase;
|
||||
AM.Base.Reg = CurDAG->getTargetConstantPool(CP->get(), MVT::i32);
|
||||
AM.Base.Reg = CurDAG->getTargetConstantPool(CP->get(), MVT::i32,
|
||||
CP->getAlignment());
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -17,6 +17,7 @@
|
|||
#include "X86ISelLowering.h"
|
||||
#include "X86TargetMachine.h"
|
||||
#include "llvm/CallingConv.h"
|
||||
#include "llvm/Constants.h"
|
||||
#include "llvm/Function.h"
|
||||
#include "llvm/CodeGen/MachineFrameInfo.h"
|
||||
#include "llvm/CodeGen/MachineFunction.h"
|
||||
|
@ -208,16 +209,20 @@ X86TargetLowering::X86TargetLowering(TargetMachine &TM)
|
|||
setOperationAction(ISD::EXTLOAD, MVT::f32, Expand);
|
||||
setOperationAction(ISD::ZEXTLOAD, MVT::f32, Expand);
|
||||
|
||||
// Use ANDPD to simulate FABS.
|
||||
setOperationAction(ISD::FABS , MVT::f64, Custom);
|
||||
setOperationAction(ISD::FABS , MVT::f32, Custom);
|
||||
|
||||
// Use XORP to simulate FNEG.
|
||||
setOperationAction(ISD::FNEG , MVT::f64, Custom);
|
||||
setOperationAction(ISD::FNEG , MVT::f32, Custom);
|
||||
|
||||
// We don't support sin/cos/sqrt/fmod
|
||||
setOperationAction(ISD::FSIN , MVT::f64, Expand);
|
||||
setOperationAction(ISD::FCOS , MVT::f64, Expand);
|
||||
setOperationAction(ISD::FABS , MVT::f64, Custom);
|
||||
setOperationAction(ISD::FNEG , MVT::f64, Expand);
|
||||
setOperationAction(ISD::FREM , MVT::f64, Expand);
|
||||
setOperationAction(ISD::FSIN , MVT::f32, Expand);
|
||||
setOperationAction(ISD::FCOS , MVT::f32, Expand);
|
||||
setOperationAction(ISD::FABS , MVT::f32, Custom);
|
||||
setOperationAction(ISD::FNEG , MVT::f32, Expand);
|
||||
setOperationAction(ISD::FREM , MVT::f32, Expand);
|
||||
|
||||
// Expand FP immediates into loads from the stack, except for the special
|
||||
|
@ -1567,11 +1572,44 @@ SDOperand X86TargetLowering::LowerOperation(SDOperand Op, SelectionDAG &DAG) {
|
|||
}
|
||||
case ISD::FABS: {
|
||||
MVT::ValueType VT = Op.getValueType();
|
||||
SDOperand Mask = (VT == MVT::f64)
|
||||
? DAG.getConstantFP(BitsToDouble(~(1ULL << 63)), MVT::f64)
|
||||
: DAG.getConstantFP(BitsToFloat (~(1U << 31)), MVT::f32);
|
||||
const Type *OpNTy = MVT::getTypeForValueType(VT);
|
||||
std::vector<Constant*> CV;
|
||||
if (VT == MVT::f64) {
|
||||
CV.push_back(ConstantFP::get(OpNTy, BitsToDouble(~(1ULL << 63))));
|
||||
CV.push_back(ConstantFP::get(OpNTy, 0.0));
|
||||
} else {
|
||||
CV.push_back(ConstantFP::get(OpNTy, BitsToFloat(~(1U << 31))));
|
||||
CV.push_back(ConstantFP::get(OpNTy, 0.0));
|
||||
CV.push_back(ConstantFP::get(OpNTy, 0.0));
|
||||
CV.push_back(ConstantFP::get(OpNTy, 0.0));
|
||||
}
|
||||
Constant *CS = ConstantStruct::get(CV);
|
||||
SDOperand CPIdx = DAG.getConstantPool(CS, getPointerTy(), 4);
|
||||
SDOperand Mask
|
||||
= DAG.getNode(X86ISD::LOAD_PACK,
|
||||
VT, DAG.getEntryNode(), CPIdx, DAG.getSrcValue(NULL));
|
||||
return DAG.getNode(X86ISD::FAND, VT, Op.getOperand(0), Mask);
|
||||
}
|
||||
case ISD::FNEG: {
// Lower FNEG to an X86ISD::FXOR of the operand with a mask constant that is
// materialized in the constant pool and read back with X86ISD::LOAD_PACK.
|
||||
MVT::ValueType VT = Op.getValueType();
|
||||
// The pool entry is built from elements of the same FP type as the result.
const Type *OpNTy = MVT::getTypeForValueType(VT);
|
||||
std::vector<Constant*> CV;
|
||||
if (VT == MVT::f64) {
|
||||
// f64: two elements -- a value with only bit 63 set (the IEEE-754 sign
// bit of a double), followed by 0.0.
CV.push_back(ConstantFP::get(OpNTy, BitsToDouble(1ULL << 63)));
|
||||
CV.push_back(ConstantFP::get(OpNTy, 0.0));
|
||||
} else {
|
||||
// Otherwise (f32): four elements -- a value with only bit 31 set (the
// IEEE-754 sign bit of a float), followed by three 0.0 elements.
CV.push_back(ConstantFP::get(OpNTy, BitsToFloat(1U << 31)));
|
||||
CV.push_back(ConstantFP::get(OpNTy, 0.0));
|
||||
CV.push_back(ConstantFP::get(OpNTy, 0.0));
|
||||
CV.push_back(ConstantFP::get(OpNTy, 0.0));
|
||||
}
|
||||
Constant *CS = ConstantStruct::get(CV);
|
||||
// NOTE(review): the third argument (4) is presumably the pool entry's
// alignment; whether it is in bytes or a log2 shift is not visible here --
// confirm it yields the alignment a packed memory operand needs.
SDOperand CPIdx = DAG.getConstantPool(CS, getPointerTy(), 4);
|
||||
// Load the mask with the entry node as its chain and a null source value.
SDOperand Mask
|
||||
= DAG.getNode(X86ISD::LOAD_PACK,
|
||||
VT, DAG.getEntryNode(), CPIdx, DAG.getSrcValue(NULL));
|
||||
// XOR with the mask flips the sign bit of the scalar operand.
return DAG.getNode(X86ISD::FXOR, VT, Op.getOperand(0), Mask);
|
||||
}
|
||||
case ISD::SETCC: {
|
||||
assert(Op.getValueType() == MVT::i8 && "SetCC type must be 8-bit integer");
|
||||
SDOperand Cond;
|
||||
|
@ -1923,6 +1961,7 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
|
|||
case X86ISD::SHLD: return "X86ISD::SHLD";
|
||||
case X86ISD::SHRD: return "X86ISD::SHRD";
|
||||
case X86ISD::FAND: return "X86ISD::FAND";
|
||||
case X86ISD::FXOR: return "X86ISD::FXOR";
|
||||
case X86ISD::FILD: return "X86ISD::FILD";
|
||||
case X86ISD::FP_TO_INT16_IN_MEM: return "X86ISD::FP_TO_INT16_IN_MEM";
|
||||
case X86ISD::FP_TO_INT32_IN_MEM: return "X86ISD::FP_TO_INT32_IN_MEM";
|
||||
|
@ -1942,6 +1981,7 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
|
|||
case X86ISD::RET_FLAG: return "X86ISD::RET_FLAG";
|
||||
// The returned debug name must spell the opcode exactly: REP_*, not RET_*.
case X86ISD::REP_STOS: return "X86ISD::REP_STOS";
|
||||
case X86ISD::REP_MOVS: return "X86ISD::REP_MOVS";
|
||||
case X86ISD::LOAD_PACK: return "X86ISD::LOAD_PACK";
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -45,6 +45,10 @@ namespace llvm {
|
|||
/// to X86::ANDPS or X86::ANDPD.
|
||||
FAND,
|
||||
|
||||
/// FXOR - Bitwise logical XOR of floating point values. This corresponds
|
||||
/// to X86::XORPS or X86::XORPD.
|
||||
FXOR,
|
||||
|
||||
/// FILD - This instruction implements SINT_TO_FP with the integer source
|
||||
/// in memory and FP reg result. This corresponds to the X86::FILD*m
|
||||
/// instructions. It has three inputs (token chain, address, and source
|
||||
|
@ -137,6 +141,10 @@ namespace llvm {
|
|||
|
||||
/// REP_MOVS - Repeat move, corresponds to X86::REP_MOVSx.
|
||||
REP_MOVS,
|
||||
|
||||
/// LOAD_PACK - Load a 128-bit packed float / double value. It has the same
|
||||
/// operands as a normal load.
|
||||
LOAD_PACK,
|
||||
};
|
||||
|
||||
// X86 specific condition code. These correspond to X86_*_COND in
|
||||
|
|
|
@ -70,6 +70,8 @@ def X86shrd : SDNode<"X86ISD::SHRD", SDTIntShiftDOp>;
|
|||
|
||||
def X86fand : SDNode<"X86ISD::FAND", SDTFPBinOp,
|
||||
[SDNPCommutative, SDNPAssociative]>;
|
||||
def X86fxor : SDNode<"X86ISD::FXOR", SDTFPBinOp,
|
||||
[SDNPCommutative, SDNPAssociative]>;
|
||||
|
||||
def X86cmp : SDNode<"X86ISD::CMP" , SDTX86CmpTest,
|
||||
[SDNPOutFlag]>;
|
||||
|
@ -122,6 +124,9 @@ def X86rep_movs: SDNode<"X86ISD::REP_MOVS", SDTX86RepStr,
|
|||
def X86rdtsc : SDNode<"X86ISD::RDTSC_DAG",SDTX86RdTsc,
|
||||
[SDNPHasChain, SDNPOutFlag]>;
|
||||
|
||||
def X86loadp : SDNode<"X86ISD::LOAD_PACK", SDTLoad,
|
||||
[SDNPHasChain]>;
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// X86 Operand Definitions.
|
||||
//
|
||||
|
@ -140,7 +145,7 @@ def i32mem : X86MemOperand<"printi32mem">;
|
|||
def i64mem : X86MemOperand<"printi64mem">;
|
||||
def f32mem : X86MemOperand<"printf32mem">;
|
||||
def f64mem : X86MemOperand<"printf64mem">;
|
||||
def f80mem : X86MemOperand<"printf80mem">;
|
||||
def f128mem : X86MemOperand<"printf128mem">;
|
||||
|
||||
def SSECC : Operand<i8> {
|
||||
let PrintMethod = "printSSECC";
|
||||
|
@ -357,6 +362,9 @@ def zextloadi32i16 : PatFrag<(ops node:$ptr), (i32 (zextload node:$ptr, i16))>;
|
|||
def extloadi8i1 : PatFrag<(ops node:$ptr), (i8 (extload node:$ptr, i1))>;
|
||||
def extloadf64f32 : PatFrag<(ops node:$ptr), (f64 (extload node:$ptr, f32))>;
|
||||
|
||||
def X86loadpf32 : PatFrag<(ops node:$ptr), (f32 (X86loadp node:$ptr))>;
|
||||
def X86loadpf64 : PatFrag<(ops node:$ptr), (f64 (X86loadp node:$ptr))>;
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Instruction templates...
|
||||
|
||||
|
@ -2566,43 +2574,51 @@ def ORPDrr : I<0x56, MRMSrcReg, (ops FR64:$dst, FR64:$src1, FR64:$src2),
|
|||
"orpd {$src2, $dst|$dst, $src2}", []>,
|
||||
Requires<[HasSSE2]>, TB, OpSize;
|
||||
def XORPSrr : I<0x57, MRMSrcReg, (ops FR32:$dst, FR32:$src1, FR32:$src2),
|
||||
"xorps {$src2, $dst|$dst, $src2}", []>,
|
||||
"xorps {$src2, $dst|$dst, $src2}",
|
||||
[(set FR32:$dst, (X86fxor FR32:$src1, FR32:$src2))]>,
|
||||
Requires<[HasSSE1]>, TB;
|
||||
def XORPDrr : I<0x57, MRMSrcReg, (ops FR64:$dst, FR64:$src1, FR64:$src2),
|
||||
"xorpd {$src2, $dst|$dst, $src2}", []>,
|
||||
"xorpd {$src2, $dst|$dst, $src2}",
|
||||
[(set FR64:$dst, (X86fxor FR64:$src1, FR64:$src2))]>,
|
||||
Requires<[HasSSE2]>, TB, OpSize;
|
||||
}
|
||||
def ANDPSrm : I<0x54, MRMSrcMem, (ops FR32:$dst, FR32:$src1, f32mem:$src2),
|
||||
def ANDPSrm : I<0x54, MRMSrcMem, (ops FR32:$dst, FR32:$src1, f128mem:$src2),
|
||||
"andps {$src2, $dst|$dst, $src2}",
|
||||
[]>,
|
||||
[(set FR32:$dst, (X86fand FR32:$src1,
|
||||
(X86loadpf32 addr:$src2)))]>,
|
||||
Requires<[HasSSE1]>, TB;
|
||||
def ANDPDrm : I<0x54, MRMSrcMem, (ops FR64:$dst, FR64:$src1, f64mem:$src2),
|
||||
def ANDPDrm : I<0x54, MRMSrcMem, (ops FR64:$dst, FR64:$src1, f128mem:$src2),
|
||||
"andpd {$src2, $dst|$dst, $src2}",
|
||||
[]>,
|
||||
[(set FR64:$dst, (X86fand FR64:$src1,
|
||||
(X86loadpf64 addr:$src2)))]>,
|
||||
Requires<[HasSSE2]>, TB, OpSize;
|
||||
def ORPSrm : I<0x56, MRMSrcMem, (ops FR32:$dst, FR32:$src1, f32mem:$src2),
|
||||
def ORPSrm : I<0x56, MRMSrcMem, (ops FR32:$dst, FR32:$src1, f128mem:$src2),
|
||||
"orps {$src2, $dst|$dst, $src2}", []>,
|
||||
Requires<[HasSSE1]>, TB;
|
||||
def ORPDrm : I<0x56, MRMSrcMem, (ops FR64:$dst, FR64:$src1, f64mem:$src2),
|
||||
def ORPDrm : I<0x56, MRMSrcMem, (ops FR64:$dst, FR64:$src1, f128mem:$src2),
|
||||
"orpd {$src2, $dst|$dst, $src2}", []>,
|
||||
Requires<[HasSSE2]>, TB, OpSize;
|
||||
def XORPSrm : I<0x57, MRMSrcMem, (ops FR32:$dst, FR32:$src1, f32mem:$src2),
|
||||
"xorps {$src2, $dst|$dst, $src2}", []>,
|
||||
def XORPSrm : I<0x57, MRMSrcMem, (ops FR32:$dst, FR32:$src1, f128mem:$src2),
|
||||
"xorps {$src2, $dst|$dst, $src2}",
|
||||
[(set FR32:$dst, (X86fxor FR32:$src1,
|
||||
(X86loadpf32 addr:$src2)))]>,
|
||||
Requires<[HasSSE1]>, TB;
|
||||
def XORPDrm : I<0x57, MRMSrcMem, (ops FR64:$dst, FR64:$src1, f64mem:$src2),
|
||||
"xorpd {$src2, $dst|$dst, $src2}", []>,
|
||||
def XORPDrm : I<0x57, MRMSrcMem, (ops FR64:$dst, FR64:$src1, f128mem:$src2),
|
||||
"xorpd {$src2, $dst|$dst, $src2}",
|
||||
[(set FR64:$dst, (X86fxor FR64:$src1,
|
||||
(X86loadpf64 addr:$src2)))]>,
|
||||
Requires<[HasSSE2]>, TB, OpSize;
|
||||
|
||||
def ANDNPSrr : I<0x55, MRMSrcReg, (ops FR32:$dst, FR32:$src1, FR32:$src2),
|
||||
"andnps {$src2, $dst|$dst, $src2}", []>,
|
||||
Requires<[HasSSE1]>, TB;
|
||||
def ANDNPSrm : I<0x55, MRMSrcMem, (ops FR32:$dst, FR32:$src1, f32mem:$src2),
|
||||
def ANDNPSrm : I<0x55, MRMSrcMem, (ops FR32:$dst, FR32:$src1, f128mem:$src2),
|
||||
"andnps {$src2, $dst|$dst, $src2}", []>,
|
||||
Requires<[HasSSE1]>, TB;
|
||||
def ANDNPDrr : I<0x55, MRMSrcReg, (ops FR64:$dst, FR64:$src1, FR64:$src2),
|
||||
"andnpd {$src2, $dst|$dst, $src2}", []>,
|
||||
Requires<[HasSSE2]>, TB, OpSize;
|
||||
def ANDNPDrm : I<0x55, MRMSrcMem, (ops FR64:$dst, FR64:$src1, f64mem:$src2),
|
||||
def ANDNPDrm : I<0x55, MRMSrcMem, (ops FR64:$dst, FR64:$src1, f128mem:$src2),
|
||||
"andnpd {$src2, $dst|$dst, $src2}", []>,
|
||||
Requires<[HasSSE2]>, TB, OpSize;
|
||||
|
||||
|
@ -2981,6 +2997,42 @@ def FLDCW16m : I<0xD9, MRM5m, // X87 control word = [mem16]
|
|||
(ops i16mem:$dst), "fldcw $dst", []>;
|
||||
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// XMM Packed Floating point support (requires SSE / SSE2)
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
// movaps xmm, xmm: encoded as 0F 28 /r. The source is a register, so the
// form is MRMSrcReg, and the only prefix is the two-byte escape (TB).
def MOVAPSrr : I<0x28, MRMSrcReg, (ops V4F4:$dst, V4F4:$src),
|
||||
"movaps {$src, $dst|$dst, $src}", []>,
|
||||
Requires<[HasSSE1]>, TB;
|
||||
// movapd xmm, xmm: encoded as 66 0F 28 /r. The source is a register, so the
// form is MRMSrcReg; the 66 prefix is expressed as TB, OpSize.
def MOVAPDrr : I<0x28, MRMSrcReg, (ops V2F8:$dst, V2F8:$src),
|
||||
"movapd {$src, $dst|$dst, $src}", []>,
|
||||
Requires<[HasSSE2]>, TB, OpSize;
|
||||
|
||||
// movaps xmm, m128: encoded as 0F 28 /r (two-byte escape only, no F3 prefix).
def MOVAPSrm : I<0x28, MRMSrcMem, (ops V4F4:$dst, f128mem:$src),
|
||||
"movaps {$src, $dst|$dst, $src}", []>,
|
||||
Requires<[HasSSE1]>, TB;
|
||||
// movaps m128, xmm: encoded as 0F 29 /r (two-byte escape only, no F2 prefix).
def MOVAPSmr : I<0x29, MRMDestMem, (ops f128mem:$dst, V4F4:$src),
|
||||
"movaps {$src, $dst|$dst, $src}",[]>,
|
||||
Requires<[HasSSE1]>, TB;
|
||||
// movapd xmm, m128: an SSE2 instruction encoded as 66 0F 28 /r, so it is
// gated on HasSSE2 and uses TB, OpSize rather than the F2 (XD) prefix.
def MOVAPDrm : I<0x28, MRMSrcMem, (ops V2F8:$dst, f128mem:$src),
|
||||
"movapd {$src, $dst|$dst, $src}", []>,
|
||||
Requires<[HasSSE2]>, TB, OpSize;
|
||||
// movapd m128, xmm: encoded as 66 0F 29 /r, i.e. TB, OpSize rather than the
// F2 (XD) prefix.
def MOVAPDmr : I<0x29, MRMDestMem, (ops f128mem:$dst, V2F8:$src),
|
||||
"movapd {$src, $dst|$dst, $src}",[]>,
|
||||
Requires<[HasSSE2]>, TB, OpSize;
|
||||
|
||||
// Pseudo-instructions to load FR32 / FR64 from f128mem using movaps / movapd.
|
||||
// Upper bits are disregarded.
|
||||
// Selects an f32 X86loadp into FR32 using movaps (0F 28 /r): two-byte escape
// only, no F3 prefix.
def MOVSAPSrm : I<0x28, MRMSrcMem, (ops FR32:$dst, f128mem:$src),
|
||||
"movaps {$src, $dst|$dst, $src}",
|
||||
[(set FR32:$dst, (X86loadpf32 addr:$src))]>,
|
||||
Requires<[HasSSE1]>, TB;
|
||||
// Selects an f64 X86loadp into FR64 using movapd (66 0F 28 /r). movapd and
// the FR64 scalar class are SSE2, so this is gated on HasSSE2 and uses
// TB, OpSize rather than the F2 (XD) prefix.
def MOVSAPDrm : I<0x28, MRMSrcMem, (ops FR64:$dst, f128mem:$src),
|
||||
"movapd {$src, $dst|$dst, $src}",
|
||||
[(set FR64:$dst, (X86loadpf64 addr:$src))]>,
|
||||
Requires<[HasSSE2]>, TB, OpSize;
|
||||
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Miscellaneous Instructions
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
|
|
@ -76,8 +76,8 @@ struct X86IntelAsmPrinter : public X86SharedAsmPrinter {
|
|||
O << "QWORD PTR ";
|
||||
printMemReference(MI, OpNo);
|
||||
}
|
||||
void printf80mem(const MachineInstr *MI, unsigned OpNo) {
|
||||
O << "XWORD PTR ";
|
||||
void printf128mem(const MachineInstr *MI, unsigned OpNo) {
|
||||
O << "XMMWORD PTR ";
|
||||
printMemReference(MI, OpNo);
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in New Issue