forked from OSchip/llvm-project
Define non-intrinsic instructions for vector min, max, sqrt, rsqrt, and rcp,
in addition to the intrinsic forms. Add spill-folding entries for these new instructions, and for the scalar min and max intrinsic instructions which were missing. And add some preliminary ISelLowering code for using the new non-intrinsic vector sqrt instruction, and fneg and fabs. llvm-svn: 38478
This commit is contained in:
parent
7ee197ecf2
commit
57111e7a60
|
@ -331,6 +331,13 @@ X86TargetLowering::X86TargetLowering(TargetMachine &TM)
|
|||
setOperationAction(ISD::VECTOR_SHUFFLE, (MVT::ValueType)VT, Expand);
|
||||
setOperationAction(ISD::EXTRACT_VECTOR_ELT, (MVT::ValueType)VT, Expand);
|
||||
setOperationAction(ISD::INSERT_VECTOR_ELT, (MVT::ValueType)VT, Expand);
|
||||
setOperationAction(ISD::FABS, (MVT::ValueType)VT, Expand);
|
||||
setOperationAction(ISD::FSIN, (MVT::ValueType)VT, Expand);
|
||||
setOperationAction(ISD::FCOS, (MVT::ValueType)VT, Expand);
|
||||
setOperationAction(ISD::FREM, (MVT::ValueType)VT, Expand);
|
||||
setOperationAction(ISD::FPOWI, (MVT::ValueType)VT, Expand);
|
||||
setOperationAction(ISD::FSQRT, (MVT::ValueType)VT, Expand);
|
||||
setOperationAction(ISD::FCOPYSIGN, (MVT::ValueType)VT, Expand);
|
||||
}
|
||||
|
||||
if (Subtarget->hasMMX()) {
|
||||
|
@ -408,6 +415,9 @@ X86TargetLowering::X86TargetLowering(TargetMachine &TM)
|
|||
setOperationAction(ISD::FSUB, MVT::v4f32, Legal);
|
||||
setOperationAction(ISD::FMUL, MVT::v4f32, Legal);
|
||||
setOperationAction(ISD::FDIV, MVT::v4f32, Legal);
|
||||
setOperationAction(ISD::FSQRT, MVT::v4f32, Legal);
|
||||
setOperationAction(ISD::FNEG, MVT::v4f32, Custom);
|
||||
setOperationAction(ISD::FABS, MVT::v4f32, Custom);
|
||||
setOperationAction(ISD::LOAD, MVT::v4f32, Legal);
|
||||
setOperationAction(ISD::BUILD_VECTOR, MVT::v4f32, Custom);
|
||||
setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v4f32, Custom);
|
||||
|
@ -435,6 +445,9 @@ X86TargetLowering::X86TargetLowering(TargetMachine &TM)
|
|||
setOperationAction(ISD::FSUB, MVT::v2f64, Legal);
|
||||
setOperationAction(ISD::FMUL, MVT::v2f64, Legal);
|
||||
setOperationAction(ISD::FDIV, MVT::v2f64, Legal);
|
||||
setOperationAction(ISD::FSQRT, MVT::v2f64, Legal);
|
||||
setOperationAction(ISD::FNEG, MVT::v2f64, Custom);
|
||||
setOperationAction(ISD::FABS, MVT::v2f64, Custom);
|
||||
|
||||
setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v16i8, Custom);
|
||||
setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v8i16, Custom);
|
||||
|
@ -3326,16 +3339,21 @@ SDOperand X86TargetLowering::LowerFP_TO_SINT(SDOperand Op, SelectionDAG &DAG) {
|
|||
|
||||
SDOperand X86TargetLowering::LowerFABS(SDOperand Op, SelectionDAG &DAG) {
|
||||
MVT::ValueType VT = Op.getValueType();
|
||||
const Type *OpNTy = MVT::getTypeForValueType(VT);
|
||||
MVT::ValueType EltVT = VT;
|
||||
if (MVT::isVector(VT))
|
||||
EltVT = MVT::getVectorElementType(VT);
|
||||
const Type *OpNTy = MVT::getTypeForValueType(EltVT);
|
||||
std::vector<Constant*> CV;
|
||||
if (VT == MVT::f64) {
|
||||
CV.push_back(ConstantFP::get(OpNTy, BitsToDouble(~(1ULL << 63))));
|
||||
CV.push_back(ConstantFP::get(OpNTy, 0.0));
|
||||
if (EltVT == MVT::f64) {
|
||||
Constant *C = ConstantFP::get(OpNTy, BitsToDouble(~(1ULL << 63)));
|
||||
CV.push_back(C);
|
||||
CV.push_back(C);
|
||||
} else {
|
||||
CV.push_back(ConstantFP::get(OpNTy, BitsToFloat(~(1U << 31))));
|
||||
CV.push_back(ConstantFP::get(OpNTy, 0.0));
|
||||
CV.push_back(ConstantFP::get(OpNTy, 0.0));
|
||||
CV.push_back(ConstantFP::get(OpNTy, 0.0));
|
||||
Constant *C = ConstantFP::get(OpNTy, BitsToFloat(~(1U << 31)));
|
||||
CV.push_back(C);
|
||||
CV.push_back(C);
|
||||
CV.push_back(C);
|
||||
CV.push_back(C);
|
||||
}
|
||||
Constant *CS = ConstantStruct::get(CV);
|
||||
SDOperand CPIdx = DAG.getConstantPool(CS, getPointerTy(), 4);
|
||||
|
@ -3350,16 +3368,21 @@ SDOperand X86TargetLowering::LowerFABS(SDOperand Op, SelectionDAG &DAG) {
|
|||
|
||||
SDOperand X86TargetLowering::LowerFNEG(SDOperand Op, SelectionDAG &DAG) {
|
||||
MVT::ValueType VT = Op.getValueType();
|
||||
const Type *OpNTy = MVT::getTypeForValueType(VT);
|
||||
MVT::ValueType EltVT = VT;
|
||||
if (MVT::isVector(VT))
|
||||
EltVT = MVT::getVectorElementType(VT);
|
||||
const Type *OpNTy = MVT::getTypeForValueType(EltVT);
|
||||
std::vector<Constant*> CV;
|
||||
if (VT == MVT::f64) {
|
||||
CV.push_back(ConstantFP::get(OpNTy, BitsToDouble(1ULL << 63)));
|
||||
CV.push_back(ConstantFP::get(OpNTy, 0.0));
|
||||
if (EltVT == MVT::f64) {
|
||||
Constant *C = ConstantFP::get(OpNTy, BitsToDouble(1ULL << 63));
|
||||
CV.push_back(C);
|
||||
CV.push_back(C);
|
||||
} else {
|
||||
CV.push_back(ConstantFP::get(OpNTy, BitsToFloat(1U << 31)));
|
||||
CV.push_back(ConstantFP::get(OpNTy, 0.0));
|
||||
CV.push_back(ConstantFP::get(OpNTy, 0.0));
|
||||
CV.push_back(ConstantFP::get(OpNTy, 0.0));
|
||||
Constant *C = ConstantFP::get(OpNTy, BitsToFloat(1U << 31));
|
||||
CV.push_back(C);
|
||||
CV.push_back(C);
|
||||
CV.push_back(C);
|
||||
CV.push_back(C);
|
||||
}
|
||||
Constant *CS = ConstantStruct::get(CV);
|
||||
SDOperand CPIdx = DAG.getConstantPool(CS, getPointerTy(), 4);
|
||||
|
@ -4284,6 +4307,8 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
|
|||
case X86ISD::PINSRW: return "X86ISD::PINSRW";
|
||||
case X86ISD::FMAX: return "X86ISD::FMAX";
|
||||
case X86ISD::FMIN: return "X86ISD::FMIN";
|
||||
case X86ISD::FRSQRT: return "X86ISD::FRSQRT";
|
||||
case X86ISD::FRCP: return "X86ISD::FRCP";
|
||||
case X86ISD::TLSADDR: return "X86ISD::TLSADDR";
|
||||
case X86ISD::THREAD_POINTER: return "X86ISD::THREAD_POINTER";
|
||||
}
|
||||
|
|
|
@ -177,6 +177,12 @@ namespace llvm {
|
|||
/// FMAX, FMIN - Floating point max and min.
|
||||
///
|
||||
FMAX, FMIN,
|
||||
|
||||
/// FRSQRT, FRCP - Floating point reciprocal-sqrt and reciprocal
|
||||
/// approximation. Note that these typically require refinement
|
||||
/// in order to obtain suitable precision.
|
||||
FRSQRT, FRCP,
|
||||
|
||||
// Thread Local Storage
|
||||
TLSADDR, THREAD_POINTER
|
||||
};
|
||||
|
|
|
@ -31,6 +31,8 @@ def X86for : SDNode<"X86ISD::FOR", SDTFPBinOp,
|
|||
[SDNPCommutative, SDNPAssociative]>;
|
||||
def X86fxor : SDNode<"X86ISD::FXOR", SDTFPBinOp,
|
||||
[SDNPCommutative, SDNPAssociative]>;
|
||||
def X86frsqrt : SDNode<"X86ISD::FRSQRT", SDTFPUnaryOp>;
|
||||
def X86frcp : SDNode<"X86ISD::FRCP", SDTFPUnaryOp>;
|
||||
def X86fsrl : SDNode<"X86ISD::FSRL", SDTX86FPShiftOp>;
|
||||
def X86comi : SDNode<"X86ISD::COMI", SDTX86CmpTest,
|
||||
[SDNPHasChain, SDNPOutFlag]>;
|
||||
|
@ -247,16 +249,6 @@ class PSI<bits<8> o, Format F, dag ops, string asm, list<dag> pattern>
|
|||
class PSIi8<bits<8> o, Format F, dag ops, string asm, list<dag> pattern>
|
||||
: Ii8<o, F, ops, asm, pattern>, TB, Requires<[HasSSE1]>;
|
||||
|
||||
// Helpers for defining instructions that directly correspond to intrinsics.
|
||||
multiclass SS_IntUnary<bits<8> o, string OpcodeStr, Intrinsic IntId> {
|
||||
def r : SSI<o, MRMSrcReg, (ops VR128:$dst, VR128:$src),
|
||||
!strconcat(OpcodeStr, " {$src, $dst|$dst, $src}"),
|
||||
[(set VR128:$dst, (v4f32 (IntId VR128:$src)))]>;
|
||||
def m : SSI<o, MRMSrcMem, (ops VR128:$dst, ssmem:$src),
|
||||
!strconcat(OpcodeStr, " {$src, $dst|$dst, $src}"),
|
||||
[(set VR128:$dst, (v4f32 (IntId sse_load_f32:$src)))]>;
|
||||
}
|
||||
|
||||
// Move Instructions
|
||||
def MOVSSrr : SSI<0x10, MRMSrcReg, (ops FR32:$dst, FR32:$src),
|
||||
"movss {$src, $dst|$dst, $src}", []>;
|
||||
|
@ -267,18 +259,6 @@ def MOVSSmr : SSI<0x11, MRMDestMem, (ops f32mem:$dst, FR32:$src),
|
|||
"movss {$src, $dst|$dst, $src}",
|
||||
[(store FR32:$src, addr:$dst)]>;
|
||||
|
||||
def SQRTSSr : SSI<0x51, MRMSrcReg, (ops FR32:$dst, FR32:$src),
|
||||
"sqrtss {$src, $dst|$dst, $src}",
|
||||
[(set FR32:$dst, (fsqrt FR32:$src))]>;
|
||||
def SQRTSSm : SSI<0x51, MRMSrcMem, (ops FR32:$dst, f32mem:$src),
|
||||
"sqrtss {$src, $dst|$dst, $src}",
|
||||
[(set FR32:$dst, (fsqrt (loadf32 addr:$src)))]>;
|
||||
|
||||
// Aliases to match intrinsics which expect XMM operand(s).
|
||||
defm SQRTSS_Int : SS_IntUnary<0x51, "sqrtss" , int_x86_sse_sqrt_ss>;
|
||||
defm RSQRTSS_Int : SS_IntUnary<0x52, "rsqrtss", int_x86_sse_rsqrt_ss>;
|
||||
defm RCPSS_Int : SS_IntUnary<0x53, "rcpss" , int_x86_sse_rcp_ss>;
|
||||
|
||||
// Conversion instructions
|
||||
def CVTTSS2SIrr : SSI<0x2C, MRMSrcReg, (ops GR32:$dst, FR32:$src),
|
||||
"cvttss2si {$src, $dst|$dst, $src}",
|
||||
|
@ -425,20 +405,20 @@ def FsANDNPSrm : PSI<0x55, MRMSrcMem,
|
|||
"andnps {$src2, $dst|$dst, $src2}", []>;
|
||||
}
|
||||
|
||||
/// scalar_sse1_fp_binop_rm - Scalar SSE1 binops come in three basic forms:
|
||||
///
|
||||
/// 1. f32 - This comes in SSE1 form for floats.
|
||||
/// 2. rr vs rm - They include a reg+reg form and a reg+mem form.
|
||||
/// basic_sse1_fp_binop_rm - SSE1 binops come in both scalar and vector forms.
|
||||
///
|
||||
/// In addition, scalar SSE ops have an intrinsic form. This form is unlike the
|
||||
/// normal form, in that they take an entire vector (instead of a scalar) and
|
||||
/// leave the top elements undefined. This adds another two variants of the
|
||||
/// above permutations, giving us 8 forms for 'instruction'.
|
||||
/// In addition, we also have a special variant of the scalar form here to
|
||||
/// represent the associated intrinsic operation. This form is unlike the
|
||||
/// plain scalar form, in that it takes an entire vector (instead of a scalar)
|
||||
/// and leaves the top elements undefined.
|
||||
///
|
||||
/// These three forms can each be reg+reg or reg+mem, so there are a total of
|
||||
/// six "instructions".
|
||||
///
|
||||
let isTwoAddress = 1 in {
|
||||
multiclass scalar_sse1_fp_binop_rm<bits<8> opc, string OpcodeStr,
|
||||
SDNode OpNode, Intrinsic F32Int,
|
||||
bit Commutable = 0> {
|
||||
multiclass basic_sse1_fp_binop_rm<bits<8> opc, string OpcodeStr,
|
||||
SDNode OpNode, Intrinsic F32Int,
|
||||
bit Commutable = 0> {
|
||||
// Scalar operation, reg+reg.
|
||||
def SSrr : SSI<opc, MRMSrcReg, (ops FR32:$dst, FR32:$src1, FR32:$src2),
|
||||
!strconcat(OpcodeStr, "ss {$src2, $dst|$dst, $src2}"),
|
||||
|
@ -451,14 +431,26 @@ multiclass scalar_sse1_fp_binop_rm<bits<8> opc, string OpcodeStr,
|
|||
!strconcat(OpcodeStr, "ss {$src2, $dst|$dst, $src2}"),
|
||||
[(set FR32:$dst, (OpNode FR32:$src1, (load addr:$src2)))]>;
|
||||
|
||||
// Vector intrinsic operation, reg+reg.
|
||||
// Vector operation, reg+reg.
|
||||
def PSrr : PSI<opc, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
|
||||
!strconcat(OpcodeStr, "ps {$src2, $dst|$dst, $src2}"),
|
||||
[(set VR128:$dst, (v4f32 (OpNode VR128:$src1, VR128:$src2)))]> {
|
||||
let isCommutable = Commutable;
|
||||
}
|
||||
|
||||
// Vector operation, reg+mem.
|
||||
def PSrm : PSI<opc, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f128mem:$src2),
|
||||
!strconcat(OpcodeStr, "ps {$src2, $dst|$dst, $src2}"),
|
||||
[(set VR128:$dst, (OpNode VR128:$src1, (loadv4f32 addr:$src2)))]>;
|
||||
|
||||
// Intrinsic operation, reg+reg.
|
||||
def SSrr_Int : SSI<opc, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
|
||||
!strconcat(OpcodeStr, "ss {$src2, $dst|$dst, $src2}"),
|
||||
[(set VR128:$dst, (F32Int VR128:$src1, VR128:$src2))]> {
|
||||
let isCommutable = Commutable;
|
||||
}
|
||||
|
||||
// Vector intrinsic operation, reg+mem.
|
||||
// Intrinsic operation, reg+mem.
|
||||
def SSrm_Int : SSI<opc, MRMSrcMem, (ops VR128:$dst, VR128:$src1, ssmem:$src2),
|
||||
!strconcat(OpcodeStr, "ss {$src2, $dst|$dst, $src2}"),
|
||||
[(set VR128:$dst, (F32Int VR128:$src1,
|
||||
|
@ -467,13 +459,82 @@ multiclass scalar_sse1_fp_binop_rm<bits<8> opc, string OpcodeStr,
|
|||
}
|
||||
|
||||
// Arithmetic instructions
|
||||
defm ADD : scalar_sse1_fp_binop_rm<0x58, "add", fadd, int_x86_sse_add_ss, 1>;
|
||||
defm MUL : scalar_sse1_fp_binop_rm<0x59, "mul", fmul, int_x86_sse_mul_ss, 1>;
|
||||
defm SUB : scalar_sse1_fp_binop_rm<0x5C, "sub", fsub, int_x86_sse_sub_ss>;
|
||||
defm DIV : scalar_sse1_fp_binop_rm<0x5E, "div", fdiv, int_x86_sse_div_ss>;
|
||||
defm ADD : basic_sse1_fp_binop_rm<0x58, "add", fadd, int_x86_sse_add_ss, 1>;
|
||||
defm MUL : basic_sse1_fp_binop_rm<0x59, "mul", fmul, int_x86_sse_mul_ss, 1>;
|
||||
defm SUB : basic_sse1_fp_binop_rm<0x5C, "sub", fsub, int_x86_sse_sub_ss>;
|
||||
defm DIV : basic_sse1_fp_binop_rm<0x5E, "div", fdiv, int_x86_sse_div_ss>;
|
||||
|
||||
defm MAX : scalar_sse1_fp_binop_rm<0x5F, "max", X86fmax, int_x86_sse_max_ss>;
|
||||
defm MIN : scalar_sse1_fp_binop_rm<0x5D, "min", X86fmin, int_x86_sse_min_ss>;
|
||||
/// sse1_fp_binop_rm - Other SSE1 binops
|
||||
///
|
||||
/// This multiclass is like basic_sse1_fp_binop_rm, with the addition of
|
||||
/// instructions for a full-vector intrinsic form. Operations that map
|
||||
/// onto C operators don't use this form since they just use the plain
|
||||
/// vector form instead of having a separate vector intrinsic form.
|
||||
///
|
||||
/// This provides a total of eight "instructions".
|
||||
///
|
||||
let isTwoAddress = 1 in {
|
||||
multiclass sse1_fp_binop_rm<bits<8> opc, string OpcodeStr,
|
||||
SDNode OpNode,
|
||||
Intrinsic F32Int,
|
||||
Intrinsic V4F32Int,
|
||||
bit Commutable = 0> {
|
||||
|
||||
// Scalar operation, reg+reg.
|
||||
def SSrr : SSI<opc, MRMSrcReg, (ops FR32:$dst, FR32:$src1, FR32:$src2),
|
||||
!strconcat(OpcodeStr, "ss {$src2, $dst|$dst, $src2}"),
|
||||
[(set FR32:$dst, (OpNode FR32:$src1, FR32:$src2))]> {
|
||||
let isCommutable = Commutable;
|
||||
}
|
||||
|
||||
// Scalar operation, reg+mem.
|
||||
def SSrm : SSI<opc, MRMSrcMem, (ops FR32:$dst, FR32:$src1, f32mem:$src2),
|
||||
!strconcat(OpcodeStr, "ss {$src2, $dst|$dst, $src2}"),
|
||||
[(set FR32:$dst, (OpNode FR32:$src1, (load addr:$src2)))]>;
|
||||
|
||||
// Vector operation, reg+reg.
|
||||
def PSrr : PSI<opc, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
|
||||
!strconcat(OpcodeStr, "ps {$src2, $dst|$dst, $src2}"),
|
||||
[(set VR128:$dst, (v4f32 (OpNode VR128:$src1, VR128:$src2)))]> {
|
||||
let isCommutable = Commutable;
|
||||
}
|
||||
|
||||
// Vector operation, reg+mem.
|
||||
def PSrm : PSI<opc, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f128mem:$src2),
|
||||
!strconcat(OpcodeStr, "ps {$src2, $dst|$dst, $src2}"),
|
||||
[(set VR128:$dst, (OpNode VR128:$src1, (loadv4f32 addr:$src2)))]>;
|
||||
|
||||
// Intrinsic operation, reg+reg.
|
||||
def SSrr_Int : SSI<opc, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
|
||||
!strconcat(OpcodeStr, "ss {$src2, $dst|$dst, $src2}"),
|
||||
[(set VR128:$dst, (F32Int VR128:$src1, VR128:$src2))]> {
|
||||
let isCommutable = Commutable;
|
||||
}
|
||||
|
||||
// Intrinsic operation, reg+mem.
|
||||
def SSrm_Int : SSI<opc, MRMSrcMem, (ops VR128:$dst, VR128:$src1, ssmem:$src2),
|
||||
!strconcat(OpcodeStr, "ss {$src2, $dst|$dst, $src2}"),
|
||||
[(set VR128:$dst, (F32Int VR128:$src1,
|
||||
sse_load_f32:$src2))]>;
|
||||
|
||||
// Vector intrinsic operation, reg+reg.
|
||||
def PSrr_Int : PSI<opc, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
|
||||
!strconcat(OpcodeStr, "ps {$src2, $dst|$dst, $src2}"),
|
||||
[(set VR128:$dst, (V4F32Int VR128:$src1, VR128:$src2))]> {
|
||||
let isCommutable = Commutable;
|
||||
}
|
||||
|
||||
// Vector intrinsic operation, reg+mem.
|
||||
def PSrm_Int : PSI<opc, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f32mem:$src2),
|
||||
!strconcat(OpcodeStr, "ps {$src2, $dst|$dst, $src2}"),
|
||||
[(set VR128:$dst, (V4F32Int VR128:$src1, (load addr:$src2)))]>;
|
||||
}
|
||||
}
|
||||
|
||||
defm MAX : sse1_fp_binop_rm<0x5F, "max", X86fmax,
|
||||
int_x86_sse_max_ss, int_x86_sse_max_ps>;
|
||||
defm MIN : sse1_fp_binop_rm<0x5D, "min", X86fmin,
|
||||
int_x86_sse_min_ss, int_x86_sse_min_ps>;
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// SSE packed FP Instructions
|
||||
|
@ -550,70 +611,85 @@ def MOVHLPSrr : PSI<0x12, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
|
|||
|
||||
|
||||
|
||||
/// packed_sse1_fp_binop_rm - Packed SSE binops come in three basic forms:
|
||||
/// 1. v4f32 - This comes in SSE1 form for float.
|
||||
/// 2. rr vs rm - They include a reg+reg form and a reg+mem form.
|
||||
// Arithmetic
|
||||
|
||||
/// sse1_fp_unop_rm - SSE1 unops come in both scalar and vector forms.
|
||||
///
|
||||
let isTwoAddress = 1 in {
|
||||
multiclass packed_sse1_fp_binop_rm<bits<8> opc, string OpcodeStr,
|
||||
SDNode OpNode, bit Commutable = 0> {
|
||||
// Packed operation, reg+reg.
|
||||
def PSrr : PSI<opc, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
|
||||
!strconcat(OpcodeStr, "ps {$src2, $dst|$dst, $src2}"),
|
||||
[(set VR128:$dst, (v4f32 (OpNode VR128:$src1, VR128:$src2)))]> {
|
||||
/// In addition, we also have a special variant of the scalar form here to
|
||||
/// represent the associated intrinsic operation. This form is unlike the
|
||||
/// plain scalar form, in that it takes an entire vector (instead of a
|
||||
/// scalar) and leaves the top elements undefined.
|
||||
///
|
||||
/// And, we have a special variant form for a full-vector intrinsic form.
|
||||
///
|
||||
/// These four forms can each have a reg or a mem operand, so there are a
|
||||
/// total of eight "instructions".
|
||||
///
|
||||
multiclass sse1_fp_unop_rm<bits<8> opc, string OpcodeStr,
|
||||
SDNode OpNode,
|
||||
Intrinsic F32Int,
|
||||
Intrinsic V4F32Int,
|
||||
bit Commutable = 0> {
|
||||
// Scalar operation, reg.
|
||||
def SSr : SSI<opc, MRMSrcReg, (ops FR32:$dst, FR32:$src),
|
||||
!strconcat(OpcodeStr, "ss {$src, $dst|$dst, $src}"),
|
||||
[(set FR32:$dst, (OpNode FR32:$src))]> {
|
||||
let isCommutable = Commutable;
|
||||
}
|
||||
|
||||
// Packed operation, reg+mem.
|
||||
def PSrm : PSI<opc, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f128mem:$src2),
|
||||
!strconcat(OpcodeStr, "ps {$src2, $dst|$dst, $src2}"),
|
||||
[(set VR128:$dst, (OpNode VR128:$src1, (loadv4f32 addr:$src2)))]>;
|
||||
}
|
||||
}
|
||||
|
||||
defm ADD : packed_sse1_fp_binop_rm<0x58, "add", fadd, 1>;
|
||||
defm MUL : packed_sse1_fp_binop_rm<0x59, "mul", fmul, 1>;
|
||||
defm DIV : packed_sse1_fp_binop_rm<0x5E, "div", fdiv>;
|
||||
defm SUB : packed_sse1_fp_binop_rm<0x5C, "sub", fsub>;
|
||||
|
||||
// Arithmetic
|
||||
|
||||
class PS_Intr<bits<8> o, string OpcodeStr, Intrinsic IntId>
|
||||
: PSI<o, MRMSrcReg, (ops VR128:$dst, VR128:$src),
|
||||
!strconcat(OpcodeStr, " {$src, $dst|$dst, $src}"),
|
||||
[(set VR128:$dst, (IntId VR128:$src))]>;
|
||||
class PS_Intm<bits<8> o, string OpcodeStr, Intrinsic IntId>
|
||||
: PSI<o, MRMSrcMem, (ops VR128:$dst, f32mem:$src),
|
||||
!strconcat(OpcodeStr, " {$src, $dst|$dst, $src}"),
|
||||
[(set VR128:$dst, (IntId (load addr:$src)))]>;
|
||||
|
||||
class PS_Intrr<bits<8> o, string OpcodeStr, Intrinsic IntId>
|
||||
: PSI<o, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
|
||||
!strconcat(OpcodeStr, " {$src2, $dst|$dst, $src2}"),
|
||||
[(set VR128:$dst, (IntId VR128:$src1, VR128:$src2))]>;
|
||||
class PS_Intrm<bits<8> o, string OpcodeStr, Intrinsic IntId>
|
||||
: PSI<o, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f32mem:$src2),
|
||||
!strconcat(OpcodeStr, " {$src2, $dst|$dst, $src2}"),
|
||||
[(set VR128:$dst, (IntId VR128:$src1, (load addr:$src2)))]>;
|
||||
|
||||
def SQRTPSr : PS_Intr<0x51, "sqrtps", int_x86_sse_sqrt_ps>;
|
||||
def SQRTPSm : PS_Intm<0x51, "sqrtps", int_x86_sse_sqrt_ps>;
|
||||
|
||||
def RSQRTPSr : PS_Intr<0x52, "rsqrtps", int_x86_sse_rsqrt_ps>;
|
||||
def RSQRTPSm : PS_Intm<0x52, "rsqrtps", int_x86_sse_rsqrt_ps>;
|
||||
def RCPPSr : PS_Intr<0x53, "rcpps", int_x86_sse_rcp_ps>;
|
||||
def RCPPSm : PS_Intm<0x53, "rcpps", int_x86_sse_rcp_ps>;
|
||||
|
||||
let isTwoAddress = 1 in {
|
||||
let isCommutable = 1 in {
|
||||
def MAXPSrr : PS_Intrr<0x5F, "maxps", int_x86_sse_max_ps>;
|
||||
def MINPSrr : PS_Intrr<0x5D, "minps", int_x86_sse_min_ps>;
|
||||
// Scalar operation, mem.
|
||||
def SSm : SSI<opc, MRMSrcMem, (ops FR32:$dst, f32mem:$src),
|
||||
!strconcat(OpcodeStr, "ss {$src, $dst|$dst, $src}"),
|
||||
[(set FR32:$dst, (OpNode (load addr:$src)))]>;
|
||||
|
||||
// Vector operation, reg.
|
||||
def PSr : PSI<opc, MRMSrcReg, (ops VR128:$dst, VR128:$src),
|
||||
!strconcat(OpcodeStr, "ps {$src, $dst|$dst, $src}"),
|
||||
[(set VR128:$dst, (v4f32 (OpNode VR128:$src)))]> {
|
||||
let isCommutable = Commutable;
|
||||
}
|
||||
|
||||
def MAXPSrm : PS_Intrm<0x5F, "maxps", int_x86_sse_max_ps>;
|
||||
def MINPSrm : PS_Intrm<0x5D, "minps", int_x86_sse_min_ps>;
|
||||
// Vector operation, mem.
|
||||
def PSm : PSI<opc, MRMSrcMem, (ops VR128:$dst, f128mem:$src),
|
||||
!strconcat(OpcodeStr, "ps {$src, $dst|$dst, $src}"),
|
||||
[(set VR128:$dst, (OpNode (loadv4f32 addr:$src)))]>;
|
||||
|
||||
// Intrinsic operation, reg.
|
||||
def SSr_Int : SSI<opc, MRMSrcReg, (ops VR128:$dst, VR128:$src),
|
||||
!strconcat(OpcodeStr, "ss {$src, $dst|$dst, $src}"),
|
||||
[(set VR128:$dst, (F32Int VR128:$src))]> {
|
||||
let isCommutable = Commutable;
|
||||
}
|
||||
|
||||
// Intrinsic operation, mem.
|
||||
def SSm_Int : SSI<opc, MRMSrcMem, (ops VR128:$dst, ssmem:$src),
|
||||
!strconcat(OpcodeStr, "ss {$src, $dst|$dst, $src}"),
|
||||
[(set VR128:$dst, (F32Int sse_load_f32:$src))]>;
|
||||
|
||||
// Vector intrinsic operation, reg
|
||||
def PSr_Int : PSI<opc, MRMSrcReg, (ops VR128:$dst, VR128:$src),
|
||||
!strconcat(OpcodeStr, "ps {$src, $dst|$dst, $src}"),
|
||||
[(set VR128:$dst, (V4F32Int VR128:$src))]> {
|
||||
let isCommutable = Commutable;
|
||||
}
|
||||
|
||||
// Vector intrinsic operation, mem
|
||||
def PSm_Int : PSI<opc, MRMSrcMem, (ops VR128:$dst, f32mem:$src),
|
||||
!strconcat(OpcodeStr, "ps {$src, $dst|$dst, $src}"),
|
||||
[(set VR128:$dst, (V4F32Int (load addr:$src)))]>;
|
||||
}
|
||||
|
||||
// Square root.
|
||||
defm SQRT : sse1_fp_unop_rm<0x51, "sqrt", fsqrt,
|
||||
int_x86_sse_sqrt_ss, int_x86_sse_sqrt_ps>;
|
||||
|
||||
// Reciprocal approximations. Note that these typically require refinement
|
||||
// in order to obtain suitable precision.
|
||||
defm RSQRT : sse1_fp_unop_rm<0x52, "rsqrt", X86frsqrt,
|
||||
int_x86_sse_rsqrt_ss, int_x86_sse_rsqrt_ps>;
|
||||
defm RCP : sse1_fp_unop_rm<0x53, "rcp", X86frcp,
|
||||
int_x86_sse_rcp_ss, int_x86_sse_rcp_ps>;
|
||||
|
||||
// Logical
|
||||
let isTwoAddress = 1 in {
|
||||
let isCommutable = 1 in {
|
||||
|
@ -835,16 +911,6 @@ class PDI<bits<8> o, Format F, dag ops, string asm, list<dag> pattern>
|
|||
class PDIi8<bits<8> o, Format F, dag ops, string asm, list<dag> pattern>
|
||||
: Ii8<o, F, ops, asm, pattern>, TB, OpSize, Requires<[HasSSE2]>;
|
||||
|
||||
// Helpers for defining instructions that directly correspond to intrinsics.
|
||||
multiclass SD_IntUnary<bits<8> o, string OpcodeStr, Intrinsic IntId> {
|
||||
def r : SDI<o, MRMSrcReg, (ops VR128:$dst, VR128:$src),
|
||||
!strconcat(OpcodeStr, " {$src, $dst|$dst, $src}"),
|
||||
[(set VR128:$dst, (v2f64 (IntId VR128:$src)))]>;
|
||||
def m : SDI<o, MRMSrcMem, (ops VR128:$dst, sdmem:$src),
|
||||
!strconcat(OpcodeStr, " {$src, $dst|$dst, $src}"),
|
||||
[(set VR128:$dst, (v2f64 (IntId sse_load_f64:$src)))]>;
|
||||
}
|
||||
|
||||
// Move Instructions
|
||||
def MOVSDrr : SDI<0x10, MRMSrcReg, (ops FR64:$dst, FR64:$src),
|
||||
"movsd {$src, $dst|$dst, $src}", []>;
|
||||
|
@ -855,16 +921,6 @@ def MOVSDmr : SDI<0x11, MRMDestMem, (ops f64mem:$dst, FR64:$src),
|
|||
"movsd {$src, $dst|$dst, $src}",
|
||||
[(store FR64:$src, addr:$dst)]>;
|
||||
|
||||
def SQRTSDr : SDI<0x51, MRMSrcReg, (ops FR64:$dst, FR64:$src),
|
||||
"sqrtsd {$src, $dst|$dst, $src}",
|
||||
[(set FR64:$dst, (fsqrt FR64:$src))]>;
|
||||
def SQRTSDm : SDI<0x51, MRMSrcMem, (ops FR64:$dst, f64mem:$src),
|
||||
"sqrtsd {$src, $dst|$dst, $src}",
|
||||
[(set FR64:$dst, (fsqrt (loadf64 addr:$src)))]>;
|
||||
|
||||
// Aliases to match intrinsics which expect XMM operand(s).
|
||||
defm SQRTSD_Int : SD_IntUnary<0x51, "sqrtsd" , int_x86_sse2_sqrt_sd>;
|
||||
|
||||
// Conversion instructions
|
||||
def CVTTSD2SIrr : SDI<0x2C, MRMSrcReg, (ops GR32:$dst, FR64:$src),
|
||||
"cvttsd2si {$src, $dst|$dst, $src}",
|
||||
|
@ -1013,20 +1069,20 @@ def FsANDNPDrm : PDI<0x55, MRMSrcMem,
|
|||
"andnpd {$src2, $dst|$dst, $src2}", []>;
|
||||
}
|
||||
|
||||
/// scalar_sse2_fp_binop_rm - Scalar SSE2 binops come in three basic forms:
|
||||
///
|
||||
/// 1. f64 - This comes in SSE2 form for doubles.
|
||||
/// 2. rr vs rm - They include a reg+reg form and a reg+mem form.
|
||||
/// basic_sse2_fp_binop_rm - SSE2 binops come in both scalar and vector forms.
|
||||
///
|
||||
/// In addition, scalar SSE ops have an intrinsic form. This form is unlike the
|
||||
/// normal form, in that they take an entire vector (instead of a scalar) and
|
||||
/// leave the top elements undefined. This adds another two variants of the
|
||||
/// above permutations, giving us 8 forms for 'instruction'.
|
||||
/// In addition, we also have a special variant of the scalar form here to
|
||||
/// represent the associated intrinsic operation. This form is unlike the
|
||||
/// plain scalar form, in that it takes an entire vector (instead of a scalar)
|
||||
/// and leaves the top elements undefined.
|
||||
///
|
||||
/// These three forms can each be reg+reg or reg+mem, so there are a total of
|
||||
/// six "instructions".
|
||||
///
|
||||
let isTwoAddress = 1 in {
|
||||
multiclass scalar_sse2_fp_binop_rm<bits<8> opc, string OpcodeStr,
|
||||
SDNode OpNode, Intrinsic F64Int,
|
||||
bit Commutable = 0> {
|
||||
multiclass basic_sse2_fp_binop_rm<bits<8> opc, string OpcodeStr,
|
||||
SDNode OpNode, Intrinsic F64Int,
|
||||
bit Commutable = 0> {
|
||||
// Scalar operation, reg+reg.
|
||||
def SDrr : SDI<opc, MRMSrcReg, (ops FR64:$dst, FR64:$src1, FR64:$src2),
|
||||
!strconcat(OpcodeStr, "sd {$src2, $dst|$dst, $src2}"),
|
||||
|
@ -1039,14 +1095,26 @@ multiclass scalar_sse2_fp_binop_rm<bits<8> opc, string OpcodeStr,
|
|||
!strconcat(OpcodeStr, "sd {$src2, $dst|$dst, $src2}"),
|
||||
[(set FR64:$dst, (OpNode FR64:$src1, (load addr:$src2)))]>;
|
||||
|
||||
// Vector intrinsic operation, reg+reg.
|
||||
// Vector operation, reg+reg.
|
||||
def PDrr : PDI<opc, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
|
||||
!strconcat(OpcodeStr, "pd {$src2, $dst|$dst, $src2}"),
|
||||
[(set VR128:$dst, (v2f64 (OpNode VR128:$src1, VR128:$src2)))]> {
|
||||
let isCommutable = Commutable;
|
||||
}
|
||||
|
||||
// Vector operation, reg+mem.
|
||||
def PDrm : PDI<opc, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f128mem:$src2),
|
||||
!strconcat(OpcodeStr, "pd {$src2, $dst|$dst, $src2}"),
|
||||
[(set VR128:$dst, (OpNode VR128:$src1, (loadv2f64 addr:$src2)))]>;
|
||||
|
||||
// Intrinsic operation, reg+reg.
|
||||
def SDrr_Int : SDI<opc, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
|
||||
!strconcat(OpcodeStr, "sd {$src2, $dst|$dst, $src2}"),
|
||||
[(set VR128:$dst, (F64Int VR128:$src1, VR128:$src2))]> {
|
||||
let isCommutable = Commutable;
|
||||
}
|
||||
|
||||
// Vector intrinsic operation, reg+mem.
|
||||
// Intrinsic operation, reg+mem.
|
||||
def SDrm_Int : SDI<opc, MRMSrcMem, (ops VR128:$dst, VR128:$src1, sdmem:$src2),
|
||||
!strconcat(OpcodeStr, "sd {$src2, $dst|$dst, $src2}"),
|
||||
[(set VR128:$dst, (F64Int VR128:$src1,
|
||||
|
@ -1055,13 +1123,82 @@ multiclass scalar_sse2_fp_binop_rm<bits<8> opc, string OpcodeStr,
|
|||
}
|
||||
|
||||
// Arithmetic instructions
|
||||
defm ADD : scalar_sse2_fp_binop_rm<0x58, "add", fadd, int_x86_sse2_add_sd, 1>;
|
||||
defm MUL : scalar_sse2_fp_binop_rm<0x59, "mul", fmul, int_x86_sse2_mul_sd, 1>;
|
||||
defm SUB : scalar_sse2_fp_binop_rm<0x5C, "sub", fsub, int_x86_sse2_sub_sd>;
|
||||
defm DIV : scalar_sse2_fp_binop_rm<0x5E, "div", fdiv, int_x86_sse2_div_sd>;
|
||||
defm ADD : basic_sse2_fp_binop_rm<0x58, "add", fadd, int_x86_sse2_add_sd, 1>;
|
||||
defm MUL : basic_sse2_fp_binop_rm<0x59, "mul", fmul, int_x86_sse2_mul_sd, 1>;
|
||||
defm SUB : basic_sse2_fp_binop_rm<0x5C, "sub", fsub, int_x86_sse2_sub_sd>;
|
||||
defm DIV : basic_sse2_fp_binop_rm<0x5E, "div", fdiv, int_x86_sse2_div_sd>;
|
||||
|
||||
defm MAX : scalar_sse2_fp_binop_rm<0x5F, "max", X86fmax, int_x86_sse2_max_sd>;
|
||||
defm MIN : scalar_sse2_fp_binop_rm<0x5D, "min", X86fmin, int_x86_sse2_min_sd>;
|
||||
/// sse2_fp_binop_rm - Other SSE2 binops
|
||||
///
|
||||
/// This multiclass is like basic_sse2_fp_binop_rm, with the addition of
|
||||
/// instructions for a full-vector intrinsic form. Operations that map
|
||||
/// onto C operators don't use this form since they just use the plain
|
||||
/// vector form instead of having a separate vector intrinsic form.
|
||||
///
|
||||
/// This provides a total of eight "instructions".
|
||||
///
|
||||
let isTwoAddress = 1 in {
|
||||
multiclass sse2_fp_binop_rm<bits<8> opc, string OpcodeStr,
|
||||
SDNode OpNode,
|
||||
Intrinsic F64Int,
|
||||
Intrinsic V2F64Int,
|
||||
bit Commutable = 0> {
|
||||
|
||||
// Scalar operation, reg+reg.
|
||||
def SDrr : SDI<opc, MRMSrcReg, (ops FR64:$dst, FR64:$src1, FR64:$src2),
|
||||
!strconcat(OpcodeStr, "sd {$src2, $dst|$dst, $src2}"),
|
||||
[(set FR64:$dst, (OpNode FR64:$src1, FR64:$src2))]> {
|
||||
let isCommutable = Commutable;
|
||||
}
|
||||
|
||||
// Scalar operation, reg+mem.
|
||||
def SDrm : SDI<opc, MRMSrcMem, (ops FR64:$dst, FR64:$src1, f64mem:$src2),
|
||||
!strconcat(OpcodeStr, "sd {$src2, $dst|$dst, $src2}"),
|
||||
[(set FR64:$dst, (OpNode FR64:$src1, (load addr:$src2)))]>;
|
||||
|
||||
// Vector operation, reg+reg.
|
||||
def PDrr : PDI<opc, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
|
||||
!strconcat(OpcodeStr, "pd {$src2, $dst|$dst, $src2}"),
|
||||
[(set VR128:$dst, (v2f64 (OpNode VR128:$src1, VR128:$src2)))]> {
|
||||
let isCommutable = Commutable;
|
||||
}
|
||||
|
||||
// Vector operation, reg+mem.
|
||||
def PDrm : PDI<opc, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f128mem:$src2),
|
||||
!strconcat(OpcodeStr, "pd {$src2, $dst|$dst, $src2}"),
|
||||
[(set VR128:$dst, (OpNode VR128:$src1, (loadv2f64 addr:$src2)))]>;
|
||||
|
||||
// Intrinsic operation, reg+reg.
|
||||
def SDrr_Int : SDI<opc, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
|
||||
!strconcat(OpcodeStr, "sd {$src2, $dst|$dst, $src2}"),
|
||||
[(set VR128:$dst, (F64Int VR128:$src1, VR128:$src2))]> {
|
||||
let isCommutable = Commutable;
|
||||
}
|
||||
|
||||
// Intrinsic operation, reg+mem.
|
||||
def SDrm_Int : SDI<opc, MRMSrcMem, (ops VR128:$dst, VR128:$src1, sdmem:$src2),
|
||||
!strconcat(OpcodeStr, "sd {$src2, $dst|$dst, $src2}"),
|
||||
[(set VR128:$dst, (F64Int VR128:$src1,
|
||||
sse_load_f64:$src2))]>;
|
||||
|
||||
// Vector intrinsic operation, reg+reg.
|
||||
def PDrr_Int : PDI<opc, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
|
||||
!strconcat(OpcodeStr, "pd {$src2, $dst|$dst, $src2}"),
|
||||
[(set VR128:$dst, (V2F64Int VR128:$src1, VR128:$src2))]> {
|
||||
let isCommutable = Commutable;
|
||||
}
|
||||
|
||||
// Vector intrinsic operation, reg+mem.
|
||||
def PDrm_Int : PDI<opc, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f64mem:$src2),
|
||||
!strconcat(OpcodeStr, "pd {$src2, $dst|$dst, $src2}"),
|
||||
[(set VR128:$dst, (V2F64Int VR128:$src1, (load addr:$src2)))]>;
|
||||
}
|
||||
}
|
||||
|
||||
defm MAX : sse2_fp_binop_rm<0x5F, "max", X86fmax,
|
||||
int_x86_sse2_max_sd, int_x86_sse2_max_pd>;
|
||||
defm MIN : sse2_fp_binop_rm<0x5D, "min", X86fmin,
|
||||
int_x86_sse2_min_sd, int_x86_sse2_min_pd>;
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// SSE packed FP Instructions
|
||||
|
@ -1234,65 +1371,80 @@ def Int_CVTSS2SDrm: I<0x5A, MRMSrcMem,
|
|||
Requires<[HasSSE2]>;
|
||||
}
|
||||
|
||||
/// packed_sse2_fp_binop_rm - Packed SSE binops come in three basic forms:
|
||||
/// 1. v2f64 - This comes in SSE2 form for doubles.
|
||||
/// 2. rr vs rm - They include a reg+reg form and a reg+mem form.
|
||||
// Arithmetic
|
||||
|
||||
/// sse2_fp_unop_rm - SSE2 unops come in both scalar and vector forms.
|
||||
///
|
||||
let isTwoAddress = 1 in {
|
||||
multiclass packed_sse2_fp_binop_rm<bits<8> opc, string OpcodeStr,
|
||||
SDNode OpNode, bit Commutable = 0> {
|
||||
// Packed operation, reg+reg.
|
||||
def PDrr : PDI<opc, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
|
||||
!strconcat(OpcodeStr, "pd {$src2, $dst|$dst, $src2}"),
|
||||
[(set VR128:$dst, (v2f64 (OpNode VR128:$src1, VR128:$src2)))]> {
|
||||
/// In addition, we also have a special variant of the scalar form here to
|
||||
/// represent the associated intrinsic operation. This form is unlike the
|
||||
/// plain scalar form, in that it takes an entire vector (instead of a
|
||||
/// scalar) and leaves the top elements undefined.
|
||||
///
|
||||
/// And, we have a special variant form for a full-vector intrinsic form.
|
||||
///
|
||||
/// These four forms can each have a reg or a mem operand, so there are a
|
||||
/// total of eight "instructions".
|
||||
///
|
||||
multiclass sse2_fp_unop_rm<bits<8> opc, string OpcodeStr,
|
||||
SDNode OpNode,
|
||||
Intrinsic F64Int,
|
||||
Intrinsic V2F64Int,
|
||||
bit Commutable = 0> {
|
||||
// Scalar operation, reg.
|
||||
def SDr : SDI<opc, MRMSrcReg, (ops FR64:$dst, FR64:$src),
|
||||
!strconcat(OpcodeStr, "sd {$src, $dst|$dst, $src}"),
|
||||
[(set FR64:$dst, (OpNode FR64:$src))]> {
|
||||
let isCommutable = Commutable;
|
||||
}
|
||||
|
||||
// Packed operation, reg+mem.
|
||||
def PDrm : PDI<opc, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f128mem:$src2),
|
||||
!strconcat(OpcodeStr, "pd {$src2, $dst|$dst, $src2}"),
|
||||
[(set VR128:$dst, (OpNode VR128:$src1, (loadv2f64 addr:$src2)))]>;
|
||||
}
|
||||
}
|
||||
|
||||
defm ADD : packed_sse2_fp_binop_rm<0x58, "add", fadd, 1>;
|
||||
defm MUL : packed_sse2_fp_binop_rm<0x59, "mul", fmul, 1>;
|
||||
defm DIV : packed_sse2_fp_binop_rm<0x5E, "div", fdiv>;
|
||||
defm SUB : packed_sse2_fp_binop_rm<0x5C, "sub", fsub>;
|
||||
|
||||
// Arithmetic
|
||||
|
||||
class PD_Intr<bits<8> o, string OpcodeStr, Intrinsic IntId>
|
||||
: PDI<o, MRMSrcReg, (ops VR128:$dst, VR128:$src),
|
||||
!strconcat(OpcodeStr, " {$src, $dst|$dst, $src}"),
|
||||
[(set VR128:$dst, (IntId VR128:$src))]>;
|
||||
class PD_Intm<bits<8> o, string OpcodeStr, Intrinsic IntId>
|
||||
: PDI<o, MRMSrcMem, (ops VR128:$dst, f64mem:$src),
|
||||
!strconcat(OpcodeStr, " {$src, $dst|$dst, $src}"),
|
||||
[(set VR128:$dst, (IntId (load addr:$src)))]>;
|
||||
|
||||
class PD_Intrr<bits<8> o, string OpcodeStr, Intrinsic IntId>
|
||||
: PDI<o, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
|
||||
!strconcat(OpcodeStr, " {$src2, $dst|$dst, $src2}"),
|
||||
[(set VR128:$dst, (IntId VR128:$src1, VR128:$src2))]>;
|
||||
class PD_Intrm<bits<8> o, string OpcodeStr, Intrinsic IntId>
|
||||
: PDI<o, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f64mem:$src2),
|
||||
!strconcat(OpcodeStr, " {$src2, $dst|$dst, $src2}"),
|
||||
[(set VR128:$dst, (IntId VR128:$src1, (load addr:$src2)))]>;
|
||||
|
||||
def SQRTPDr : PD_Intr<0x51, "sqrtpd", int_x86_sse2_sqrt_pd>;
|
||||
def SQRTPDm : PD_Intm<0x51, "sqrtpd", int_x86_sse2_sqrt_pd>;
|
||||
|
||||
let isTwoAddress = 1 in {
|
||||
let isCommutable = 1 in {
|
||||
def MAXPDrr : PD_Intrr<0x5F, "maxpd", int_x86_sse2_max_pd>;
|
||||
def MINPDrr : PD_Intrr<0x5D, "minpd", int_x86_sse2_min_pd>;
|
||||
// Scalar operation, mem.
|
||||
def SDm : SDI<opc, MRMSrcMem, (ops FR64:$dst, f64mem:$src),
|
||||
!strconcat(OpcodeStr, "sd {$src, $dst|$dst, $src}"),
|
||||
[(set FR64:$dst, (OpNode (load addr:$src)))]>;
|
||||
|
||||
// Vector operation, reg.
|
||||
def PDr : PDI<opc, MRMSrcReg, (ops VR128:$dst, VR128:$src),
|
||||
!strconcat(OpcodeStr, "pd {$src, $dst|$dst, $src}"),
|
||||
[(set VR128:$dst, (v2f64 (OpNode VR128:$src)))]> {
|
||||
let isCommutable = Commutable;
|
||||
}
|
||||
|
||||
def MAXPDrm : PD_Intrm<0x5F, "maxpd", int_x86_sse2_max_pd>;
|
||||
def MINPDrm : PD_Intrm<0x5D, "minpd", int_x86_sse2_min_pd>;
|
||||
// Vector operation, mem.
|
||||
def PDm : PDI<opc, MRMSrcMem, (ops VR128:$dst, f128mem:$src),
|
||||
!strconcat(OpcodeStr, "pd {$src, $dst|$dst, $src}"),
|
||||
[(set VR128:$dst, (OpNode (loadv2f64 addr:$src)))]>;
|
||||
|
||||
// Intrinsic operation, reg.
|
||||
def SDr_Int : SDI<opc, MRMSrcReg, (ops VR128:$dst, VR128:$src),
|
||||
!strconcat(OpcodeStr, "sd {$src, $dst|$dst, $src}"),
|
||||
[(set VR128:$dst, (F64Int VR128:$src))]> {
|
||||
let isCommutable = Commutable;
|
||||
}
|
||||
|
||||
// Intrinsic operation, mem.
|
||||
def SDm_Int : SDI<opc, MRMSrcMem, (ops VR128:$dst, sdmem:$src),
|
||||
!strconcat(OpcodeStr, "sd {$src, $dst|$dst, $src}"),
|
||||
[(set VR128:$dst, (F64Int sse_load_f64:$src))]>;
|
||||
|
||||
// Vector intrinsic operation, reg
|
||||
def PDr_Int : PDI<opc, MRMSrcReg, (ops VR128:$dst, VR128:$src),
|
||||
!strconcat(OpcodeStr, "pd {$src, $dst|$dst, $src}"),
|
||||
[(set VR128:$dst, (V2F64Int VR128:$src))]> {
|
||||
let isCommutable = Commutable;
|
||||
}
|
||||
|
||||
// Vector intrinsic operation, mem
|
||||
def PDm_Int : PDI<opc, MRMSrcMem, (ops VR128:$dst, f64mem:$src),
|
||||
!strconcat(OpcodeStr, "pd {$src, $dst|$dst, $src}"),
|
||||
[(set VR128:$dst, (V2F64Int (load addr:$src)))]>;
|
||||
}
|
||||
|
||||
// Square root.
|
||||
defm SQRT : sse2_fp_unop_rm<0x51, "sqrt", fsqrt,
|
||||
int_x86_sse2_sqrt_sd, int_x86_sse2_sqrt_pd>;
|
||||
|
||||
// There are no f64 versions of the reciprocal approximation instructions.
|
||||
|
||||
// Logical
|
||||
let isTwoAddress = 1 in {
|
||||
let isCommutable = 1 in {
|
||||
|
|
|
@ -758,9 +758,21 @@ MachineInstr* X86RegisterInfo::foldMemoryOperand(MachineInstr *MI,
|
|||
{ X86::IMUL16rr, X86::IMUL16rm },
|
||||
{ X86::IMUL32rr, X86::IMUL32rm },
|
||||
{ X86::MAXPDrr, X86::MAXPDrm },
|
||||
{ X86::MAXPDrr_Int, X86::MAXPDrm_Int },
|
||||
{ X86::MAXPSrr, X86::MAXPSrm },
|
||||
{ X86::MAXPSrr_Int, X86::MAXPSrm_Int },
|
||||
{ X86::MAXSDrr, X86::MAXSDrm },
|
||||
{ X86::MAXSDrr_Int, X86::MAXSDrm_Int },
|
||||
{ X86::MAXSSrr, X86::MAXSSrm },
|
||||
{ X86::MAXSSrr_Int, X86::MAXSSrm_Int },
|
||||
{ X86::MINPDrr, X86::MINPDrm },
|
||||
{ X86::MINPDrr_Int, X86::MINPDrm_Int },
|
||||
{ X86::MINPSrr, X86::MINPSrm },
|
||||
{ X86::MINPSrr_Int, X86::MINPSrm_Int },
|
||||
{ X86::MINSDrr, X86::MINSDrm },
|
||||
{ X86::MINSDrr_Int, X86::MINSDrm_Int },
|
||||
{ X86::MINSSrr, X86::MINSSrm },
|
||||
{ X86::MINSSrr_Int, X86::MINSSrm_Int },
|
||||
{ X86::MULPDrr, X86::MULPDrm },
|
||||
{ X86::MULPSrr, X86::MULPSrm },
|
||||
{ X86::MULSDrr, X86::MULSDrm },
|
||||
|
@ -825,15 +837,23 @@ MachineInstr* X86RegisterInfo::foldMemoryOperand(MachineInstr *MI,
|
|||
{ X86::PUNPCKLWDrr, X86::PUNPCKLWDrm },
|
||||
{ X86::PXORrr, X86::PXORrm },
|
||||
{ X86::RCPPSr, X86::RCPPSm },
|
||||
{ X86::RCPPSr_Int, X86::RCPPSm_Int },
|
||||
{ X86::RSQRTPSr, X86::RSQRTPSm },
|
||||
{ X86::RSQRTPSr_Int, X86::RSQRTPSm_Int },
|
||||
{ X86::RSQRTSSr, X86::RSQRTSSm },
|
||||
{ X86::RSQRTSSr_Int, X86::RSQRTSSm_Int },
|
||||
{ X86::SBB32rr, X86::SBB32rm },
|
||||
{ X86::SBB64rr, X86::SBB64rm },
|
||||
{ X86::SHUFPDrri, X86::SHUFPDrmi },
|
||||
{ X86::SHUFPSrri, X86::SHUFPSrmi },
|
||||
{ X86::SQRTPDr, X86::SQRTPDm },
|
||||
{ X86::SQRTPDr_Int, X86::SQRTPDm_Int },
|
||||
{ X86::SQRTPSr, X86::SQRTPSm },
|
||||
{ X86::SQRTPSr_Int, X86::SQRTPSm_Int },
|
||||
{ X86::SQRTSDr, X86::SQRTSDm },
|
||||
{ X86::SQRTSDr_Int, X86::SQRTSDm_Int },
|
||||
{ X86::SQRTSSr, X86::SQRTSSm },
|
||||
{ X86::SQRTSSr_Int, X86::SQRTSSm_Int },
|
||||
{ X86::SUB16rr, X86::SUB16rm },
|
||||
{ X86::SUB32rr, X86::SUB32rm },
|
||||
{ X86::SUB64rr, X86::SUB64rm },
|
||||
|
|
Loading…
Reference in New Issue