forked from OSchip/llvm-project
Improve generated code for extending loads and some trunc stores on ARM.
Teach TargetSelectionDAG about lengthening loads for vector types and set v4i8 as legal. Allow FP_TO_UINT for v4i16 from v4i32. llvm-svn: 150956
This commit is contained in:
parent
be3df7b3ee
commit
547d4c0662
|
@ -657,6 +657,51 @@ def zextloadi32 : PatFrag<(ops node:$ptr), (zextload node:$ptr), [{
|
|||
return cast<LoadSDNode>(N)->getMemoryVT() == MVT::i32;
|
||||
}]>;
|
||||
|
||||
// Any-extending vector load fragments.  Each fragment matches an "extload"
// whose in-memory scalar element type is the named type; the vector lane
// count is left to the surrounding pattern.
def extloadvi1 : PatFrag<(ops node:$ptr), (extload node:$ptr), [{
  return cast<LoadSDNode>(N)->getMemoryVT().getScalarType() == MVT::i1;
}]>;
def extloadvi8 : PatFrag<(ops node:$ptr), (extload node:$ptr), [{
  return cast<LoadSDNode>(N)->getMemoryVT().getScalarType() == MVT::i8;
}]>;
def extloadvi16 : PatFrag<(ops node:$ptr), (extload node:$ptr), [{
  return cast<LoadSDNode>(N)->getMemoryVT().getScalarType() == MVT::i16;
}]>;
def extloadvi32 : PatFrag<(ops node:$ptr), (extload node:$ptr), [{
  return cast<LoadSDNode>(N)->getMemoryVT().getScalarType() == MVT::i32;
}]>;
def extloadvf32 : PatFrag<(ops node:$ptr), (extload node:$ptr), [{
  return cast<LoadSDNode>(N)->getMemoryVT().getScalarType() == MVT::f32;
}]>;
def extloadvf64 : PatFrag<(ops node:$ptr), (extload node:$ptr), [{
  return cast<LoadSDNode>(N)->getMemoryVT().getScalarType() == MVT::f64;
}]>;
|
||||
|
||||
// Sign-extending vector load fragments, keyed by the in-memory scalar
// element type.
def sextloadvi1 : PatFrag<(ops node:$ptr), (sextload node:$ptr), [{
  return cast<LoadSDNode>(N)->getMemoryVT().getScalarType() == MVT::i1;
}]>;
def sextloadvi8 : PatFrag<(ops node:$ptr), (sextload node:$ptr), [{
  return cast<LoadSDNode>(N)->getMemoryVT().getScalarType() == MVT::i8;
}]>;
def sextloadvi16 : PatFrag<(ops node:$ptr), (sextload node:$ptr), [{
  return cast<LoadSDNode>(N)->getMemoryVT().getScalarType() == MVT::i16;
}]>;
def sextloadvi32 : PatFrag<(ops node:$ptr), (sextload node:$ptr), [{
  return cast<LoadSDNode>(N)->getMemoryVT().getScalarType() == MVT::i32;
}]>;
|
||||
|
||||
// Zero-extending vector load fragments, keyed by the in-memory scalar
// element type.
def zextloadvi1 : PatFrag<(ops node:$ptr), (zextload node:$ptr), [{
  return cast<LoadSDNode>(N)->getMemoryVT().getScalarType() == MVT::i1;
}]>;
def zextloadvi8 : PatFrag<(ops node:$ptr), (zextload node:$ptr), [{
  return cast<LoadSDNode>(N)->getMemoryVT().getScalarType() == MVT::i8;
}]>;
def zextloadvi16 : PatFrag<(ops node:$ptr), (zextload node:$ptr), [{
  return cast<LoadSDNode>(N)->getMemoryVT().getScalarType() == MVT::i16;
}]>;
def zextloadvi32 : PatFrag<(ops node:$ptr), (zextload node:$ptr), [{
  return cast<LoadSDNode>(N)->getMemoryVT().getScalarType() == MVT::i32;
}]>;
|
||||
|
||||
// store fragments.
|
||||
def unindexedstore : PatFrag<(ops node:$val, node:$ptr),
|
||||
(st node:$val, node:$ptr), [{
|
||||
|
|
|
@ -533,9 +533,13 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
|
|||
setOperationAction(ISD::SETCC, MVT::v1i64, Expand);
|
||||
setOperationAction(ISD::SETCC, MVT::v2i64, Expand);
|
||||
// Neon does not have single instruction SINT_TO_FP and UINT_TO_FP with
// a destination type that is wider than the source, and nor does
// it have a FP_TO_[SU]INT instruction with a narrower destination than
// source.
|
||||
setOperationAction(ISD::SINT_TO_FP, MVT::v4i16, Custom);
|
||||
setOperationAction(ISD::UINT_TO_FP, MVT::v4i16, Custom);
|
||||
setOperationAction(ISD::FP_TO_UINT, MVT::v4i16, Custom);
|
||||
setOperationAction(ISD::FP_TO_SINT, MVT::v4i16, Custom);
|
||||
|
||||
setTargetDAGCombine(ISD::INTRINSIC_VOID);
|
||||
setTargetDAGCombine(ISD::INTRINSIC_W_CHAIN);
|
||||
|
@ -555,7 +559,15 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
|
|||
setTargetDAGCombine(ISD::FP_TO_UINT);
|
||||
setTargetDAGCombine(ISD::FDIV);
|
||||
|
||||
setLoadExtAction(ISD::EXTLOAD, MVT::v4i8, Expand);
|
||||
// It is legal to extload from v4i8 to v4i16 or v4i32.
|
||||
MVT Tys[6] = {MVT::v8i8, MVT::v4i8, MVT::v2i8,
|
||||
MVT::v4i16, MVT::v2i16,
|
||||
MVT::v2i32};
|
||||
for (unsigned i = 0; i < 6; ++i) {
|
||||
setLoadExtAction(ISD::EXTLOAD, Tys[i], Legal);
|
||||
setLoadExtAction(ISD::ZEXTLOAD, Tys[i], Legal);
|
||||
setLoadExtAction(ISD::SEXTLOAD, Tys[i], Legal);
|
||||
}
|
||||
}
|
||||
|
||||
computeRegisterProperties();
|
||||
|
@ -3058,12 +3070,22 @@ SDValue ARMTargetLowering::LowerBR_JT(SDValue Op, SelectionDAG &DAG) const {
|
|||
}
|
||||
|
||||
// Custom lowering for vector FP_TO_SINT / FP_TO_UINT.
//
// NEON only has a single-instruction conversion for f32 sources with an
// i32-element destination.  For an i32-element result: keep the node as-is
// when the source elements are f32, otherwise (e.g. f64 source) scalarize.
// For a narrower result, only v4f32 -> v4i16 gets a fast path: convert to
// v4i32 first, then truncate.  Everything else is unrolled to scalar ops.
//
// NOTE(review): the diff-merged original carried the old body (unconditional
// i32 assert + early returns) fused ahead of the new code, leaving the new
// i32 check and the v4i16 path unreachable; this is the reconstructed final
// form with the dead duplicate removed.
static SDValue LowerVectorFP_TO_INT(SDValue Op, SelectionDAG &DAG) {
  EVT VT = Op.getValueType();
  DebugLoc dl = Op.getDebugLoc();

  if (VT.getVectorElementType() == MVT::i32) {
    // f32 -> i32 is a single NEON instruction; leave the node alone.
    if (Op.getOperand(0).getValueType().getVectorElementType() == MVT::f32)
      return Op;
    // No direct conversion from this source element type; scalarize.
    return DAG.UnrollVectorOp(Op.getNode());
  }

  assert(Op.getOperand(0).getValueType() == MVT::v4f32 &&
         "Invalid type for custom lowering!");
  if (VT != MVT::v4i16)
    return DAG.UnrollVectorOp(Op.getNode());

  // v4f32 -> v4i16: convert to v4i32, then truncate to the narrow result.
  Op = DAG.getNode(Op.getOpcode(), dl, MVT::v4i32, Op.getOperand(0));
  return DAG.getNode(ISD::TRUNCATE, dl, VT, Op);
}
|
||||
|
||||
static SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) {
|
||||
|
|
|
@ -5624,6 +5624,117 @@ def : Pat<(v2f64 (bitconvert (v8i16 QPR:$src))), (v2f64 QPR:$src)>;
|
|||
// v2f64 bitcasts are free: same Q register, reinterpreted.
def : Pat<(v2f64 (bitconvert (v16i8 QPR:$src))), (v2f64 QPR:$src)>;
def : Pat<(v2f64 (bitconvert (v4f32 QPR:$src))), (v2f64 QPR:$src)>;
|
||||
|
||||
// Vector lengthening move with load, matching extending loads.

// extload, zextload and sextload for a standard lengthening load. Example:
// Lengthen_Single<"8", "i16", "i8"> =
//   Pat<(v8i16 (extloadvi8 addrmode5:$addr)),
//       (VMOVLuv8i16 (VLDRD addrmode5:$addr))>;
multiclass Lengthen_Single<string DestLanes, string DestTy, string SrcTy> {
  // Any-extend uses the unsigned lengthening move (high bits don't matter).
  def _Any : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
                   (!cast<PatFrag>("extloadv" # SrcTy) addrmode5:$addr)),
                 (!cast<Instruction>("VMOVLuv" # DestLanes # DestTy)
                   (VLDRD addrmode5:$addr))>;
  def _Z   : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
                   (!cast<PatFrag>("zextloadv" # SrcTy) addrmode5:$addr)),
                 (!cast<Instruction>("VMOVLuv" # DestLanes # DestTy)
                   (VLDRD addrmode5:$addr))>;
  def _S   : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
                   (!cast<PatFrag>("sextloadv" # SrcTy) addrmode5:$addr)),
                 (!cast<Instruction>("VMOVLsv" # DestLanes # DestTy)
                   (VLDRD addrmode5:$addr))>;
}
|
||||
|
||||
// extload, zextload and sextload for a lengthening load which only uses
// half the lanes available. Example:
// Lengthen_HalfSingle<"4", "i16", "i8", "8", "i16"> =
//   Pat<(v4i16 (extloadvi8 addrmode5:$addr)),
//       (EXTRACT_SUBREG (VMOVLuv8i16 (INSERT_SUBREG (f64 (IMPLICIT_DEF)),
//                                                   (VLDRS addrmode5:$addr),
//                                                   ssub_0)),
//                       dsub_0)>;
// (The example previously listed the arguments as <"4", "i16", "8", "i16",
// "i8">, which contradicts the declared parameter order
// <DestLanes, DestTy, SrcTy, InsnLanes, InsnTy> and every instantiation
// below; corrected here.)
multiclass Lengthen_HalfSingle<string DestLanes, string DestTy, string SrcTy,
                               string InsnLanes, string InsnTy> {
  // Load into the low S register, widen the full D register, then take the
  // low half of the Q result.
  def _Any : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
                   (!cast<PatFrag>("extloadv" # SrcTy) addrmode5:$addr)),
        (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # InsnLanes # InsnTy)
          (INSERT_SUBREG (f64 (IMPLICIT_DEF)), (VLDRS addrmode5:$addr), ssub_0)),
          dsub_0)>;
  def _Z : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
                 (!cast<PatFrag>("zextloadv" # SrcTy) addrmode5:$addr)),
        (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # InsnLanes # InsnTy)
          (INSERT_SUBREG (f64 (IMPLICIT_DEF)), (VLDRS addrmode5:$addr), ssub_0)),
          dsub_0)>;
  def _S : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
                 (!cast<PatFrag>("sextloadv" # SrcTy) addrmode5:$addr)),
        (EXTRACT_SUBREG (!cast<Instruction>("VMOVLsv" # InsnLanes # InsnTy)
          (INSERT_SUBREG (f64 (IMPLICIT_DEF)), (VLDRS addrmode5:$addr), ssub_0)),
          dsub_0)>;
}
|
||||
|
||||
// extload, zextload and sextload for a lengthening load followed by another
// lengthening load, to quadruple the initial length.
// Lengthen_Double<"4", "i32", "i8", "8", "i16", "4", "i32", qsub_0> =
//   Pat<(v4i32 (extloadvi8 addrmode5:$addr)),
//       (EXTRACT_SUBREG (VMOVLuv4i32
//         (EXTRACT_SUBREG (VMOVLuv8i16 (INSERT_SUBREG (f64 (IMPLICIT_DEF)),
//                                                     (VLDRS addrmode5:$addr),
//                                                     ssub_0)),
//                         dsub_0)),
//         qsub_0)>;
multiclass Lengthen_Double<string DestLanes, string DestTy, string SrcTy,
                           string Insn1Lanes, string Insn1Ty, string Insn2Lanes,
                           string Insn2Ty, SubRegIndex RegType> {
  // Two chained VMOVLs: widen once to Insn1 width, extract the low D half,
  // widen again to Insn2 width, then extract the RegType sub-register.
  def _Any : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
                   (!cast<PatFrag>("extloadv" # SrcTy) addrmode5:$addr)),
        (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn2Lanes # Insn2Ty)
          (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn1Lanes # Insn1Ty)
            (INSERT_SUBREG (f64 (IMPLICIT_DEF)), (VLDRS addrmode5:$addr),
                           ssub_0)), dsub_0)),
          RegType)>;
  def _Z : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
                 (!cast<PatFrag>("zextloadv" # SrcTy) addrmode5:$addr)),
        (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn2Lanes # Insn2Ty)
          (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn1Lanes # Insn1Ty)
            (INSERT_SUBREG (f64 (IMPLICIT_DEF)), (VLDRS addrmode5:$addr),
                           ssub_0)), dsub_0)),
          RegType)>;
  def _S : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
                 (!cast<PatFrag>("sextloadv" # SrcTy) addrmode5:$addr)),
        (EXTRACT_SUBREG (!cast<Instruction>("VMOVLsv" # Insn2Lanes # Insn2Ty)
          (EXTRACT_SUBREG (!cast<Instruction>("VMOVLsv" # Insn1Lanes # Insn1Ty)
            (INSERT_SUBREG (f64 (IMPLICIT_DEF)), (VLDRS addrmode5:$addr),
                           ssub_0)), dsub_0)),
          RegType)>;
}
|
||||
|
||||
// Full-width lengthening loads (whole D register in, whole Q register out).
defm : Lengthen_Single<"8", "i16", "i8">;  // v8i8  -> v8i16
defm : Lengthen_Single<"4", "i32", "i16">; // v4i16 -> v4i32
defm : Lengthen_Single<"2", "i64", "i32">; // v2i32 -> v2i64

// Half-width lengthening loads (S-register load, low half of the result).
defm : Lengthen_HalfSingle<"4", "i16", "i8", "8", "i16">;  // v4i8  -> v4i16
defm : Lengthen_HalfSingle<"2", "i16", "i8", "8", "i16">;  // v2i8  -> v2i16
defm : Lengthen_HalfSingle<"2", "i32", "i16", "4", "i32">; // v2i16 -> v2i32

// Double lengthening - v4i8 -> v4i16 -> v4i32
defm : Lengthen_Double<"4", "i32", "i8", "8", "i16", "4", "i32", qsub_0>;
// v2i8 -> v2i16 -> v2i32
defm : Lengthen_Double<"2", "i32", "i8", "8", "i16", "4", "i32", dsub_0>;
// v2i16 -> v2i32 -> v2i64
defm : Lengthen_Double<"2", "i64", "i16", "4", "i32", "2", "i64", qsub_0>;
|
||||
|
||||
// Triple lengthening - v2i8 -> v2i16 -> v2i32 -> v2i64.
// Three chained VMOVLs; the any-extend case reuses the unsigned forms.
def : Pat<(v2i64 (extloadvi8 addrmode5:$addr)),
      (VMOVLuv2i64 (EXTRACT_SUBREG (VMOVLuv4i32 (EXTRACT_SUBREG (VMOVLuv8i16
        (INSERT_SUBREG (f64 (IMPLICIT_DEF)), (VLDRS addrmode5:$addr), ssub_0)),
        dsub_0)), dsub_0))>;
def : Pat<(v2i64 (zextloadvi8 addrmode5:$addr)),
      (VMOVLuv2i64 (EXTRACT_SUBREG (VMOVLuv4i32 (EXTRACT_SUBREG (VMOVLuv8i16
        (INSERT_SUBREG (f64 (IMPLICIT_DEF)), (VLDRS addrmode5:$addr), ssub_0)),
        dsub_0)), dsub_0))>;
def : Pat<(v2i64 (sextloadvi8 addrmode5:$addr)),
      (VMOVLsv2i64 (EXTRACT_SUBREG (VMOVLsv4i32 (EXTRACT_SUBREG (VMOVLsv8i16
        (INSERT_SUBREG (f64 (IMPLICIT_DEF)), (VLDRS addrmode5:$addr), ssub_0)),
        dsub_0)), dsub_0))>;
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Assembler aliases
|
||||
|
|
Loading…
Reference in New Issue