forked from OSchip/llvm-project
Remove 256-bit specific node types for UNPCKHPS/D and instead use the 128-bit versions and let the operand type distinguish. Also fix the load form of the v8i32 patterns for these to realize that the load would be promoted to v4i64.
llvm-svn: 145126
This commit is contained in:
parent
d26466748b
commit
d65a444478
|
@ -2845,16 +2845,12 @@ static bool isTargetShuffle(unsigned Opcode) {
|
|||
case X86ISD::MOVSD:
|
||||
case X86ISD::UNPCKLPS:
|
||||
case X86ISD::UNPCKLPD:
|
||||
case X86ISD::VUNPCKLPSY:
|
||||
case X86ISD::VUNPCKLPDY:
|
||||
case X86ISD::PUNPCKLWD:
|
||||
case X86ISD::PUNPCKLBW:
|
||||
case X86ISD::PUNPCKLDQ:
|
||||
case X86ISD::PUNPCKLQDQ:
|
||||
case X86ISD::UNPCKHPS:
|
||||
case X86ISD::UNPCKHPD:
|
||||
case X86ISD::VUNPCKHPSY:
|
||||
case X86ISD::VUNPCKHPDY:
|
||||
case X86ISD::PUNPCKHWD:
|
||||
case X86ISD::PUNPCKHBW:
|
||||
case X86ISD::PUNPCKHDQ:
|
||||
|
@ -2926,16 +2922,12 @@ static SDValue getTargetShuffleNode(unsigned Opc, DebugLoc dl, EVT VT,
|
|||
case X86ISD::MOVSD:
|
||||
case X86ISD::UNPCKLPS:
|
||||
case X86ISD::UNPCKLPD:
|
||||
case X86ISD::VUNPCKLPSY:
|
||||
case X86ISD::VUNPCKLPDY:
|
||||
case X86ISD::PUNPCKLWD:
|
||||
case X86ISD::PUNPCKLBW:
|
||||
case X86ISD::PUNPCKLDQ:
|
||||
case X86ISD::PUNPCKLQDQ:
|
||||
case X86ISD::UNPCKHPS:
|
||||
case X86ISD::UNPCKHPD:
|
||||
case X86ISD::VUNPCKHPSY:
|
||||
case X86ISD::VUNPCKHPDY:
|
||||
case X86ISD::PUNPCKHWD:
|
||||
case X86ISD::PUNPCKHBW:
|
||||
case X86ISD::PUNPCKHDQ:
|
||||
|
@ -4651,8 +4643,6 @@ static SDValue getShuffleScalarElt(SDNode *N, int Index, SelectionDAG &DAG,
|
|||
break;
|
||||
case X86ISD::UNPCKHPS:
|
||||
case X86ISD::UNPCKHPD:
|
||||
case X86ISD::VUNPCKHPSY:
|
||||
case X86ISD::VUNPCKHPDY:
|
||||
DecodeUNPCKHPMask(VT, ShuffleMask);
|
||||
break;
|
||||
case X86ISD::PUNPCKLBW:
|
||||
|
@ -4663,8 +4653,6 @@ static SDValue getShuffleScalarElt(SDNode *N, int Index, SelectionDAG &DAG,
|
|||
break;
|
||||
case X86ISD::UNPCKLPS:
|
||||
case X86ISD::UNPCKLPD:
|
||||
case X86ISD::VUNPCKLPSY:
|
||||
case X86ISD::VUNPCKLPDY:
|
||||
DecodeUNPCKLPMask(VT, ShuffleMask);
|
||||
break;
|
||||
case X86ISD::MOVHLPS:
|
||||
|
@ -6582,16 +6570,16 @@ static inline unsigned getUNPCKLOpcode(EVT VT, bool HasAVX2) {
|
|||
switch(VT.getSimpleVT().SimpleTy) {
|
||||
case MVT::v4i32: return X86ISD::PUNPCKLDQ;
|
||||
case MVT::v2i64: return X86ISD::PUNPCKLQDQ;
|
||||
case MVT::v4f32: return X86ISD::UNPCKLPS;
|
||||
case MVT::v2f64: return X86ISD::UNPCKLPD;
|
||||
case MVT::v8i32:
|
||||
if (HasAVX2) return X86ISD::PUNPCKLDQ;
|
||||
// else use fp unit for int unpack.
|
||||
case MVT::v8f32: return X86ISD::VUNPCKLPSY;
|
||||
case MVT::v8f32:
|
||||
case MVT::v4f32: return X86ISD::UNPCKLPS;
|
||||
case MVT::v4i64:
|
||||
if (HasAVX2) return X86ISD::PUNPCKLQDQ;
|
||||
// else use fp unit for int unpack.
|
||||
case MVT::v4f64: return X86ISD::VUNPCKLPDY;
|
||||
case MVT::v4f64:
|
||||
case MVT::v2f64: return X86ISD::UNPCKLPD;
|
||||
case MVT::v32i8:
|
||||
case MVT::v16i8: return X86ISD::PUNPCKLBW;
|
||||
case MVT::v16i16:
|
||||
|
@ -6606,16 +6594,16 @@ static inline unsigned getUNPCKHOpcode(EVT VT, bool HasAVX2) {
|
|||
switch(VT.getSimpleVT().SimpleTy) {
|
||||
case MVT::v4i32: return X86ISD::PUNPCKHDQ;
|
||||
case MVT::v2i64: return X86ISD::PUNPCKHQDQ;
|
||||
case MVT::v4f32: return X86ISD::UNPCKHPS;
|
||||
case MVT::v2f64: return X86ISD::UNPCKHPD;
|
||||
case MVT::v8i32:
|
||||
if (HasAVX2) return X86ISD::PUNPCKHDQ;
|
||||
// else use fp unit for int unpack.
|
||||
case MVT::v8f32: return X86ISD::VUNPCKHPSY;
|
||||
case MVT::v8f32:
|
||||
case MVT::v4f32: return X86ISD::UNPCKHPS;
|
||||
case MVT::v4i64:
|
||||
if (HasAVX2) return X86ISD::PUNPCKHQDQ;
|
||||
// else use fp unit for int unpack.
|
||||
case MVT::v4f64: return X86ISD::VUNPCKHPDY;
|
||||
case MVT::v4f64:
|
||||
case MVT::v2f64: return X86ISD::UNPCKHPD;
|
||||
case MVT::v32i8:
|
||||
case MVT::v16i8: return X86ISD::PUNPCKHBW;
|
||||
case MVT::v16i16:
|
||||
|
@ -11280,8 +11268,6 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
|
|||
case X86ISD::MOVSS: return "X86ISD::MOVSS";
|
||||
case X86ISD::UNPCKLPS: return "X86ISD::UNPCKLPS";
|
||||
case X86ISD::UNPCKLPD: return "X86ISD::UNPCKLPD";
|
||||
case X86ISD::VUNPCKLPSY: return "X86ISD::VUNPCKLPSY";
|
||||
case X86ISD::VUNPCKLPDY: return "X86ISD::VUNPCKLPDY";
|
||||
case X86ISD::UNPCKHPS: return "X86ISD::UNPCKHPS";
|
||||
case X86ISD::UNPCKHPD: return "X86ISD::UNPCKHPD";
|
||||
case X86ISD::PUNPCKLBW: return "X86ISD::PUNPCKLBW";
|
||||
|
@ -14877,16 +14863,12 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
|
|||
case X86ISD::PUNPCKHQDQ:
|
||||
case X86ISD::UNPCKHPS:
|
||||
case X86ISD::UNPCKHPD:
|
||||
case X86ISD::VUNPCKHPSY:
|
||||
case X86ISD::VUNPCKHPDY:
|
||||
case X86ISD::PUNPCKLBW:
|
||||
case X86ISD::PUNPCKLWD:
|
||||
case X86ISD::PUNPCKLDQ:
|
||||
case X86ISD::PUNPCKLQDQ:
|
||||
case X86ISD::UNPCKLPS:
|
||||
case X86ISD::UNPCKLPD:
|
||||
case X86ISD::VUNPCKLPSY:
|
||||
case X86ISD::VUNPCKLPDY:
|
||||
case X86ISD::MOVHLPS:
|
||||
case X86ISD::MOVLHPS:
|
||||
case X86ISD::PSHUFD:
|
||||
|
|
|
@ -275,12 +275,8 @@ namespace llvm {
|
|||
MOVSS,
|
||||
UNPCKLPS,
|
||||
UNPCKLPD,
|
||||
VUNPCKLPSY,
|
||||
VUNPCKLPDY,
|
||||
UNPCKHPS,
|
||||
UNPCKHPD,
|
||||
VUNPCKHPSY,
|
||||
VUNPCKHPDY,
|
||||
PUNPCKLBW,
|
||||
PUNPCKLWD,
|
||||
PUNPCKLDQ,
|
||||
|
|
|
@ -132,13 +132,9 @@ def X86Movlpd : SDNode<"X86ISD::MOVLPD", SDTShuff2Op>;
|
|||
|
||||
def X86Unpcklps : SDNode<"X86ISD::UNPCKLPS", SDTShuff2Op>;
|
||||
def X86Unpcklpd : SDNode<"X86ISD::UNPCKLPD", SDTShuff2Op>;
|
||||
def X86Unpcklpsy : SDNode<"X86ISD::VUNPCKLPSY", SDTShuff2Op>;
|
||||
def X86Unpcklpdy : SDNode<"X86ISD::VUNPCKLPDY", SDTShuff2Op>;
|
||||
|
||||
def X86Unpckhps : SDNode<"X86ISD::UNPCKHPS", SDTShuff2Op>;
|
||||
def X86Unpckhpd : SDNode<"X86ISD::UNPCKHPD", SDTShuff2Op>;
|
||||
def X86Unpckhpsy : SDNode<"X86ISD::VUNPCKHPSY", SDTShuff2Op>;
|
||||
def X86Unpckhpdy : SDNode<"X86ISD::VUNPCKHPDY", SDTShuff2Op>;
|
||||
|
||||
def X86Punpcklbw : SDNode<"X86ISD::PUNPCKLBW", SDTShuff2Op>;
|
||||
def X86Punpcklwd : SDNode<"X86ISD::PUNPCKLWD", SDTShuff2Op>;
|
||||
|
|
|
@ -2472,21 +2472,21 @@ let Predicates = [HasAVX] in {
|
|||
def : Pat<(v4f32 (X86Unpckhps VR128:$src1, VR128:$src2)),
|
||||
(VUNPCKHPSrr VR128:$src1, VR128:$src2)>;
|
||||
|
||||
def : Pat<(v8f32 (X86Unpcklpsy VR256:$src1, (memopv8f32 addr:$src2))),
|
||||
def : Pat<(v8f32 (X86Unpcklps VR256:$src1, (memopv8f32 addr:$src2))),
|
||||
(VUNPCKLPSYrm VR256:$src1, addr:$src2)>;
|
||||
def : Pat<(v8f32 (X86Unpcklpsy VR256:$src1, VR256:$src2)),
|
||||
def : Pat<(v8f32 (X86Unpcklps VR256:$src1, VR256:$src2)),
|
||||
(VUNPCKLPSYrr VR256:$src1, VR256:$src2)>;
|
||||
def : Pat<(v8i32 (X86Unpcklpsy VR256:$src1, VR256:$src2)),
|
||||
def : Pat<(v8i32 (X86Unpcklps VR256:$src1, VR256:$src2)),
|
||||
(VUNPCKLPSYrr VR256:$src1, VR256:$src2)>;
|
||||
def : Pat<(v8i32 (X86Unpcklpsy VR256:$src1, (memopv8i32 addr:$src2))),
|
||||
def : Pat<(v8i32 (X86Unpcklps VR256:$src1, (bc_v8i32 (memopv4i64 addr:$src2)))),
|
||||
(VUNPCKLPSYrm VR256:$src1, addr:$src2)>;
|
||||
def : Pat<(v8f32 (X86Unpckhpsy VR256:$src1, (memopv8f32 addr:$src2))),
|
||||
def : Pat<(v8f32 (X86Unpckhps VR256:$src1, (memopv8f32 addr:$src2))),
|
||||
(VUNPCKHPSYrm VR256:$src1, addr:$src2)>;
|
||||
def : Pat<(v8f32 (X86Unpckhpsy VR256:$src1, VR256:$src2)),
|
||||
def : Pat<(v8f32 (X86Unpckhps VR256:$src1, VR256:$src2)),
|
||||
(VUNPCKHPSYrr VR256:$src1, VR256:$src2)>;
|
||||
def : Pat<(v8i32 (X86Unpckhpsy VR256:$src1, (memopv8i32 addr:$src2))),
|
||||
def : Pat<(v8i32 (X86Unpckhps VR256:$src1, (bc_v8i32 (memopv4i64 addr:$src2)))),
|
||||
(VUNPCKHPSYrm VR256:$src1, addr:$src2)>;
|
||||
def : Pat<(v8i32 (X86Unpckhpsy VR256:$src1, VR256:$src2)),
|
||||
def : Pat<(v8i32 (X86Unpckhps VR256:$src1, VR256:$src2)),
|
||||
(VUNPCKHPSYrr VR256:$src1, VR256:$src2)>;
|
||||
|
||||
def : Pat<(v2f64 (X86Unpcklpd VR128:$src1, (memopv2f64 addr:$src2))),
|
||||
|
@ -2498,21 +2498,21 @@ let Predicates = [HasAVX] in {
|
|||
def : Pat<(v2f64 (X86Unpckhpd VR128:$src1, VR128:$src2)),
|
||||
(VUNPCKHPDrr VR128:$src1, VR128:$src2)>;
|
||||
|
||||
def : Pat<(v4f64 (X86Unpcklpdy VR256:$src1, (memopv4f64 addr:$src2))),
|
||||
def : Pat<(v4f64 (X86Unpcklpd VR256:$src1, (memopv4f64 addr:$src2))),
|
||||
(VUNPCKLPDYrm VR256:$src1, addr:$src2)>;
|
||||
def : Pat<(v4f64 (X86Unpcklpdy VR256:$src1, VR256:$src2)),
|
||||
def : Pat<(v4f64 (X86Unpcklpd VR256:$src1, VR256:$src2)),
|
||||
(VUNPCKLPDYrr VR256:$src1, VR256:$src2)>;
|
||||
def : Pat<(v4i64 (X86Unpcklpdy VR256:$src1, (memopv4i64 addr:$src2))),
|
||||
def : Pat<(v4i64 (X86Unpcklpd VR256:$src1, (memopv4i64 addr:$src2))),
|
||||
(VUNPCKLPDYrm VR256:$src1, addr:$src2)>;
|
||||
def : Pat<(v4i64 (X86Unpcklpdy VR256:$src1, VR256:$src2)),
|
||||
def : Pat<(v4i64 (X86Unpcklpd VR256:$src1, VR256:$src2)),
|
||||
(VUNPCKLPDYrr VR256:$src1, VR256:$src2)>;
|
||||
def : Pat<(v4f64 (X86Unpckhpdy VR256:$src1, (memopv4f64 addr:$src2))),
|
||||
def : Pat<(v4f64 (X86Unpckhpd VR256:$src1, (memopv4f64 addr:$src2))),
|
||||
(VUNPCKHPDYrm VR256:$src1, addr:$src2)>;
|
||||
def : Pat<(v4f64 (X86Unpckhpdy VR256:$src1, VR256:$src2)),
|
||||
def : Pat<(v4f64 (X86Unpckhpd VR256:$src1, VR256:$src2)),
|
||||
(VUNPCKHPDYrr VR256:$src1, VR256:$src2)>;
|
||||
def : Pat<(v4i64 (X86Unpckhpdy VR256:$src1, (memopv4i64 addr:$src2))),
|
||||
def : Pat<(v4i64 (X86Unpckhpd VR256:$src1, (memopv4i64 addr:$src2))),
|
||||
(VUNPCKHPDYrm VR256:$src1, addr:$src2)>;
|
||||
def : Pat<(v4i64 (X86Unpckhpdy VR256:$src1, VR256:$src2)),
|
||||
def : Pat<(v4i64 (X86Unpckhpd VR256:$src1, VR256:$src2)),
|
||||
(VUNPCKHPDYrr VR256:$src1, VR256:$src2)>;
|
||||
|
||||
// FIXME: Instead of X86Movddup, there should be a X86Unpcklpd here, the
|
||||
|
|
|
@ -67,6 +67,15 @@ entry:
|
|||
ret <8 x i32> %shuffle.i
|
||||
}
|
||||
|
||||
; CHECK: vunpckhps (%
|
||||
define <8 x i32> @unpackhips2(<8 x i32>* %src1, <8 x i32>* %src2) nounwind uwtable readnone ssp {
|
||||
entry:
|
||||
%a = load <8 x i32>* %src1
|
||||
%b = load <8 x i32>* %src2
|
||||
%shuffle.i = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 2, i32 10, i32 3, i32 11, i32 6, i32 14, i32 7, i32 15>
|
||||
ret <8 x i32> %shuffle.i
|
||||
}
|
||||
|
||||
; CHECK: vunpckhpd
|
||||
define <4 x i64> @unpackhipd1(<4 x i64> %src1, <4 x i64> %src2) nounwind uwtable readnone ssp {
|
||||
entry:
|
||||
|
@ -74,6 +83,15 @@ entry:
|
|||
ret <4 x i64> %shuffle.i
|
||||
}
|
||||
|
||||
; CHECK: vunpckhpd (%
|
||||
define <4 x i64> @unpackhipd2(<4 x i64>* %src1, <4 x i64>* %src2) nounwind uwtable readnone ssp {
|
||||
entry:
|
||||
%a = load <4 x i64>* %src1
|
||||
%b = load <4 x i64>* %src2
|
||||
%shuffle.i = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
|
||||
ret <4 x i64> %shuffle.i
|
||||
}
|
||||
|
||||
; CHECK: vunpcklps
|
||||
define <8 x i32> @unpacklops1(<8 x i32> %src1, <8 x i32> %src2) nounwind uwtable readnone ssp {
|
||||
entry:
|
||||
|
@ -81,9 +99,27 @@ entry:
|
|||
ret <8 x i32> %shuffle.i
|
||||
}
|
||||
|
||||
; CHECK: vunpcklps (%
|
||||
define <8 x i32> @unpacklops2(<8 x i32>* %src1, <8 x i32>* %src2) nounwind uwtable readnone ssp {
|
||||
entry:
|
||||
%a = load <8 x i32>* %src1
|
||||
%b = load <8 x i32>* %src2
|
||||
%shuffle.i = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 4, i32 12, i32 5, i32 13>
|
||||
ret <8 x i32> %shuffle.i
|
||||
}
|
||||
|
||||
; CHECK: vunpcklpd
|
||||
define <4 x i64> @unpacklopd1(<4 x i64> %src1, <4 x i64> %src2) nounwind uwtable readnone ssp {
|
||||
entry:
|
||||
%shuffle.i = shufflevector <4 x i64> %src1, <4 x i64> %src2, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
|
||||
ret <4 x i64> %shuffle.i
|
||||
}
|
||||
|
||||
; CHECK: vunpcklpd (%
|
||||
define <4 x i64> @unpacklopd2(<4 x i64>* %src1, <4 x i64>* %src2) nounwind uwtable readnone ssp {
|
||||
entry:
|
||||
%a = load <4 x i64>* %src1
|
||||
%b = load <4 x i64>* %src2
|
||||
%shuffle.i = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
|
||||
ret <4 x i64> %shuffle.i
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue