forked from OSchip/llvm-project
Use fp unpack instructions to unpack int types. Until we have AVX2, this
is the best we can do for these patterns. This fixes PR10554. llvm-svn: 137161
This commit is contained in:
parent
4ef2426b87
commit
6963062a99
|
@ -5940,7 +5940,9 @@ static inline unsigned getUNPCKLOpcode(EVT VT) {
|
|||
case MVT::v2i64: return X86ISD::PUNPCKLQDQ;
|
||||
case MVT::v4f32: return X86ISD::UNPCKLPS;
|
||||
case MVT::v2f64: return X86ISD::UNPCKLPD;
|
||||
case MVT::v8i32: // Use fp unit for int unpack.
|
||||
case MVT::v8f32: return X86ISD::VUNPCKLPSY;
|
||||
case MVT::v4i64: // Use fp unit for int unpack.
|
||||
case MVT::v4f64: return X86ISD::VUNPCKLPDY;
|
||||
case MVT::v16i8: return X86ISD::PUNPCKLBW;
|
||||
case MVT::v8i16: return X86ISD::PUNPCKLWD;
|
||||
|
@ -5956,7 +5958,9 @@ static inline unsigned getUNPCKHOpcode(EVT VT) {
|
|||
case MVT::v2i64: return X86ISD::PUNPCKHQDQ;
|
||||
case MVT::v4f32: return X86ISD::UNPCKHPS;
|
||||
case MVT::v2f64: return X86ISD::UNPCKHPD;
|
||||
case MVT::v8i32: // Use fp unit for int unpack.
|
||||
case MVT::v8f32: return X86ISD::VUNPCKHPSY;
|
||||
case MVT::v4i64: // Use fp unit for int unpack.
|
||||
case MVT::v4f64: return X86ISD::VUNPCKHPDY;
|
||||
case MVT::v16i8: return X86ISD::PUNPCKHBW;
|
||||
case MVT::v8i16: return X86ISD::PUNPCKHWD;
|
||||
|
|
|
@ -5799,18 +5799,24 @@ def : Pat<(X86Movddup (bc_v2f64
|
|||
// Shuffle with UNPCKLPS
|
||||
def : Pat<(v4f32 (X86Unpcklps VR128:$src1, (memopv4f32 addr:$src2))),
|
||||
(VUNPCKLPSrm VR128:$src1, addr:$src2)>, Requires<[HasAVX]>;
|
||||
def : Pat<(v8f32 (X86Unpcklpsy VR256:$src1, (memopv8f32 addr:$src2))),
|
||||
(VUNPCKLPSYrm VR256:$src1, addr:$src2)>, Requires<[HasAVX]>;
|
||||
def : Pat<(v4f32 (X86Unpcklps VR128:$src1, (memopv4f32 addr:$src2))),
|
||||
(UNPCKLPSrm VR128:$src1, addr:$src2)>;
|
||||
|
||||
def : Pat<(v4f32 (X86Unpcklps VR128:$src1, VR128:$src2)),
|
||||
(VUNPCKLPSrr VR128:$src1, VR128:$src2)>, Requires<[HasAVX]>;
|
||||
def : Pat<(v8f32 (X86Unpcklpsy VR256:$src1, VR256:$src2)),
|
||||
(VUNPCKLPSYrr VR256:$src1, VR256:$src2)>, Requires<[HasAVX]>;
|
||||
def : Pat<(v4f32 (X86Unpcklps VR128:$src1, VR128:$src2)),
|
||||
(UNPCKLPSrr VR128:$src1, VR128:$src2)>;
|
||||
|
||||
// Shuffle with VUNPCKLPSY
|
||||
def : Pat<(v8f32 (X86Unpcklpsy VR256:$src1, (memopv8f32 addr:$src2))),
|
||||
(VUNPCKLPSYrm VR256:$src1, addr:$src2)>, Requires<[HasAVX]>;
|
||||
def : Pat<(v8f32 (X86Unpcklpsy VR256:$src1, VR256:$src2)),
|
||||
(VUNPCKLPSYrr VR256:$src1, VR256:$src2)>, Requires<[HasAVX]>;
|
||||
def : Pat<(v8i32 (X86Unpcklpsy VR256:$src1, VR256:$src2)),
|
||||
(VUNPCKLPSYrr VR256:$src1, VR256:$src2)>, Requires<[HasAVX]>;
|
||||
def : Pat<(v8i32 (X86Unpcklpsy VR256:$src1, (memopv8i32 addr:$src2))),
|
||||
(VUNPCKLPSYrm VR256:$src1, addr:$src2)>, Requires<[HasAVX]>;
|
||||
|
||||
// Shuffle with UNPCKHPS
|
||||
def : Pat<(v4f32 (X86Unpckhps VR128:$src1, (memopv4f32 addr:$src2))),
|
||||
(VUNPCKHPSrm VR128:$src1, addr:$src2)>, Requires<[HasAVX]>;
|
||||
|
@ -5828,21 +5834,33 @@ def : Pat<(v8f32 (X86Unpckhpsy VR256:$src1, (memopv8f32 addr:$src2))),
|
|||
def : Pat<(v8f32 (X86Unpckhpsy VR256:$src1, VR256:$src2)),
|
||||
(VUNPCKHPSYrr VR256:$src1, VR256:$src2)>, Requires<[HasAVX]>;
|
||||
|
||||
def : Pat<(v8i32 (X86Unpckhpsy VR256:$src1, (memopv8i32 addr:$src2))),
|
||||
(VUNPCKHPSYrm VR256:$src1, addr:$src2)>, Requires<[HasAVX]>;
|
||||
def : Pat<(v8i32 (X86Unpckhpsy VR256:$src1, VR256:$src2)),
|
||||
(VUNPCKHPSYrr VR256:$src1, VR256:$src2)>, Requires<[HasAVX]>;
|
||||
|
||||
// Shuffle with UNPCKLPD
|
||||
def : Pat<(v2f64 (X86Unpcklpd VR128:$src1, (memopv2f64 addr:$src2))),
|
||||
(VUNPCKLPDrm VR128:$src1, addr:$src2)>, Requires<[HasAVX]>;
|
||||
def : Pat<(v4f64 (X86Unpcklpdy VR256:$src1, (memopv4f64 addr:$src2))),
|
||||
(VUNPCKLPDYrm VR256:$src1, addr:$src2)>, Requires<[HasAVX]>;
|
||||
def : Pat<(v2f64 (X86Unpcklpd VR128:$src1, (memopv2f64 addr:$src2))),
|
||||
(UNPCKLPDrm VR128:$src1, addr:$src2)>;
|
||||
|
||||
def : Pat<(v2f64 (X86Unpcklpd VR128:$src1, VR128:$src2)),
|
||||
(VUNPCKLPDrr VR128:$src1, VR128:$src2)>, Requires<[HasAVX]>;
|
||||
def : Pat<(v4f64 (X86Unpcklpdy VR256:$src1, VR256:$src2)),
|
||||
(VUNPCKLPDYrr VR256:$src1, VR256:$src2)>, Requires<[HasAVX]>;
|
||||
def : Pat<(v2f64 (X86Unpcklpd VR128:$src1, VR128:$src2)),
|
||||
(UNPCKLPDrr VR128:$src1, VR128:$src2)>;
|
||||
|
||||
// Shuffle with VUNPCKLPDY
|
||||
def : Pat<(v4f64 (X86Unpcklpdy VR256:$src1, (memopv4f64 addr:$src2))),
|
||||
(VUNPCKLPDYrm VR256:$src1, addr:$src2)>, Requires<[HasAVX]>;
|
||||
def : Pat<(v4f64 (X86Unpcklpdy VR256:$src1, VR256:$src2)),
|
||||
(VUNPCKLPDYrr VR256:$src1, VR256:$src2)>, Requires<[HasAVX]>;
|
||||
|
||||
def : Pat<(v4i64 (X86Unpcklpdy VR256:$src1, (memopv4i64 addr:$src2))),
|
||||
(VUNPCKLPDYrm VR256:$src1, addr:$src2)>, Requires<[HasAVX]>;
|
||||
def : Pat<(v4i64 (X86Unpcklpdy VR256:$src1, VR256:$src2)),
|
||||
(VUNPCKLPDYrr VR256:$src1, VR256:$src2)>, Requires<[HasAVX]>;
|
||||
|
||||
// Shuffle with UNPCKHPD
|
||||
def : Pat<(v2f64 (X86Unpckhpd VR128:$src1, (memopv2f64 addr:$src2))),
|
||||
(VUNPCKHPDrm VR128:$src1, addr:$src2)>, Requires<[HasAVX]>;
|
||||
|
@ -5859,6 +5877,10 @@ def : Pat<(v4f64 (X86Unpckhpdy VR256:$src1, (memopv4f64 addr:$src2))),
|
|||
(VUNPCKHPDYrm VR256:$src1, addr:$src2)>, Requires<[HasAVX]>;
|
||||
def : Pat<(v4f64 (X86Unpckhpdy VR256:$src1, VR256:$src2)),
|
||||
(VUNPCKHPDYrr VR256:$src1, VR256:$src2)>, Requires<[HasAVX]>;
|
||||
def : Pat<(v4i64 (X86Unpckhpdy VR256:$src1, (memopv4i64 addr:$src2))),
|
||||
(VUNPCKHPDYrm VR256:$src1, addr:$src2)>, Requires<[HasAVX]>;
|
||||
def : Pat<(v4i64 (X86Unpckhpdy VR256:$src1, VR256:$src2)),
|
||||
(VUNPCKHPDYrr VR256:$src1, VR256:$src2)>, Requires<[HasAVX]>;
|
||||
|
||||
// Shuffle with MOVLHPS
|
||||
def : Pat<(X86Movlhps VR128:$src1,
|
||||
|
|
|
@ -56,3 +56,34 @@ entry:
|
|||
ret <4 x double> %shuffle.i
|
||||
}
|
||||
|
||||
;;;;
|
||||
;;;; Unpack versions using the fp unit for int unpacking
|
||||
;;;;
|
||||
|
||||
; CHECK: vunpckhps
|
||||
define <8 x i32> @unpackhips1(<8 x i32> %src1, <8 x i32> %src2) nounwind uwtable readnone ssp {
|
||||
entry:
|
||||
%shuffle.i = shufflevector <8 x i32> %src1, <8 x i32> %src2, <8 x i32> <i32 2, i32 10, i32 3, i32 11, i32 6, i32 14, i32 7, i32 15>
|
||||
ret <8 x i32> %shuffle.i
|
||||
}
|
||||
|
||||
; CHECK: vunpckhpd
|
||||
define <4 x i64> @unpackhipd1(<4 x i64> %src1, <4 x i64> %src2) nounwind uwtable readnone ssp {
|
||||
entry:
|
||||
%shuffle.i = shufflevector <4 x i64> %src1, <4 x i64> %src2, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
|
||||
ret <4 x i64> %shuffle.i
|
||||
}
|
||||
|
||||
; CHECK: vunpcklps
|
||||
define <8 x i32> @unpacklops1(<8 x i32> %src1, <8 x i32> %src2) nounwind uwtable readnone ssp {
|
||||
entry:
|
||||
%shuffle.i = shufflevector <8 x i32> %src1, <8 x i32> %src2, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 4, i32 12, i32 5, i32 13>
|
||||
ret <8 x i32> %shuffle.i
|
||||
}
|
||||
|
||||
; CHECK: vunpcklpd
|
||||
define <4 x i64> @unpacklopd1(<4 x i64> %src1, <4 x i64> %src2) nounwind uwtable readnone ssp {
|
||||
entry:
|
||||
%shuffle.i = shufflevector <4 x i64> %src1, <4 x i64> %src2, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
|
||||
ret <4 x i64> %shuffle.i
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue