diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 71b23e2ccb0c..e9510af775fe 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -5940,7 +5940,9 @@ static inline unsigned getUNPCKLOpcode(EVT VT) { case MVT::v2i64: return X86ISD::PUNPCKLQDQ; case MVT::v4f32: return X86ISD::UNPCKLPS; case MVT::v2f64: return X86ISD::UNPCKLPD; + case MVT::v8i32: // Use fp unit for int unpack. case MVT::v8f32: return X86ISD::VUNPCKLPSY; + case MVT::v4i64: // Use fp unit for int unpack. case MVT::v4f64: return X86ISD::VUNPCKLPDY; case MVT::v16i8: return X86ISD::PUNPCKLBW; case MVT::v8i16: return X86ISD::PUNPCKLWD; @@ -5956,7 +5958,9 @@ static inline unsigned getUNPCKHOpcode(EVT VT) { case MVT::v2i64: return X86ISD::PUNPCKHQDQ; case MVT::v4f32: return X86ISD::UNPCKHPS; case MVT::v2f64: return X86ISD::UNPCKHPD; + case MVT::v8i32: // Use fp unit for int unpack. case MVT::v8f32: return X86ISD::VUNPCKHPSY; + case MVT::v4i64: // Use fp unit for int unpack. case MVT::v4f64: return X86ISD::VUNPCKHPDY; case MVT::v16i8: return X86ISD::PUNPCKHBW; case MVT::v8i16: return X86ISD::PUNPCKHWD; diff --git a/llvm/lib/Target/X86/X86InstrSSE.td b/llvm/lib/Target/X86/X86InstrSSE.td index 2b8180aac04c..56b05e17ac2b 100644 --- a/llvm/lib/Target/X86/X86InstrSSE.td +++ b/llvm/lib/Target/X86/X86InstrSSE.td @@ -5799,18 +5799,24 @@ def : Pat<(X86Movddup (bc_v2f64 // Shuffle with UNPCKLPS def : Pat<(v4f32 (X86Unpcklps VR128:$src1, (memopv4f32 addr:$src2))), (VUNPCKLPSrm VR128:$src1, addr:$src2)>, Requires<[HasAVX]>; -def : Pat<(v8f32 (X86Unpcklpsy VR256:$src1, (memopv8f32 addr:$src2))), - (VUNPCKLPSYrm VR256:$src1, addr:$src2)>, Requires<[HasAVX]>; def : Pat<(v4f32 (X86Unpcklps VR128:$src1, (memopv4f32 addr:$src2))), (UNPCKLPSrm VR128:$src1, addr:$src2)>; def : Pat<(v4f32 (X86Unpcklps VR128:$src1, VR128:$src2)), (VUNPCKLPSrr VR128:$src1, VR128:$src2)>, Requires<[HasAVX]>; -def : Pat<(v8f32 (X86Unpcklpsy VR256:$src1, VR256:$src2)), - (VUNPCKLPSYrr VR256:$src1, VR256:$src2)>, Requires<[HasAVX]>; def : Pat<(v4f32 (X86Unpcklps VR128:$src1, VR128:$src2)), (UNPCKLPSrr VR128:$src1, VR128:$src2)>; +// Shuffle with VUNPCKHPSY +def : Pat<(v8f32 (X86Unpcklpsy VR256:$src1, (memopv8f32 addr:$src2))), + (VUNPCKLPSYrm VR256:$src1, addr:$src2)>, Requires<[HasAVX]>; +def : Pat<(v8f32 (X86Unpcklpsy VR256:$src1, VR256:$src2)), + (VUNPCKLPSYrr VR256:$src1, VR256:$src2)>, Requires<[HasAVX]>; +def : Pat<(v8i32 (X86Unpcklpsy VR256:$src1, VR256:$src2)), + (VUNPCKLPSYrr VR256:$src1, VR256:$src2)>, Requires<[HasAVX]>; +def : Pat<(v8i32 (X86Unpcklpsy VR256:$src1, (memopv8i32 addr:$src2))), + (VUNPCKLPSYrm VR256:$src1, addr:$src2)>, Requires<[HasAVX]>; + // Shuffle with UNPCKHPS def : Pat<(v4f32 (X86Unpckhps VR128:$src1, (memopv4f32 addr:$src2))), (VUNPCKHPSrm VR128:$src1, addr:$src2)>, Requires<[HasAVX]>; @@ -5828,21 +5834,33 @@ def : Pat<(v8f32 (X86Unpckhpsy VR256:$src1, (memopv8f32 addr:$src2))), def : Pat<(v8f32 (X86Unpckhpsy VR256:$src1, VR256:$src2)), (VUNPCKHPSYrr VR256:$src1, VR256:$src2)>, Requires<[HasAVX]>; +def : Pat<(v8i32 (X86Unpckhpsy VR256:$src1, (memopv8i32 addr:$src2))), + (VUNPCKHPSYrm VR256:$src1, addr:$src2)>, Requires<[HasAVX]>; +def : Pat<(v8i32 (X86Unpckhpsy VR256:$src1, VR256:$src2)), + (VUNPCKHPSYrr VR256:$src1, VR256:$src2)>, Requires<[HasAVX]>; + // Shuffle with UNPCKLPD def : Pat<(v2f64 (X86Unpcklpd VR128:$src1, (memopv2f64 addr:$src2))), (VUNPCKLPDrm VR128:$src1, addr:$src2)>, Requires<[HasAVX]>; -def : Pat<(v4f64 (X86Unpcklpdy VR256:$src1, (memopv4f64 addr:$src2))), - (VUNPCKLPDYrm VR256:$src1, addr:$src2)>, Requires<[HasAVX]>; def : Pat<(v2f64 (X86Unpcklpd VR128:$src1, (memopv2f64 addr:$src2))), (UNPCKLPDrm VR128:$src1, addr:$src2)>; def : Pat<(v2f64 (X86Unpcklpd VR128:$src1, VR128:$src2)), (VUNPCKLPDrr VR128:$src1, VR128:$src2)>, Requires<[HasAVX]>; -def : Pat<(v4f64 (X86Unpcklpdy VR256:$src1, VR256:$src2)), - (VUNPCKLPDYrr VR256:$src1, VR256:$src2)>, Requires<[HasAVX]>; def : Pat<(v2f64 (X86Unpcklpd VR128:$src1, VR128:$src2)), (UNPCKLPDrr VR128:$src1, VR128:$src2)>; +// Shuffle with VUNPCKLPDY +def : Pat<(v4f64 (X86Unpcklpdy VR256:$src1, (memopv4f64 addr:$src2))), + (VUNPCKLPDYrm VR256:$src1, addr:$src2)>, Requires<[HasAVX]>; +def : Pat<(v4f64 (X86Unpcklpdy VR256:$src1, VR256:$src2)), + (VUNPCKLPDYrr VR256:$src1, VR256:$src2)>, Requires<[HasAVX]>; + +def : Pat<(v4i64 (X86Unpcklpdy VR256:$src1, (memopv4i64 addr:$src2))), + (VUNPCKLPDYrm VR256:$src1, addr:$src2)>, Requires<[HasAVX]>; +def : Pat<(v4i64 (X86Unpcklpdy VR256:$src1, VR256:$src2)), + (VUNPCKLPDYrr VR256:$src1, VR256:$src2)>, Requires<[HasAVX]>; + // Shuffle with UNPCKHPD def : Pat<(v2f64 (X86Unpckhpd VR128:$src1, (memopv2f64 addr:$src2))), (VUNPCKHPDrm VR128:$src1, addr:$src2)>, Requires<[HasAVX]>; @@ -5859,6 +5877,10 @@ def : Pat<(v4f64 (X86Unpckhpdy VR256:$src1, (memopv4f64 addr:$src2))), (VUNPCKHPDYrm VR256:$src1, addr:$src2)>, Requires<[HasAVX]>; def : Pat<(v4f64 (X86Unpckhpdy VR256:$src1, VR256:$src2)), (VUNPCKHPDYrr VR256:$src1, VR256:$src2)>, Requires<[HasAVX]>; +def : Pat<(v4i64 (X86Unpckhpdy VR256:$src1, (memopv4i64 addr:$src2))), + (VUNPCKHPDYrm VR256:$src1, addr:$src2)>, Requires<[HasAVX]>; +def : Pat<(v4i64 (X86Unpckhpdy VR256:$src1, VR256:$src2)), + (VUNPCKHPDYrr VR256:$src1, VR256:$src2)>, Requires<[HasAVX]>; // Shuffle with MOVLHPS def : Pat<(X86Movlhps VR128:$src1, diff --git a/llvm/test/CodeGen/X86/avx-unpack.ll b/llvm/test/CodeGen/X86/avx-unpack.ll index 4e906ee15553..d420101339f5 100644 --- a/llvm/test/CodeGen/X86/avx-unpack.ll +++ b/llvm/test/CodeGen/X86/avx-unpack.ll @@ -56,3 +56,34 @@ entry: ret <4 x double> %shuffle.i } +;;;; +;;;; Unpack versions using the fp unit for int unpacking +;;;; + +; CHECK: vunpckhps +define <8 x i32> @unpackhips1(<8 x i32> %src1, <8 x i32> %src2) nounwind uwtable readnone ssp { +entry: + %shuffle.i = shufflevector <8 x i32> %src1, <8 x i32> %src2, <8 x i32> + ret <8 x i32> %shuffle.i +} + +; CHECK: vunpckhpd +define <4 x i64> @unpackhipd1(<4 x i64> %src1, <4 x i64> %src2) nounwind uwtable readnone ssp { +entry: + %shuffle.i = shufflevector <4 x i64> %src1, <4 x i64> %src2, <4 x i32> + ret <4 x i64> %shuffle.i +} + +; CHECK: vunpcklps +define <8 x i32> @unpacklops1(<8 x i32> %src1, <8 x i32> %src2) nounwind uwtable readnone ssp { +entry: + %shuffle.i = shufflevector <8 x i32> %src1, <8 x i32> %src2, <8 x i32> + ret <8 x i32> %shuffle.i +} + +; CHECK: vunpcklpd +define <4 x i64> @unpacklopd1(<4 x i64> %src1, <4 x i64> %src2) nounwind uwtable readnone ssp { +entry: + %shuffle.i = shufflevector <4 x i64> %src1, <4 x i64> %src2, <4 x i32> + ret <4 x i64> %shuffle.i +}