forked from OSchip/llvm-project
X86: Custom lower sext v16i8 to v16i16, and the corresponding truncate.
Also update the cost model. llvm-svn: 193270
This commit is contained in:
parent
0eb8bbdeab
commit
0ccab2d66c
|
@ -1150,9 +1150,6 @@ void X86TargetLowering::resetOperationActions() {
|
|||
setOperationAction(ISD::FNEG, MVT::v4f64, Custom);
|
||||
setOperationAction(ISD::FABS, MVT::v4f64, Custom);
|
||||
|
||||
setOperationAction(ISD::TRUNCATE, MVT::v8i16, Custom);
|
||||
setOperationAction(ISD::TRUNCATE, MVT::v4i32, Custom);
|
||||
|
||||
setOperationAction(ISD::FP_TO_SINT, MVT::v8i16, Custom);
|
||||
|
||||
setOperationAction(ISD::FP_TO_SINT, MVT::v8i32, Legal);
|
||||
|
@ -1160,8 +1157,6 @@ void X86TargetLowering::resetOperationActions() {
|
|||
setOperationAction(ISD::SINT_TO_FP, MVT::v8i32, Legal);
|
||||
setOperationAction(ISD::FP_ROUND, MVT::v4f32, Legal);
|
||||
|
||||
setOperationAction(ISD::ZERO_EXTEND, MVT::v16i16, Custom);
|
||||
setOperationAction(ISD::ZERO_EXTEND, MVT::v8i32, Custom);
|
||||
setOperationAction(ISD::UINT_TO_FP, MVT::v8i8, Custom);
|
||||
setOperationAction(ISD::UINT_TO_FP, MVT::v8i16, Custom);
|
||||
|
||||
|
@ -1194,10 +1189,16 @@ void X86TargetLowering::resetOperationActions() {
|
|||
|
||||
setOperationAction(ISD::SIGN_EXTEND, MVT::v4i64, Custom);
|
||||
setOperationAction(ISD::SIGN_EXTEND, MVT::v8i32, Custom);
|
||||
setOperationAction(ISD::SIGN_EXTEND, MVT::v16i16, Custom);
|
||||
setOperationAction(ISD::ZERO_EXTEND, MVT::v4i64, Custom);
|
||||
setOperationAction(ISD::ZERO_EXTEND, MVT::v8i32, Custom);
|
||||
setOperationAction(ISD::ZERO_EXTEND, MVT::v16i16, Custom);
|
||||
setOperationAction(ISD::ANY_EXTEND, MVT::v4i64, Custom);
|
||||
setOperationAction(ISD::ANY_EXTEND, MVT::v8i32, Custom);
|
||||
setOperationAction(ISD::ANY_EXTEND, MVT::v16i16, Custom);
|
||||
setOperationAction(ISD::TRUNCATE, MVT::v16i8, Custom);
|
||||
setOperationAction(ISD::TRUNCATE, MVT::v8i16, Custom);
|
||||
setOperationAction(ISD::TRUNCATE, MVT::v4i32, Custom);
|
||||
|
||||
if (Subtarget->hasFMA() || Subtarget->hasFMA4()) {
|
||||
setOperationAction(ISD::FMA, MVT::v8f32, Legal);
|
||||
|
@ -10391,7 +10392,8 @@ static SDValue LowerSIGN_EXTEND(SDValue Op, const X86Subtarget *Subtarget,
|
|||
return LowerSIGN_EXTEND_AVX512(Op, DAG);
|
||||
|
||||
if ((VT != MVT::v4i64 || InVT != MVT::v4i32) &&
|
||||
(VT != MVT::v8i32 || InVT != MVT::v8i16))
|
||||
(VT != MVT::v8i32 || InVT != MVT::v8i16) &&
|
||||
(VT != MVT::v16i16 || InVT != MVT::v16i8))
|
||||
return SDValue();
|
||||
|
||||
if (Subtarget->hasInt256())
|
||||
|
|
|
@ -5602,16 +5602,19 @@ let Predicates = [HasAVX2] in {
|
|||
|
||||
def : Pat<(v4i64 (X86vsmovl (v4i32 VR128:$src))), (VPMOVSXDQYrr VR128:$src)>;
|
||||
def : Pat<(v8i32 (X86vsmovl (v8i16 VR128:$src))), (VPMOVSXWDYrr VR128:$src)>;
|
||||
def : Pat<(v16i16 (X86vsmovl (v16i8 VR128:$src))), (VPMOVSXBWYrr VR128:$src)>;
|
||||
}
|
||||
|
||||
// Patterns guarded by the HasAVX predicate: an X86vsmovl node on a VR128
// source is matched to a VPMOVSX*rr instruction, one pattern per element
// widening (v4i32 -> v2i64, v8i16 -> v4i32, v16i8 -> v8i16).
let Predicates = [HasAVX] in {
|
||||
def : Pat<(v2i64 (X86vsmovl (v4i32 VR128:$src))), (VPMOVSXDQrr VR128:$src)>;
|
||||
def : Pat<(v4i32 (X86vsmovl (v8i16 VR128:$src))), (VPMOVSXWDrr VR128:$src)>;
|
||||
def : Pat<(v8i16 (X86vsmovl (v16i8 VR128:$src))), (VPMOVSXBWrr VR128:$src)>;
|
||||
}
|
||||
|
||||
// Patterns guarded by the UseSSE41 predicate: the same three X86vsmovl
// widenings (v4i32 -> v2i64, v8i16 -> v4i32, v16i8 -> v8i16) on a VR128
// source, matched here to the PMOVSX*rr instructions.
let Predicates = [UseSSE41] in {
|
||||
def : Pat<(v2i64 (X86vsmovl (v4i32 VR128:$src))), (PMOVSXDQrr VR128:$src)>;
|
||||
def : Pat<(v4i32 (X86vsmovl (v8i16 VR128:$src))), (PMOVSXWDrr VR128:$src)>;
|
||||
def : Pat<(v8i16 (X86vsmovl (v16i8 VR128:$src))), (PMOVSXBWrr VR128:$src)>;
|
||||
}
|
||||
|
||||
|
||||
|
|
|
@ -401,12 +401,15 @@ unsigned X86TTI::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) const {
|
|||
|
||||
static const TypeConversionCostTblEntry<MVT::SimpleValueType>
|
||||
AVXConversionTbl[] = {
|
||||
{ ISD::SIGN_EXTEND, MVT::v16i16, MVT::v16i8, 1 },
|
||||
{ ISD::ZERO_EXTEND, MVT::v16i16, MVT::v16i8, 1 },
|
||||
{ ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i16, 1 },
|
||||
{ ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i16, 1 },
|
||||
{ ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i32, 1 },
|
||||
{ ISD::ZERO_EXTEND, MVT::v4i64, MVT::v4i32, 1 },
|
||||
{ ISD::TRUNCATE, MVT::v4i32, MVT::v4i64, 1 },
|
||||
{ ISD::TRUNCATE, MVT::v8i16, MVT::v8i32, 1 },
|
||||
{ ISD::TRUNCATE, MVT::v16i8, MVT::v16i16, 2 },
|
||||
|
||||
{ ISD::SINT_TO_FP, MVT::v8f32, MVT::v8i1, 8 },
|
||||
{ ISD::SINT_TO_FP, MVT::v8f32, MVT::v8i8, 8 },
|
||||
|
|
|
@ -38,6 +38,10 @@ define i32 @zext_sext(<8 x i1> %in) {
|
|||
;CHECK: cost of 9 {{.*}} sext
|
||||
%S = sext <8 x i1> %in to <8 x i32>
|
||||
|
||||
;CHECK: cost of 1 {{.*}} zext
|
||||
%A1 = zext <16 x i8> undef to <16 x i16>
|
||||
;CHECK: cost of 1 {{.*}} sext
|
||||
%A2 = sext <16 x i8> undef to <16 x i16>
|
||||
;CHECK: cost of 1 {{.*}} sext
|
||||
%A = sext <8 x i16> undef to <8 x i32>
|
||||
;CHECK: cost of 1 {{.*}} zext
|
||||
|
@ -51,11 +55,13 @@ define i32 @zext_sext(<8 x i1> %in) {
|
|||
|
||||
;CHECK: cost of 1 {{.*}} zext
|
||||
%D = zext <4 x i32> undef to <4 x i64>
|
||||
;CHECK: cost of 1 {{.*}} trunc
|
||||
|
||||
;CHECK: cost of 1 {{.*}} trunc
|
||||
%E = trunc <4 x i64> undef to <4 x i32>
|
||||
;CHECK: cost of 1 {{.*}} trunc
|
||||
%F = trunc <8 x i32> undef to <8 x i16>
|
||||
;CHECK: cost of 2 {{.*}} trunc
|
||||
%F1 = trunc <16 x i16> undef to <16 x i8>
|
||||
|
||||
;CHECK: cost of 3 {{.*}} trunc
|
||||
%G = trunc <8 x i64> undef to <8 x i32>
|
||||
|
|
|
@ -154,6 +154,17 @@ define <4 x i64> @sext_4i1_to_4i64(<4 x i1> %mask) {
|
|||
ret <4 x i64> %extmask
|
||||
}
|
||||
|
||||
; Test: loads a <16 x i8> through %ptr and sign-extends it to <16 x i16>.
; The directives below expect vpmovsxbw, vmovhlps, vpmovsxbw and ret to
; appear in that order after the function label.
; AVX-LABEL: sext_16i8_to_16i16
|
||||
; AVX: vpmovsxbw
|
||||
; AVX: vmovhlps
|
||||
; AVX: vpmovsxbw
|
||||
; AVX: ret
|
||||
define <16 x i16> @sext_16i8_to_16i16(<16 x i8> *%ptr) {
|
||||
%X = load <16 x i8>* %ptr
|
||||
%Y = sext <16 x i8> %X to <16 x i16>
|
||||
ret <16 x i16> %Y
|
||||
}
|
||||
|
||||
; AVX: sext_4i8_to_4i64
|
||||
; AVX: vpslld $24
|
||||
; AVX: vpsrad $24
|
||||
|
|
|
@ -12,4 +12,9 @@ define <8 x i16> @trunc_32_16(<8 x i32> %A) nounwind uwtable readnone ssp{
|
|||
%B = trunc <8 x i32> %A to <8 x i16>
|
||||
ret <8 x i16>%B
|
||||
}
|
||||
|
||||
; Test: truncates the <16 x i16> argument to <16 x i8> and returns it.
; The only expectation on the generated code is that a pshufb appears
; after the function label.
define <16 x i8> @trunc_16_8(<16 x i16> %A) nounwind uwtable readnone ssp{
|
||||
; CHECK-LABEL: trunc_16_8
|
||||
; CHECK: pshufb
|
||||
%B = trunc <16 x i16> %A to <16 x i8>
|
||||
ret <16 x i8> %B
|
||||
}
|
||||
|
|
|
@ -72,6 +72,25 @@ define <16 x i16> @zext_16i8_16i16(<16 x i8> %z) {
|
|||
ret <16 x i16> %t
|
||||
}
|
||||
|
||||
; Test: sign-extends the <16 x i8> argument to <16 x i16>.
; Expects a vpmovsxbw and then ret, with no vinsert in between.
; NOTE(review): presumably this guards against splitting into two halves
; that are re-joined with an insert -- confirm against the RUN line.
; CHECK-LABEL: sext_16i8_16i16:
|
||||
; CHECK: vpmovsxbw
|
||||
; CHECK-NOT: vinsert
|
||||
; CHECK: ret
|
||||
define <16 x i16> @sext_16i8_16i16(<16 x i8> %z) {
|
||||
%t = sext <16 x i8> %z to <16 x i16>
|
||||
ret <16 x i16> %t
|
||||
}
|
||||
|
||||
; Test: truncates the <16 x i16> argument to <16 x i8>.
; Expects two vpshufb instructions followed by a vpor and then ret, in
; that order after the function label.
; CHECK-LABEL: trunc_16i16_16i8:
|
||||
; CHECK: vpshufb
|
||||
; CHECK: vpshufb
|
||||
; CHECK: vpor
|
||||
; CHECK: ret
|
||||
define <16 x i8> @trunc_16i16_16i8(<16 x i16> %z) {
|
||||
%t = trunc <16 x i16> %z to <16 x i8>
|
||||
ret <16 x i8> %t
|
||||
}
|
||||
|
||||
; CHECK: load_sext_test1
|
||||
; CHECK: vpmovsxdq (%r{{[^,]*}}), %ymm{{.*}}
|
||||
; CHECK: ret
|
||||
|
|
|
@ -86,8 +86,7 @@ define void @test6(<16 x i8>* %in, <16 x i16>* %out) nounwind {
|
|||
ret void
|
||||
|
||||
; AVX2-LABEL: test6:
|
||||
; FIXME: v16i8 -> v16i16 is scalarized.
|
||||
; AVX2-NOT: pmovsx
|
||||
; AVX2: vpmovsxbw
|
||||
}
|
||||
|
||||
define void @test7(<2 x i16>* %in, <2 x i64>* %out) nounwind {
|
||||
|
|
Loading…
Reference in New Issue