AVX512BW: Fix SRA v64i8 lowering. Use PCMPGTM (cmp result in k register) for 512bit vector because PCMPGT supported only for 128/256bit.

Differential Revision: http://reviews.llvm.org/D18204 llvm-svn: 263624
2016-03-16 08:48:26 +00:00 · 2016-03-16 08:48:26 +00:00 · 0ba7b04f5f
parent 770c627ad0
commit 0ba7b04f5f
2 changed files with 23 additions and 0 deletions
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@ -19233,6 +19233,11 @@ static SDValue LowerScalarImmediateShift(SDValue Op, SelectionDAG &DAG,
        // ashr(R, 7)  === cmp_slt(R, 0)
        if (Op.getOpcode() == ISD::SRA && ShiftAmt == 7) {
          SDValue Zeros = getZeroVector(VT, Subtarget, DAG, dl);
          if (VT.is512BitVector()) {
            assert(VT == MVT::v64i8 && "Unexpected element type!");
            SDValue CMP = DAG.getNode(X86ISD::PCMPGTM, dl, MVT::v64i1, Zeros, R);
            return DAG.getNode(ISD::SIGN_EXTEND, dl, VT, CMP);
          }
          return DAG.getNode(X86ISD::PCMPGT, dl, VT, Zeros, R);
        }
--- a/llvm/test/CodeGen/X86/vector-shift-ashr-512.ll
+++ b/llvm/test/CodeGen/X86/vector-shift-ashr-512.ll
@ -376,3 +376,21 @@ define <64 x i8> @splatconstant_shift_v64i8(<64 x i8> %a) nounwind {
  %shift = ashr <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
  ret <64 x i8> %shift
 }
 define <64 x i8> @ashr_const7_v64i8(<64 x i8> %a) {
 ; AVX512DQ-LABEL: ashr_const7_v64i8:
 ; AVX512DQ:       ## BB#0:
 ; AVX512DQ-NEXT:    vpxor %ymm2, %ymm2, %ymm2
 ; AVX512DQ-NEXT:    vpcmpgtb %ymm0, %ymm2, %ymm0
 ; AVX512DQ-NEXT:    vpcmpgtb %ymm1, %ymm2, %ymm1
 ; AVX512DQ-NEXT:    retq
 ;
 ; AVX512BW-LABEL: ashr_const7_v64i8:
 ; AVX512BW:       ## BB#0:
 ; AVX512BW-NEXT:    vpxord %zmm1, %zmm1, %zmm1
 ; AVX512BW-NEXT:    vpcmpgtb %zmm0, %zmm1, %k0
 ; AVX512BW-NEXT:    vpmovm2b %k0, %zmm0
 ; AVX512BW-NEXT:    retq
  %res = ashr <64 x i8> %a, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
  ret <64 x i8> %res
 }