forked from OSchip/llvm-project
AVX512BW: Fix SRA v64i8 lowering. Use PCMPGTM (compare result in a k register) for 512-bit vectors, because PCMPGT is supported only for 128/256-bit vectors.
Differential Revision: http://reviews.llvm.org/D18204 llvm-svn: 263624
This commit is contained in:
parent
770c627ad0
commit
0ba7b04f5f
|
@ -19233,6 +19233,11 @@ static SDValue LowerScalarImmediateShift(SDValue Op, SelectionDAG &DAG,
|
||||||
// ashr(R, 7) === cmp_slt(R, 0)
|
// ashr(R, 7) === cmp_slt(R, 0)
|
||||||
if (Op.getOpcode() == ISD::SRA && ShiftAmt == 7) {
|
if (Op.getOpcode() == ISD::SRA && ShiftAmt == 7) {
|
||||||
SDValue Zeros = getZeroVector(VT, Subtarget, DAG, dl);
|
SDValue Zeros = getZeroVector(VT, Subtarget, DAG, dl);
|
||||||
|
if (VT.is512BitVector()) {
|
||||||
|
assert(VT == MVT::v64i8 && "Unexpected element type!");
|
||||||
|
SDValue CMP = DAG.getNode(X86ISD::PCMPGTM, dl, MVT::v64i1, Zeros, R);
|
||||||
|
return DAG.getNode(ISD::SIGN_EXTEND, dl, VT, CMP);
|
||||||
|
}
|
||||||
return DAG.getNode(X86ISD::PCMPGT, dl, VT, Zeros, R);
|
return DAG.getNode(X86ISD::PCMPGT, dl, VT, Zeros, R);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -376,3 +376,21 @@ define <64 x i8> @splatconstant_shift_v64i8(<64 x i8> %a) nounwind {
|
||||||
%shift = ashr <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
|
%shift = ashr <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
|
||||||
ret <64 x i8> %shift
|
ret <64 x i8> %shift
|
||||||
}
|
}
|
||||||
|
|
||||||
|
define <64 x i8> @ashr_const7_v64i8(<64 x i8> %a) {
|
||||||
|
; AVX512DQ-LABEL: ashr_const7_v64i8:
|
||||||
|
; AVX512DQ: ## BB#0:
|
||||||
|
; AVX512DQ-NEXT: vpxor %ymm2, %ymm2, %ymm2
|
||||||
|
; AVX512DQ-NEXT: vpcmpgtb %ymm0, %ymm2, %ymm0
|
||||||
|
; AVX512DQ-NEXT: vpcmpgtb %ymm1, %ymm2, %ymm1
|
||||||
|
; AVX512DQ-NEXT: retq
|
||||||
|
;
|
||||||
|
; AVX512BW-LABEL: ashr_const7_v64i8:
|
||||||
|
; AVX512BW: ## BB#0:
|
||||||
|
; AVX512BW-NEXT: vpxord %zmm1, %zmm1, %zmm1
|
||||||
|
; AVX512BW-NEXT: vpcmpgtb %zmm0, %zmm1, %k0
|
||||||
|
; AVX512BW-NEXT: vpmovm2b %k0, %zmm0
|
||||||
|
; AVX512BW-NEXT: retq
|
||||||
|
%res = ashr <64 x i8> %a, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
|
||||||
|
ret <64 x i8> %res
|
||||||
|
}
|
||||||
|
|
Loading…
Reference in New Issue