forked from OSchip/llvm-project
[X86][AVX512] Move v2i64/v4i64 VPABS lowering to tablegen
Extend NoVLX targets to use the 512-bit versions.
llvm-svn: 302359
This commit is contained in:
parent
0f4e94673d
commit
fea153f341
|
@ -1240,8 +1240,6 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
|
|||
}
|
||||
}
|
||||
if (Subtarget.hasVLX()) {
|
||||
setOperationAction(ISD::ABS, MVT::v4i64, Legal);
|
||||
setOperationAction(ISD::ABS, MVT::v2i64, Legal);
|
||||
setOperationAction(ISD::SINT_TO_FP, MVT::v8i32, Legal);
|
||||
setOperationAction(ISD::UINT_TO_FP, MVT::v8i32, Legal);
|
||||
setOperationAction(ISD::FP_TO_SINT, MVT::v8i32, Legal);
|
||||
|
@ -1308,6 +1306,10 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
|
|||
|
||||
setOperationAction(ISD::MUL, MVT::v16i32, Legal);
|
||||
|
||||
// NonVLX sub-targets extend 128/256 vectors to use the 512 version.
|
||||
setOperationAction(ISD::ABS, MVT::v4i64, Legal);
|
||||
setOperationAction(ISD::ABS, MVT::v2i64, Legal);
|
||||
|
||||
for (auto VT : { MVT::v8i1, MVT::v16i1 }) {
|
||||
setOperationAction(ISD::ADD, VT, Custom);
|
||||
setOperationAction(ISD::SUB, VT, Custom);
|
||||
|
|
|
@ -8631,6 +8631,20 @@ multiclass avx512_unary_rm_vl_all<bits<8> opc_b, bits<8> opc_w,
|
|||
|
||||
defm VPABS : avx512_unary_rm_vl_all<0x1C, 0x1D, 0x1E, 0x1F, "vpabs", abs>;
|
||||
|
||||
// VPABS: Use 512bit version to implement 128/256 bit in case NoVLX.
|
||||
let Predicates = [HasAVX512, NoVLX] in {
// The patterns in this scope are guarded by both HasAVX512 and NoVLX.
// Each one matches a 128/256-bit i64 `abs`, widens the operand into a v8i64
// whose remaining lanes are IMPLICIT_DEF, applies VPABSQZrr, and extracts the
// original-width subregister from the result.
|
||||
// v4i64: the source is inserted at sub_ymm and the result is read back from sub_ymm.
def : Pat<(v4i64 (abs VR256X:$src)),
|
||||
(EXTRACT_SUBREG
|
||||
(VPABSQZrr
|
||||
(INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm)),
|
||||
sub_ymm)>;
|
||||
// v2i64: same scheme, using sub_xmm for both the insert and the extract.
def : Pat<(v2i64 (abs VR128X:$src)),
|
||||
(EXTRACT_SUBREG
|
||||
(VPABSQZrr
|
||||
(INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm)),
|
||||
sub_xmm)>;
|
||||
}
|
||||
|
||||
multiclass avx512_ctlz<bits<8> opc, string OpcodeStr, Predicate prd>{
|
||||
|
||||
defm NAME : avx512_unary_rm_vl_dq<opc, opc, OpcodeStr, ctlz, prd>;
|
||||
|
|
|
@ -1,5 +1,7 @@
|
|||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
||||
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s
|
||||
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=CHECK --check-prefix=AVX2
|
||||
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefix=CHECK --check-prefix=AVX512 --check-prefix=AVX512F
|
||||
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512vl | FileCheck %s --check-prefix=CHECK --check-prefix=AVX512 --check-prefix=AVX512VL
|
||||
|
||||
; fold (abs c1) -> c2
|
||||
define <4 x i32> @combine_v4i32_abs_constant() {
|
||||
|
@ -46,17 +48,29 @@ define <32 x i8> @combine_v32i8_abs_abs(<32 x i8> %a) {
|
|||
}
|
||||
|
||||
define <4 x i64> @combine_v4i64_abs_abs(<4 x i64> %a) {
|
||||
; CHECK-LABEL: combine_v4i64_abs_abs:
|
||||
; CHECK: # BB#0:
|
||||
; CHECK-NEXT: vpsrad $31, %ymm0, %ymm1
|
||||
; CHECK-NEXT: vpshufd {{.*#+}} ymm1 = ymm1[1,1,3,3,5,5,7,7]
|
||||
; CHECK-NEXT: vpaddq %ymm1, %ymm0, %ymm0
|
||||
; CHECK-NEXT: vpxor %ymm1, %ymm0, %ymm0
|
||||
; CHECK-NEXT: vpsrad $31, %ymm0, %ymm1
|
||||
; CHECK-NEXT: vpshufd {{.*#+}} ymm1 = ymm1[1,1,3,3,5,5,7,7]
|
||||
; CHECK-NEXT: vpaddq %ymm1, %ymm0, %ymm0
|
||||
; CHECK-NEXT: vpxor %ymm1, %ymm0, %ymm0
|
||||
; CHECK-NEXT: retq
|
||||
; AVX2-LABEL: combine_v4i64_abs_abs:
|
||||
; AVX2: # BB#0:
|
||||
; AVX2-NEXT: vpsrad $31, %ymm0, %ymm1
|
||||
; AVX2-NEXT: vpshufd {{.*#+}} ymm1 = ymm1[1,1,3,3,5,5,7,7]
|
||||
; AVX2-NEXT: vpaddq %ymm1, %ymm0, %ymm0
|
||||
; AVX2-NEXT: vpxor %ymm1, %ymm0, %ymm0
|
||||
; AVX2-NEXT: vpsrad $31, %ymm0, %ymm1
|
||||
; AVX2-NEXT: vpshufd {{.*#+}} ymm1 = ymm1[1,1,3,3,5,5,7,7]
|
||||
; AVX2-NEXT: vpaddq %ymm1, %ymm0, %ymm0
|
||||
; AVX2-NEXT: vpxor %ymm1, %ymm0, %ymm0
|
||||
; AVX2-NEXT: retq
|
||||
;
|
||||
; AVX512F-LABEL: combine_v4i64_abs_abs:
|
||||
; AVX512F: # BB#0:
|
||||
; AVX512F-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
|
||||
; AVX512F-NEXT: vpabsq %zmm0, %zmm0
|
||||
; AVX512F-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<kill>
|
||||
; AVX512F-NEXT: retq
|
||||
;
|
||||
; AVX512VL-LABEL: combine_v4i64_abs_abs:
|
||||
; AVX512VL: # BB#0:
|
||||
; AVX512VL-NEXT: vpabsq %ymm0, %ymm0
|
||||
; AVX512VL-NEXT: retq
|
||||
%n1 = sub <4 x i64> zeroinitializer, %a
|
||||
%b1 = icmp slt <4 x i64> %a, zeroinitializer
|
||||
%a1 = select <4 x i1> %b1, <4 x i64> %n1, <4 x i64> %a
|
||||
|
|
Loading…
Reference in New Issue