forked from OSchip/llvm-project
AVX-512: Added VRCP28 and VRSQRT28 instructions and intrinsics.
llvm-svn: 192283
This commit is contained in:
parent
1fdb076a31
commit
a3a714082b
|
@ -2745,29 +2745,54 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
|
|||
def int_x86_avx512_sqrt_ps_512 : GCCBuiltin<"__builtin_ia32_sqrtps512">,
|
||||
Intrinsic<[llvm_v16f32_ty], [llvm_v16f32_ty], [IntrNoMem]>;
|
||||
|
||||
def int_x86_avx512_rcp14_ps_512 : GCCBuiltin<"__builtin_ia32_avx512_rcp14ps512">,
|
||||
def int_x86_avx512_rcp14_ps_512 : GCCBuiltin<"__builtin_ia32_rcp14ps512">,
|
||||
Intrinsic<[llvm_v16f32_ty], [llvm_v16f32_ty],
|
||||
[IntrNoMem]>;
|
||||
def int_x86_avx512_rcp14_pd_512 : GCCBuiltin<"__builtin_ia32_avx512_rcp14pd512">,
|
||||
def int_x86_avx512_rcp14_pd_512 : GCCBuiltin<"__builtin_ia32_rcp14pd512">,
|
||||
Intrinsic<[llvm_v8f64_ty], [llvm_v8f64_ty],
|
||||
[IntrNoMem]>;
|
||||
def int_x86_avx512_rcp14_ss : GCCBuiltin<"__builtin_ia32_avx512_rcp14ss">,
|
||||
Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty],
|
||||
def int_x86_avx512_rcp14_ss : GCCBuiltin<"__builtin_ia32_rcp14ss">,
|
||||
Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty],
|
||||
[IntrNoMem]>;
|
||||
def int_x86_avx512_rcp14_sd : GCCBuiltin<"__builtin_ia32_avx512_rcp14sd">,
|
||||
Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty],
|
||||
def int_x86_avx512_rcp14_sd : GCCBuiltin<"__builtin_ia32_rcp14sd">,
|
||||
Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty],
|
||||
[IntrNoMem]>;
|
||||
def int_x86_avx512_rsqrt14_ps_512 : GCCBuiltin<"__builtin_ia32_avx512_rsqrt14ps512">,
|
||||
def int_x86_avx512_rsqrt14_ps_512 : GCCBuiltin<"__builtin_ia32_rsqrt14ps512">,
|
||||
Intrinsic<[llvm_v16f32_ty], [llvm_v16f32_ty],
|
||||
[IntrNoMem]>;
|
||||
def int_x86_avx512_rsqrt14_pd_512 : GCCBuiltin<"__builtin_ia32_avx512_rsqrt14pd512">,
|
||||
def int_x86_avx512_rsqrt14_pd_512 : GCCBuiltin<"__builtin_ia32_rsqrt14pd512">,
|
||||
Intrinsic<[llvm_v8f64_ty], [llvm_v8f64_ty],
|
||||
[IntrNoMem]>;
|
||||
def int_x86_avx512_rsqrt14_ss : GCCBuiltin<"__builtin_ia32_avx512_rsqrt14ss">,
|
||||
Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty],
|
||||
def int_x86_avx512_rsqrt14_ss : GCCBuiltin<"__builtin_ia32_rsqrt14ss">,
|
||||
Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty],
|
||||
[IntrNoMem]>;
|
||||
def int_x86_avx512_rsqrt14_sd : GCCBuiltin<"__builtin_ia32_avx512_rsqrt14sd">,
|
||||
Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty],
|
||||
def int_x86_avx512_rsqrt14_sd : GCCBuiltin<"__builtin_ia32_rsqrt14sd">,
|
||||
Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty],
|
||||
[IntrNoMem]>;
|
||||
|
||||
def int_x86_avx512_rcp28_ps_512 : GCCBuiltin<"__builtin_ia32_rcp28ps512">,
|
||||
Intrinsic<[llvm_v16f32_ty], [llvm_v16f32_ty],
|
||||
[IntrNoMem]>;
|
||||
def int_x86_avx512_rcp28_pd_512 : GCCBuiltin<"__builtin_ia32_rcp28pd512">,
|
||||
Intrinsic<[llvm_v8f64_ty], [llvm_v8f64_ty],
|
||||
[IntrNoMem]>;
|
||||
def int_x86_avx512_rcp28_ss : GCCBuiltin<"__builtin_ia32_rcp28ss">,
|
||||
Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty],
|
||||
[IntrNoMem]>;
|
||||
def int_x86_avx512_rcp28_sd : GCCBuiltin<"__builtin_ia32_rcp28sd">,
|
||||
Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty],
|
||||
[IntrNoMem]>;
|
||||
def int_x86_avx512_rsqrt28_ps_512 : GCCBuiltin<"__builtin_ia32_rsqrt28ps512">,
|
||||
Intrinsic<[llvm_v16f32_ty], [llvm_v16f32_ty],
|
||||
[IntrNoMem]>;
|
||||
def int_x86_avx512_rsqrt28_pd_512 : GCCBuiltin<"__builtin_ia32_rsqrt28pd512">,
|
||||
Intrinsic<[llvm_v8f64_ty], [llvm_v8f64_ty],
|
||||
[IntrNoMem]>;
|
||||
def int_x86_avx512_rsqrt28_ss : GCCBuiltin<"__builtin_ia32_rsqrt28ss">,
|
||||
Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty],
|
||||
[IntrNoMem]>;
|
||||
def int_x86_avx512_rsqrt28_sd : GCCBuiltin<"__builtin_ia32_rsqrt28sd">,
|
||||
Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty],
|
||||
[IntrNoMem]>;
|
||||
}
|
||||
|
||||
|
@ -2910,14 +2935,14 @@ let TargetPrefix = "x86" in {
|
|||
}
|
||||
|
||||
let TargetPrefix = "x86" in {
|
||||
def int_x86_avx512_mskblend_ps_512 : GCCBuiltin<"__builtin_ia32_avx512_mskblendps512">,
|
||||
def int_x86_avx512_mskblend_ps_512 : GCCBuiltin<"__builtin_ia32_mskblendps512">,
|
||||
Intrinsic<[llvm_v16f32_ty],
|
||||
[llvm_i16_ty, llvm_v16f32_ty, llvm_v16f32_ty],
|
||||
[IntrNoMem]>;
|
||||
def int_x86_avx512_cmpeq_pi_512 : GCCBuiltin<"__builtin_ia32_avx512_cmpeqpi512">,
|
||||
def int_x86_avx512_cmpeq_pi_512 : GCCBuiltin<"__builtin_ia32_cmpeqpi512">,
|
||||
Intrinsic<[llvm_i16_ty], [llvm_v16i32_ty, llvm_v16i32_ty],
|
||||
[IntrNoMem]>;
|
||||
def int_x86_avx512_and_pi : GCCBuiltin<"__builtin_ia32_avx512_andpi512">,
|
||||
def int_x86_avx512_and_pi : GCCBuiltin<"__builtin_ia32_andpi512">,
|
||||
Intrinsic<[llvm_v16i32_ty], [llvm_v16i32_ty, llvm_v16i32_ty],
|
||||
[IntrNoMem]>;
|
||||
}
|
||||
|
|
|
@ -2643,8 +2643,7 @@ multiclass avx512_fp_unop_p_int<bits<8> opc, string OpcodeStr,
|
|||
}
|
||||
|
||||
/// avx512_fp_unop_s - AVX-512 unops in scalar form.
|
||||
multiclass avx512_fp_unop_s<bits<8> opc, string OpcodeStr,
|
||||
Intrinsic F32Int, Intrinsic F64Int> {
|
||||
multiclass avx512_fp_unop_s<bits<8> opc, string OpcodeStr> {
|
||||
let hasSideEffects = 0 in {
|
||||
def SSZr : AVX5128I<opc, MRMSrcReg, (outs FR32X:$dst),
|
||||
(ins FR32X:$src1, FR32X:$src2),
|
||||
|
@ -2661,8 +2660,7 @@ multiclass avx512_fp_unop_s<bits<8> opc, string OpcodeStr,
|
|||
(ins VR128X:$src1, ssmem:$src2),
|
||||
!strconcat(OpcodeStr,
|
||||
"ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
|
||||
[(set VR128X:$dst, (F32Int VR128X:$src1, sse_load_f32:$src2))]>,
|
||||
EVEX_4V, EVEX_CD8<32, CD8VT1>;
|
||||
[]>, EVEX_4V, EVEX_CD8<32, CD8VT1>;
|
||||
}
|
||||
def SDZr : AVX5128I<opc, MRMSrcReg, (outs FR64X:$dst),
|
||||
(ins FR64X:$src1, FR64X:$src2),
|
||||
|
@ -2674,29 +2672,67 @@ multiclass avx512_fp_unop_s<bits<8> opc, string OpcodeStr,
|
|||
(ins FR64X:$src1, f64mem:$src2),
|
||||
!strconcat(OpcodeStr,
|
||||
"sd\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>,
|
||||
EVEX_4V, VEX_W, EVEX_CD8<32, CD8VT1>;
|
||||
EVEX_4V, VEX_W, EVEX_CD8<64, CD8VT1>;
|
||||
def SDZm_Int : AVX5128I<opc, MRMSrcMem, (outs VR128X:$dst),
|
||||
(ins VR128X:$src1, sdmem:$src2),
|
||||
!strconcat(OpcodeStr,
|
||||
"sd\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
|
||||
[(set VR128X:$dst, (F64Int VR128X:$src1, sse_load_f64:$src2))]>,
|
||||
EVEX_4V, VEX_W, EVEX_CD8<32, CD8VT1>;
|
||||
[]>, EVEX_4V, VEX_W, EVEX_CD8<64, CD8VT1>;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
defm VRCP14 : avx512_fp_unop_s<0x4D, "vrcp14", int_x86_avx512_rcp14_ss,
|
||||
int_x86_avx512_rcp14_sd>,
|
||||
defm VRCP14 : avx512_fp_unop_s<0x4D, "vrcp14">,
|
||||
avx512_fp_unop_p<0x4C, "vrcp14", X86frcp>,
|
||||
avx512_fp_unop_p_int<0x4C, "vrcp14",
|
||||
int_x86_avx512_rcp14_ps_512, int_x86_avx512_rcp14_pd_512>;
|
||||
|
||||
defm VRSQRT14 : avx512_fp_unop_s<0x4F, "vrsqrt14", int_x86_avx512_rsqrt14_ss,
|
||||
int_x86_avx512_rsqrt14_sd>,
|
||||
defm VRSQRT14 : avx512_fp_unop_s<0x4F, "vrsqrt14">,
|
||||
avx512_fp_unop_p<0x4E, "vrsqrt14", X86frsqrt>,
|
||||
avx512_fp_unop_p_int<0x4E, "vrsqrt14",
|
||||
int_x86_avx512_rsqrt14_ps_512, int_x86_avx512_rsqrt14_pd_512>;
|
||||
|
||||
def : Pat<(int_x86_avx512_rsqrt14_ss VR128X:$src),
|
||||
(COPY_TO_REGCLASS (VRSQRT14SSZr (f32 (IMPLICIT_DEF)),
|
||||
(COPY_TO_REGCLASS VR128X:$src, FR32)),
|
||||
VR128X)>;
|
||||
def : Pat<(int_x86_avx512_rsqrt14_ss sse_load_f32:$src),
|
||||
(VRSQRT14SSZm_Int (v4f32 (IMPLICIT_DEF)), sse_load_f32:$src)>;
|
||||
|
||||
def : Pat<(int_x86_avx512_rcp14_ss VR128X:$src),
|
||||
(COPY_TO_REGCLASS (VRCP14SSZr (f32 (IMPLICIT_DEF)),
|
||||
(COPY_TO_REGCLASS VR128X:$src, FR32)),
|
||||
VR128X)>;
|
||||
def : Pat<(int_x86_avx512_rcp14_ss sse_load_f32:$src),
|
||||
(VRCP14SSZm_Int (v4f32 (IMPLICIT_DEF)), sse_load_f32:$src)>;
|
||||
|
||||
let AddedComplexity = 20, Predicates = [HasERI] in {
|
||||
defm VRCP28 : avx512_fp_unop_s<0xCB, "vrcp28">,
|
||||
avx512_fp_unop_p<0xCA, "vrcp28", X86frcp>,
|
||||
avx512_fp_unop_p_int<0xCA, "vrcp28",
|
||||
int_x86_avx512_rcp28_ps_512, int_x86_avx512_rcp28_pd_512>;
|
||||
|
||||
defm VRSQRT28 : avx512_fp_unop_s<0xCD, "vrsqrt28">,
|
||||
avx512_fp_unop_p<0xCC, "vrsqrt28", X86frsqrt>,
|
||||
avx512_fp_unop_p_int<0xCC, "vrsqrt28",
|
||||
int_x86_avx512_rsqrt28_ps_512, int_x86_avx512_rsqrt28_pd_512>;
|
||||
}
|
||||
|
||||
let Predicates = [HasERI] in {
|
||||
def : Pat<(int_x86_avx512_rsqrt28_ss VR128X:$src),
|
||||
(COPY_TO_REGCLASS (VRSQRT28SSZr (f32 (IMPLICIT_DEF)),
|
||||
(COPY_TO_REGCLASS VR128X:$src, FR32)),
|
||||
VR128X)>;
|
||||
def : Pat<(int_x86_avx512_rsqrt28_ss sse_load_f32:$src),
|
||||
(VRSQRT28SSZm_Int (v4f32 (IMPLICIT_DEF)), sse_load_f32:$src)>;
|
||||
|
||||
def : Pat<(int_x86_avx512_rcp28_ss VR128X:$src),
|
||||
(COPY_TO_REGCLASS (VRCP28SSZr (f32 (IMPLICIT_DEF)),
|
||||
(COPY_TO_REGCLASS VR128X:$src, FR32)),
|
||||
VR128X)>;
|
||||
def : Pat<(int_x86_avx512_rcp28_ss sse_load_f32:$src),
|
||||
(VRCP28SSZm_Int (v4f32 (IMPLICIT_DEF)), sse_load_f32:$src)>;
|
||||
}
|
||||
multiclass avx512_sqrt_packed<bits<8> opc, string OpcodeStr, SDNode OpNode,
|
||||
Intrinsic V16F32Int, Intrinsic V8F64Int,
|
||||
OpndItins itins_s, OpndItins itins_d> {
|
||||
|
@ -2810,28 +2846,45 @@ defm VSQRT : avx512_sqrt_scalar<0x51, "sqrt",
|
|||
int_x86_avx512_sqrt_ps_512, int_x86_avx512_sqrt_pd_512,
|
||||
SSE_SQRTPS, SSE_SQRTPD>;
|
||||
|
||||
def : Pat<(f32 (fsqrt FR32X:$src)),
|
||||
(VSQRTSSZr (f32 (IMPLICIT_DEF)), FR32X:$src)>;
|
||||
def : Pat<(f32 (fsqrt (load addr:$src))),
|
||||
(VSQRTSSZm (f32 (IMPLICIT_DEF)), addr:$src)>,
|
||||
Requires<[OptForSize]>;
|
||||
def : Pat<(f64 (fsqrt FR64X:$src)),
|
||||
(VSQRTSDZr (f64 (IMPLICIT_DEF)), FR64X:$src)>;
|
||||
def : Pat<(f64 (fsqrt (load addr:$src))),
|
||||
(VSQRTSDZm (f64 (IMPLICIT_DEF)), addr:$src)>,
|
||||
Requires<[OptForSize]>;
|
||||
let Predicates = [HasAVX512] in {
|
||||
def : Pat<(f32 (fsqrt FR32X:$src)),
|
||||
(VSQRTSSZr (f32 (IMPLICIT_DEF)), FR32X:$src)>;
|
||||
def : Pat<(f32 (fsqrt (load addr:$src))),
|
||||
(VSQRTSSZm (f32 (IMPLICIT_DEF)), addr:$src)>,
|
||||
Requires<[OptForSize]>;
|
||||
def : Pat<(f64 (fsqrt FR64X:$src)),
|
||||
(VSQRTSDZr (f64 (IMPLICIT_DEF)), FR64X:$src)>;
|
||||
def : Pat<(f64 (fsqrt (load addr:$src))),
|
||||
(VSQRTSDZm (f64 (IMPLICIT_DEF)), addr:$src)>,
|
||||
Requires<[OptForSize]>;
|
||||
|
||||
def : Pat<(f32 (X86frsqrt FR32X:$src)),
|
||||
(VRSQRT14SSZr (f32 (IMPLICIT_DEF)), FR32X:$src)>;
|
||||
def : Pat<(f32 (X86frsqrt (load addr:$src))),
|
||||
(VRSQRT14SSZm (f32 (IMPLICIT_DEF)), addr:$src)>,
|
||||
Requires<[OptForSize]>;
|
||||
def : Pat<(f32 (X86frsqrt FR32X:$src)),
|
||||
(VRSQRT14SSZr (f32 (IMPLICIT_DEF)), FR32X:$src)>;
|
||||
def : Pat<(f32 (X86frsqrt (load addr:$src))),
|
||||
(VRSQRT14SSZm (f32 (IMPLICIT_DEF)), addr:$src)>,
|
||||
Requires<[OptForSize]>;
|
||||
|
||||
def : Pat<(f32 (X86frcp FR32X:$src)),
|
||||
(VRCP14SSZr (f32 (IMPLICIT_DEF)), FR32X:$src)>;
|
||||
def : Pat<(f32 (X86frcp (load addr:$src))),
|
||||
(VRCP14SSZm (f32 (IMPLICIT_DEF)), addr:$src)>,
|
||||
Requires<[OptForSize]>;
|
||||
|
||||
def : Pat<(int_x86_sse_sqrt_ss VR128X:$src),
|
||||
(COPY_TO_REGCLASS (VSQRTSSZr (f32 (IMPLICIT_DEF)),
|
||||
(COPY_TO_REGCLASS VR128X:$src, FR32)),
|
||||
VR128X)>;
|
||||
def : Pat<(int_x86_sse_sqrt_ss sse_load_f32:$src),
|
||||
(VSQRTSSZm_Int (v4f32 (IMPLICIT_DEF)), sse_load_f32:$src)>;
|
||||
|
||||
def : Pat<(int_x86_sse2_sqrt_sd VR128X:$src),
|
||||
(COPY_TO_REGCLASS (VSQRTSDZr (f64 (IMPLICIT_DEF)),
|
||||
(COPY_TO_REGCLASS VR128X:$src, FR64)),
|
||||
VR128X)>;
|
||||
def : Pat<(int_x86_sse2_sqrt_sd sse_load_f64:$src),
|
||||
(VSQRTSDZm_Int (v2f64 (IMPLICIT_DEF)), sse_load_f64:$src)>;
|
||||
}
|
||||
|
||||
def : Pat<(f32 (X86frcp FR32X:$src)),
|
||||
(VRCP14SSZr (f32 (IMPLICIT_DEF)), FR32X:$src)>;
|
||||
def : Pat<(f32 (X86frcp (load addr:$src))),
|
||||
(VRCP14SSZm (f32 (IMPLICIT_DEF)), addr:$src)>,
|
||||
Requires<[OptForSize]>;
|
||||
|
||||
multiclass avx512_fp_unop_rm<bits<8> opcps, bits<8> opcpd, string OpcodeStr,
|
||||
X86MemOperand x86memop, RegisterClass RC,
|
||||
|
|
|
@ -649,13 +649,13 @@ def HasSSE4A : Predicate<"Subtarget->hasSSE4A()">;
|
|||
def HasAVX : Predicate<"Subtarget->hasAVX()">;
|
||||
def HasAVX2 : Predicate<"Subtarget->hasAVX2()">;
|
||||
def HasAVX1Only : Predicate<"Subtarget->hasAVX() && !Subtarget->hasAVX2()">;
|
||||
def HasAVX512 : Predicate<"Subtarget->hasAVX512()">;
|
||||
def HasAVX512 : Predicate<"Subtarget->hasAVX512()">;
|
||||
def UseAVX : Predicate<"Subtarget->hasAVX() && !Subtarget->hasAVX512()">;
|
||||
def UseAVX2 : Predicate<"Subtarget->hasAVX2() && !Subtarget->hasAVX512()">;
|
||||
def NoAVX512 : Predicate<"!Subtarget->hasAVX512()">;
|
||||
def HasCDI : Predicate<"Subtarget->hasCDI()">;
|
||||
def HasPFI : Predicate<"Subtarget->hasPFI()">;
|
||||
def HasEMI : Predicate<"Subtarget->hasERI()">;
|
||||
def HasERI : Predicate<"Subtarget->hasERI()">;
|
||||
|
||||
def HasPOPCNT : Predicate<"Subtarget->hasPOPCNT()">;
|
||||
def HasAES : Predicate<"Subtarget->hasAES()">;
|
||||
|
|
|
@ -3357,7 +3357,8 @@ let Predicates = [UseAVX] in {
|
|||
def : Pat<(f32 (X86frcp (load addr:$src))),
|
||||
(VRCPSSm (f32 (IMPLICIT_DEF)), addr:$src)>,
|
||||
Requires<[HasAVX, OptForSize]>;
|
||||
|
||||
}
|
||||
let Predicates = [UseAVX] in {
|
||||
def : Pat<(int_x86_sse_sqrt_ss VR128:$src),
|
||||
(COPY_TO_REGCLASS (VSQRTSSr (f32 (IMPLICIT_DEF)),
|
||||
(COPY_TO_REGCLASS VR128:$src, FR32)),
|
||||
|
@ -3371,7 +3372,9 @@ let Predicates = [UseAVX] in {
|
|||
VR128)>;
|
||||
def : Pat<(int_x86_sse2_sqrt_sd sse_load_f64:$src),
|
||||
(VSQRTSDm_Int (v2f64 (IMPLICIT_DEF)), sse_load_f64:$src)>;
|
||||
}
|
||||
|
||||
let Predicates = [HasAVX] in {
|
||||
def : Pat<(int_x86_sse_rsqrt_ss VR128:$src),
|
||||
(COPY_TO_REGCLASS (VRSQRTSSr (f32 (IMPLICIT_DEF)),
|
||||
(COPY_TO_REGCLASS VR128:$src, FR32)),
|
||||
|
|
|
@ -1,39 +1,52 @@
|
|||
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl | FileCheck %s
|
||||
|
||||
declare i32 @llvm.x86.avx512.kortestz(i16, i16) nounwind readnone
|
||||
; CHECK: test_x86_avx3_kortestz
|
||||
; CHECK: test_kortestz
|
||||
; CHECK: kortestw
|
||||
; CHECK: sete
|
||||
define i32 @test_x86_avx3_kortestz(i16 %a0, i16 %a1) {
|
||||
define i32 @test_kortestz(i16 %a0, i16 %a1) {
|
||||
%res = call i32 @llvm.x86.avx512.kortestz(i16 %a0, i16 %a1)
|
||||
ret i32 %res
|
||||
}
|
||||
|
||||
declare i32 @llvm.x86.avx512.kortestc(i16, i16) nounwind readnone
|
||||
; CHECK: test_x86_avx3_kortestc
|
||||
; CHECK: test_kortestc
|
||||
; CHECK: kortestw
|
||||
; CHECK: sbbl
|
||||
define i32 @test_x86_avx3_kortestc(i16 %a0, i16 %a1) {
|
||||
define i32 @test_kortestc(i16 %a0, i16 %a1) {
|
||||
%res = call i32 @llvm.x86.avx512.kortestc(i16 %a0, i16 %a1)
|
||||
ret i32 %res
|
||||
}
|
||||
|
||||
define <16 x float> @test_x86_avx3_rcp_ps_512(<16 x float> %a0) {
|
||||
define <16 x float> @test_rcp_ps_512(<16 x float> %a0) {
|
||||
; CHECK: vrcp14ps
|
||||
%res = call <16 x float> @llvm.x86.avx512.rcp14.ps.512(<16 x float> %a0) ; <<16 x float>> [#uses=1]
|
||||
ret <16 x float> %res
|
||||
}
|
||||
declare <16 x float> @llvm.x86.avx512.rcp14.ps.512(<16 x float>) nounwind readnone
|
||||
|
||||
define <8 x double> @test_x86_avx3_rcp_pd_512(<8 x double> %a0) {
|
||||
define <8 x double> @test_rcp_pd_512(<8 x double> %a0) {
|
||||
; CHECK: vrcp14pd
|
||||
%res = call <8 x double> @llvm.x86.avx512.rcp14.pd.512(<8 x double> %a0) ; <<8 x double>> [#uses=1]
|
||||
ret <8 x double> %res
|
||||
}
|
||||
declare <8 x double> @llvm.x86.avx512.rcp14.pd.512(<8 x double>) nounwind readnone
|
||||
|
||||
define <16 x float> @test_rcp28_ps_512(<16 x float> %a0) {
|
||||
; CHECK: vrcp28ps
|
||||
%res = call <16 x float> @llvm.x86.avx512.rcp28.ps.512(<16 x float> %a0) ; <<16 x float>> [#uses=1]
|
||||
ret <16 x float> %res
|
||||
}
|
||||
declare <16 x float> @llvm.x86.avx512.rcp28.ps.512(<16 x float>) nounwind readnone
|
||||
|
||||
define <8 x double> @test_x86_avx3_rndscale_pd_512(<8 x double> %a0) {
|
||||
define <8 x double> @test_rcp28_pd_512(<8 x double> %a0) {
|
||||
; CHECK: vrcp28pd
|
||||
%res = call <8 x double> @llvm.x86.avx512.rcp28.pd.512(<8 x double> %a0) ; <<8 x double>> [#uses=1]
|
||||
ret <8 x double> %res
|
||||
}
|
||||
declare <8 x double> @llvm.x86.avx512.rcp28.pd.512(<8 x double>) nounwind readnone
|
||||
|
||||
define <8 x double> @test_rndscale_pd_512(<8 x double> %a0) {
|
||||
; CHECK: vrndscale
|
||||
%res = call <8 x double> @llvm.x86.avx512.rndscale.pd.512(<8 x double> %a0, i32 7) ; <<8 x double>> [#uses=1]
|
||||
ret <8 x double> %res
|
||||
|
@ -41,7 +54,7 @@ define <8 x double> @test_x86_avx3_rndscale_pd_512(<8 x double> %a0) {
|
|||
declare <8 x double> @llvm.x86.avx512.rndscale.pd.512(<8 x double>, i32) nounwind readnone
|
||||
|
||||
|
||||
define <16 x float> @test_x86_avx3_rndscale_ps_512(<16 x float> %a0) {
|
||||
define <16 x float> @test_rndscale_ps_512(<16 x float> %a0) {
|
||||
; CHECK: vrndscale
|
||||
%res = call <16 x float> @llvm.x86.avx512.rndscale.ps.512(<16 x float> %a0, i32 7) ; <<16 x float>> [#uses=1]
|
||||
ret <16 x float> %res
|
||||
|
@ -49,37 +62,70 @@ define <16 x float> @test_x86_avx3_rndscale_ps_512(<16 x float> %a0) {
|
|||
declare <16 x float> @llvm.x86.avx512.rndscale.ps.512(<16 x float>, i32) nounwind readnone
|
||||
|
||||
|
||||
define <16 x float> @test_x86_avx3_rsqrt_ps_512(<16 x float> %a0) {
|
||||
define <16 x float> @test_rsqrt_ps_512(<16 x float> %a0) {
|
||||
; CHECK: vrsqrt14ps
|
||||
%res = call <16 x float> @llvm.x86.avx512.rsqrt14.ps.512(<16 x float> %a0) ; <<16 x float>> [#uses=1]
|
||||
ret <16 x float> %res
|
||||
}
|
||||
declare <16 x float> @llvm.x86.avx512.rsqrt14.ps.512(<16 x float>) nounwind readnone
|
||||
|
||||
define <16 x float> @test_rsqrt28_ps_512(<16 x float> %a0) {
|
||||
; CHECK: vrsqrt28ps
|
||||
%res = call <16 x float> @llvm.x86.avx512.rsqrt28.ps.512(<16 x float> %a0) ; <<16 x float>> [#uses=1]
|
||||
ret <16 x float> %res
|
||||
}
|
||||
declare <16 x float> @llvm.x86.avx512.rsqrt28.ps.512(<16 x float>) nounwind readnone
|
||||
|
||||
define <8 x double> @test_x86_avx3_sqrt_pd_512(<8 x double> %a0) {
|
||||
define <4 x float> @test_rsqrt14_ss(<4 x float> %a0) {
|
||||
; CHECK: vrsqrt14ss
|
||||
%res = call <4 x float> @llvm.x86.avx512.rsqrt14.ss(<4 x float> %a0) ; <<4 x float>> [#uses=1]
|
||||
ret <4 x float> %res
|
||||
}
|
||||
declare <4 x float> @llvm.x86.avx512.rsqrt14.ss(<4 x float>) nounwind readnone
|
||||
|
||||
define <4 x float> @test_rsqrt28_ss(<4 x float> %a0) {
|
||||
; CHECK: vrsqrt28ss
|
||||
%res = call <4 x float> @llvm.x86.avx512.rsqrt28.ss(<4 x float> %a0) ; <<4 x float>> [#uses=1]
|
||||
ret <4 x float> %res
|
||||
}
|
||||
declare <4 x float> @llvm.x86.avx512.rsqrt28.ss(<4 x float>) nounwind readnone
|
||||
|
||||
define <4 x float> @test_rcp14_ss(<4 x float> %a0) {
|
||||
; CHECK: vrcp14ss
|
||||
%res = call <4 x float> @llvm.x86.avx512.rcp14.ss(<4 x float> %a0) ; <<4 x float>> [#uses=1]
|
||||
ret <4 x float> %res
|
||||
}
|
||||
declare <4 x float> @llvm.x86.avx512.rcp14.ss(<4 x float>) nounwind readnone
|
||||
|
||||
define <4 x float> @test_rcp28_ss(<4 x float> %a0) {
|
||||
; CHECK: vrcp28ss
|
||||
%res = call <4 x float> @llvm.x86.avx512.rcp28.ss(<4 x float> %a0) ; <<4 x float>> [#uses=1]
|
||||
ret <4 x float> %res
|
||||
}
|
||||
declare <4 x float> @llvm.x86.avx512.rcp28.ss(<4 x float>) nounwind readnone
|
||||
|
||||
define <8 x double> @test_sqrt_pd_512(<8 x double> %a0) {
|
||||
; CHECK: vsqrtpd
|
||||
%res = call <8 x double> @llvm.x86.avx512.sqrt.pd.512(<8 x double> %a0) ; <<8 x double>> [#uses=1]
|
||||
ret <8 x double> %res
|
||||
}
|
||||
declare <8 x double> @llvm.x86.avx512.sqrt.pd.512(<8 x double>) nounwind readnone
|
||||
|
||||
|
||||
define <16 x float> @test_x86_avx3_sqrt_ps_512(<16 x float> %a0) {
|
||||
define <16 x float> @test_sqrt_ps_512(<16 x float> %a0) {
|
||||
; CHECK: vsqrtps
|
||||
%res = call <16 x float> @llvm.x86.avx512.sqrt.ps.512(<16 x float> %a0) ; <<16 x float>> [#uses=1]
|
||||
ret <16 x float> %res
|
||||
}
|
||||
declare <16 x float> @llvm.x86.avx512.sqrt.ps.512(<16 x float>) nounwind readnone
|
||||
|
||||
define <4 x float> @test_x86_avx3_sqrt_ss(<4 x float> %a0, <4 x float> %a1) {
|
||||
define <4 x float> @test_sqrt_ss(<4 x float> %a0, <4 x float> %a1) {
|
||||
; CHECK: vsqrtssz
|
||||
%res = call <4 x float> @llvm.x86.avx512.sqrt.ss(<4 x float> %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1]
|
||||
ret <4 x float> %res
|
||||
}
|
||||
declare <4 x float> @llvm.x86.avx512.sqrt.ss(<4 x float>, <4 x float>) nounwind readnone
|
||||
|
||||
define <2 x double> @test_x86_avx3_sqrt_sd(<2 x double> %a0, <2 x double> %a1) {
|
||||
define <2 x double> @test_sqrt_sd(<2 x double> %a0, <2 x double> %a1) {
|
||||
; CHECK: vsqrtsdz
|
||||
%res = call <2 x double> @llvm.x86.avx512.sqrt.sd(<2 x double> %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1]
|
||||
ret <2 x double> %res
|
||||
|
|
Loading…
Reference in New Issue