forked from OSchip/llvm-project
[Clang][AVX512][intrinsics] Fix rcp and sqrt intrinsics.
Differential Revision: http://reviews.llvm.org/D20438 llvm-svn: 270322
This commit is contained in:
parent
11b55b29d1
commit
a63a129749
|
@ -21753,7 +21753,9 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
|
|||
case X86ISD::FMAXC: return "X86ISD::FMAXC";
|
||||
case X86ISD::FMINC: return "X86ISD::FMINC";
|
||||
case X86ISD::FRSQRT: return "X86ISD::FRSQRT";
|
||||
case X86ISD::FRSQRTS: return "X86ISD::FRSQRTS";
|
||||
case X86ISD::FRCP: return "X86ISD::FRCP";
|
||||
case X86ISD::FRCPS: return "X86ISD::FRCPS";
|
||||
case X86ISD::EXTRQI: return "X86ISD::EXTRQI";
|
||||
case X86ISD::INSERTQI: return "X86ISD::INSERTQI";
|
||||
case X86ISD::TLSADDR: return "X86ISD::TLSADDR";
|
||||
|
|
|
@ -250,7 +250,8 @@ namespace llvm {
|
|||
/// Note that these typically require refinement
|
||||
/// in order to obtain suitable precision.
|
||||
FRSQRT, FRCP,
|
||||
|
||||
FRSQRTS, FRCPS,
|
||||
|
||||
// Thread Local Storage.
|
||||
TLSADDR,
|
||||
|
||||
|
|
|
@ -60,8 +60,8 @@ def X86fandn : SDNode<"X86ISD::FANDN", SDTFPBinOp,
|
|||
[SDNPCommutative, SDNPAssociative]>;
|
||||
def X86frsqrt : SDNode<"X86ISD::FRSQRT", SDTFPUnaryOp>;
|
||||
def X86frcp : SDNode<"X86ISD::FRCP", SDTFPUnaryOp>;
|
||||
def X86frsqrt14s: SDNode<"X86ISD::FRSQRT", SDTFPBinOp>;
|
||||
def X86frcp14s : SDNode<"X86ISD::FRCP", SDTFPBinOp>;
|
||||
def X86frsqrt14s: SDNode<"X86ISD::FRSQRTS", SDTFPBinOp>;
|
||||
def X86frcp14s : SDNode<"X86ISD::FRCPS", SDTFPBinOp>;
|
||||
def X86fhadd : SDNode<"X86ISD::FHADD", SDTFPBinOp>;
|
||||
def X86fhsub : SDNode<"X86ISD::FHSUB", SDTFPBinOp>;
|
||||
def X86hadd : SDNode<"X86ISD::HADD", SDTIntBinOp>;
|
||||
|
|
|
@ -2125,8 +2125,8 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
|
|||
X86_INTRINSIC_DATA(avx512_rcp14_ps_128, INTR_TYPE_1OP_MASK, X86ISD::FRCP, 0),
|
||||
X86_INTRINSIC_DATA(avx512_rcp14_ps_256, INTR_TYPE_1OP_MASK, X86ISD::FRCP, 0),
|
||||
X86_INTRINSIC_DATA(avx512_rcp14_ps_512, INTR_TYPE_1OP_MASK, X86ISD::FRCP, 0),
|
||||
X86_INTRINSIC_DATA(avx512_rcp14_sd, INTR_TYPE_SCALAR_MASK, X86ISD::FRCP, 0),
|
||||
X86_INTRINSIC_DATA(avx512_rcp14_ss, INTR_TYPE_SCALAR_MASK, X86ISD::FRCP, 0),
|
||||
X86_INTRINSIC_DATA(avx512_rcp14_sd, INTR_TYPE_SCALAR_MASK, X86ISD::FRCPS, 0),
|
||||
X86_INTRINSIC_DATA(avx512_rcp14_ss, INTR_TYPE_SCALAR_MASK, X86ISD::FRCPS, 0),
|
||||
X86_INTRINSIC_DATA(avx512_rcp28_pd, INTR_TYPE_1OP_MASK_RM, X86ISD::RCP28, 0),
|
||||
X86_INTRINSIC_DATA(avx512_rcp28_ps, INTR_TYPE_1OP_MASK_RM, X86ISD::RCP28, 0),
|
||||
X86_INTRINSIC_DATA(avx512_rcp28_sd, INTR_TYPE_SCALAR_MASK_RM, X86ISD::RCP28, 0),
|
||||
|
@ -2137,8 +2137,8 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
|
|||
X86_INTRINSIC_DATA(avx512_rsqrt14_ps_128, INTR_TYPE_1OP_MASK, X86ISD::FRSQRT, 0),
|
||||
X86_INTRINSIC_DATA(avx512_rsqrt14_ps_256, INTR_TYPE_1OP_MASK, X86ISD::FRSQRT, 0),
|
||||
X86_INTRINSIC_DATA(avx512_rsqrt14_ps_512, INTR_TYPE_1OP_MASK, X86ISD::FRSQRT, 0),
|
||||
X86_INTRINSIC_DATA(avx512_rsqrt14_sd, INTR_TYPE_SCALAR_MASK, X86ISD::FRSQRT, 0),
|
||||
X86_INTRINSIC_DATA(avx512_rsqrt14_ss, INTR_TYPE_SCALAR_MASK, X86ISD::FRSQRT, 0),
|
||||
X86_INTRINSIC_DATA(avx512_rsqrt14_sd, INTR_TYPE_SCALAR_MASK, X86ISD::FRSQRTS, 0),
|
||||
X86_INTRINSIC_DATA(avx512_rsqrt14_ss, INTR_TYPE_SCALAR_MASK, X86ISD::FRSQRTS, 0),
|
||||
X86_INTRINSIC_DATA(avx512_rsqrt28_pd, INTR_TYPE_1OP_MASK_RM,X86ISD::RSQRT28, 0),
|
||||
X86_INTRINSIC_DATA(avx512_rsqrt28_ps, INTR_TYPE_1OP_MASK_RM,X86ISD::RSQRT28, 0),
|
||||
X86_INTRINSIC_DATA(avx512_rsqrt28_sd, INTR_TYPE_SCALAR_MASK_RM,X86ISD::RSQRT28, 0),
|
||||
|
|
|
@ -126,26 +126,6 @@ define <16 x float> @test_rsqrt_ps_512(<16 x float> %a0) {
|
|||
}
|
||||
declare <16 x float> @llvm.x86.avx512.rsqrt14.ps.512(<16 x float>, <16 x float>, i16) nounwind readnone
|
||||
|
||||
define <4 x float> @test_rsqrt14_ss(<4 x float> %a0) {
|
||||
; CHECK-LABEL: test_rsqrt14_ss:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: vrsqrt14ss %xmm0, %xmm0, %xmm0
|
||||
; CHECK-NEXT: retq
|
||||
%res = call <4 x float> @llvm.x86.avx512.rsqrt14.ss(<4 x float> %a0, <4 x float> %a0, <4 x float> zeroinitializer, i8 -1) ; <<4 x float>> [#uses=1]
|
||||
ret <4 x float> %res
|
||||
}
|
||||
declare <4 x float> @llvm.x86.avx512.rsqrt14.ss(<4 x float>, <4 x float>, <4 x float>, i8) nounwind readnone
|
||||
|
||||
define <4 x float> @test_rcp14_ss(<4 x float> %a0) {
|
||||
; CHECK-LABEL: test_rcp14_ss:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: vrcp14ss %xmm0, %xmm0, %xmm0
|
||||
; CHECK-NEXT: retq
|
||||
%res = call <4 x float> @llvm.x86.avx512.rcp14.ss(<4 x float> %a0, <4 x float> %a0, <4 x float> zeroinitializer, i8 -1) ; <<4 x float>> [#uses=1]
|
||||
ret <4 x float> %res
|
||||
}
|
||||
declare <4 x float> @llvm.x86.avx512.rcp14.ss(<4 x float>, <4 x float>, <4 x float>, i8) nounwind readnone
|
||||
|
||||
define <8 x double> @test_sqrt_pd_512(<8 x double> %a0) {
|
||||
; CHECK-LABEL: test_sqrt_pd_512:
|
||||
; CHECK: ## BB#0:
|
||||
|
|
|
@ -1,6 +1,48 @@
|
|||
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=skx | FileCheck %s
|
||||
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl | FileCheck %s
|
||||
|
||||
|
||||
define <4 x float> @test_rsqrt14_ss(<4 x float> %a0) {
|
||||
; CHECK-LABEL: test_rsqrt14_ss:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: vrsqrt14ss %xmm0, %xmm0, %xmm0
|
||||
; CHECK-NEXT: retq
|
||||
%res = call <4 x float> @llvm.x86.avx512.rsqrt14.ss(<4 x float> %a0, <4 x float> %a0, <4 x float> zeroinitializer, i8 -1) ;
|
||||
ret <4 x float> %res
|
||||
}
|
||||
declare <4 x float> @llvm.x86.avx512.rsqrt14.ss(<4 x float>, <4 x float>, <4 x float>, i8) nounwind readnone
|
||||
|
||||
define <4 x float> @test_rcp14_ss(<4 x float> %a0) {
|
||||
; CHECK-LABEL: test_rcp14_ss:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: vrcp14ss %xmm0, %xmm0, %xmm0
|
||||
; CHECK-NEXT: retq
|
||||
%res = call <4 x float> @llvm.x86.avx512.rcp14.ss(<4 x float> %a0, <4 x float> %a0, <4 x float> zeroinitializer, i8 -1) ;
|
||||
ret <4 x float> %res
|
||||
}
|
||||
declare <4 x float> @llvm.x86.avx512.rcp14.ss(<4 x float>, <4 x float>, <4 x float>, i8) nounwind readnone
|
||||
|
||||
define <2 x double> @test_rsqrt14_sd(<2 x double> %a0) {
|
||||
; CHECK-LABEL: test_rsqrt14_sd:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: vrsqrt14sd %xmm0, %xmm0, %xmm0
|
||||
; CHECK-NEXT: retq
|
||||
%res = call <2 x double> @llvm.x86.avx512.rsqrt14.sd(<2 x double> %a0, <2 x double> %a0, <2 x double> zeroinitializer, i8 -1) ;
|
||||
ret <2 x double> %res
|
||||
}
|
||||
declare <2 x double> @llvm.x86.avx512.rsqrt14.sd(<2 x double>, <2 x double>, <2 x double>, i8) nounwind readnone
|
||||
|
||||
define <2 x double> @test_rcp14_sd(<2 x double> %a0) {
|
||||
; CHECK-LABEL: test_rcp14_sd:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: vrcp14sd %xmm0, %xmm0, %xmm0
|
||||
; CHECK-NEXT: retq
|
||||
%res = call <2 x double> @llvm.x86.avx512.rcp14.sd(<2 x double> %a0, <2 x double> %a0, <2 x double> zeroinitializer, i8 -1) ;
|
||||
ret <2 x double> %res
|
||||
|
||||
}
|
||||
declare <2 x double> @llvm.x86.avx512.rcp14.sd(<2 x double>, <2 x double>, <2 x double>, i8) nounwind readnone
|
||||
|
||||
declare <4 x float> @llvm.x86.avx512.mask.scalef.ss(<4 x float>, <4 x float>,<4 x float>, i8, i32)
|
||||
define <4 x float>@test_int_x86_avx512_mask_scalef_ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x3, i8 %x4) {
|
||||
; CHECK-LABEL: test_int_x86_avx512_mask_scalef_ss:
|
||||
|
|
Loading…
Reference in New Issue