[Clang][AVX512][intrinsics] Fix vscalef intrinsics.

Differential Revision: http://reviews.llvm.org/D20324

llvm-svn: 270321
This commit is contained in:
Michael Zuckerman 2016-05-21 11:09:53 +00:00
parent c13c59afa7
commit 11b55b29d1
8 changed files with 37 additions and 43 deletions

View File

@ -21923,6 +21923,7 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
case X86ISD::FSQRT_RND: return "X86ISD::FSQRT_RND";
case X86ISD::FGETEXP_RND: return "X86ISD::FGETEXP_RND";
case X86ISD::SCALEF: return "X86ISD::SCALEF";
case X86ISD::SCALEFS: return "X86ISD::SCALEFS";
case X86ISD::ADDS: return "X86ISD::ADDS";
case X86ISD::SUBS: return "X86ISD::SUBS";
case X86ISD::AVG: return "X86ISD::AVG";

View File

@ -213,6 +213,7 @@ namespace llvm {
VGETMANT,
// FP Scale.
SCALEF,
SCALEFS,
// Integer add/sub with unsigned saturation.
ADDUS,

View File

@ -3827,18 +3827,18 @@ multiclass avx512_fp_scalef_scalar<bits<8> opc, string OpcodeStr, SDNode OpNode,
}//let mayLoad = 1
}
multiclass avx512_fp_scalef_all<bits<8> opc, bits<8> opcScaler, string OpcodeStr, SDNode OpNode> {
multiclass avx512_fp_scalef_all<bits<8> opc, bits<8> opcScaler, string OpcodeStr, SDNode OpNode, SDNode OpNodeScal> {
defm PSZ : avx512_fp_scalef_p<opc, OpcodeStr, OpNode, v16f32_info>,
avx512_fp_round_packed<opc, OpcodeStr, OpNode, v16f32_info>,
EVEX_V512, EVEX_CD8<32, CD8VF>;
defm PDZ : avx512_fp_scalef_p<opc, OpcodeStr, OpNode, v8f64_info>,
avx512_fp_round_packed<opc, OpcodeStr, OpNode, v8f64_info>,
EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
defm SSZ128 : avx512_fp_scalef_scalar<opcScaler, OpcodeStr, OpNode, f32x_info>,
avx512_fp_scalar_round<opcScaler, OpcodeStr##"ss", f32x_info, OpNode, SSE_ALU_ITINS_S.s>,
defm SSZ128 : avx512_fp_scalef_scalar<opcScaler, OpcodeStr, OpNodeScal, f32x_info>,
avx512_fp_scalar_round<opcScaler, OpcodeStr##"ss", f32x_info, OpNodeScal, SSE_ALU_ITINS_S.s>,
EVEX_4V,EVEX_CD8<32, CD8VT1>;
defm SDZ128 : avx512_fp_scalef_scalar<opcScaler, OpcodeStr, OpNode, f64x_info>,
avx512_fp_scalar_round<opcScaler, OpcodeStr##"sd", f64x_info, OpNode, SSE_ALU_ITINS_S.d>,
defm SDZ128 : avx512_fp_scalef_scalar<opcScaler, OpcodeStr, OpNodeScal, f64x_info>,
avx512_fp_scalar_round<opcScaler, OpcodeStr##"sd", f64x_info, OpNodeScal, SSE_ALU_ITINS_S.d>,
EVEX_4V, EVEX_CD8<64, CD8VT1>, VEX_W;
// Define only if AVX512VL feature is present.
@ -3853,7 +3853,7 @@ multiclass avx512_fp_scalef_all<bits<8> opc, bits<8> opcScaler, string OpcodeStr
EVEX_V256, VEX_W, EVEX_CD8<64, CD8VF>;
}
}
defm VSCALEF : avx512_fp_scalef_all<0x2C, 0x2D, "vscalef", X86scalef>, T8PD;
defm VSCALEF : avx512_fp_scalef_all<0x2C, 0x2D, "vscalef", X86scalef, X86scalefs>, T8PD;
//===----------------------------------------------------------------------===//
// AVX-512 VPTESTM instructions

View File

@ -460,6 +460,7 @@ def X86fmulRnd : SDNode<"X86ISD::FMUL_RND", SDTFPBinOpRound>;
def X86fdivRnd : SDNode<"X86ISD::FDIV_RND", SDTFPBinOpRound>;
def X86fmaxRnd : SDNode<"X86ISD::FMAX_RND", SDTFPBinOpRound>;
def X86scalef : SDNode<"X86ISD::SCALEF", SDTFPBinOpRound>;
def X86scalefs : SDNode<"X86ISD::SCALEFS", SDTFPBinOpRound>;
def X86fminRnd : SDNode<"X86ISD::FMIN_RND", SDTFPBinOpRound>;
def X86fsqrtRnd : SDNode<"X86ISD::FSQRT_RND", SDTFPUnaryOpRound>;
def X86fsqrtRnds : SDNode<"X86ISD::FSQRT_RND", SDTFPBinOpRound>;

View File

@ -1659,9 +1659,9 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86_INTRINSIC_DATA(avx512_mask_scalef_ps_512, INTR_TYPE_2OP_MASK_RM,
X86ISD::SCALEF, 0),
X86_INTRINSIC_DATA(avx512_mask_scalef_sd, INTR_TYPE_SCALAR_MASK_RM,
X86ISD::SCALEF, 0),
X86ISD::SCALEFS, 0),
X86_INTRINSIC_DATA(avx512_mask_scalef_ss, INTR_TYPE_SCALAR_MASK_RM,
X86ISD::SCALEF, 0),
X86ISD::SCALEFS, 0),
X86_INTRINSIC_DATA(avx512_mask_shuf_f32x4, INTR_TYPE_3OP_IMM8_MASK,
X86ISD::SHUF128, 0),
X86_INTRINSIC_DATA(avx512_mask_shuf_f32x4_256, INTR_TYPE_3OP_IMM8_MASK,

View File

@ -5616,39 +5616,6 @@ define <16 x i32>@test_int_x86_avx512_mask_cvtt_ps2udq_512(<16 x float> %x0, <16
ret <16 x i32> %res2
}
declare <4 x float> @llvm.x86.avx512.mask.scalef.ss(<4 x float>, <4 x float>,<4 x float>, i8, i32)
define <4 x float>@test_int_x86_avx512_mask_scalef_ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x3, i8 %x4) {
; CHECK-LABEL: test_int_x86_avx512_mask_scalef_ss:
; CHECK: ## BB#0:
; CHECK-NEXT: andl $1, %edi
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vscalefss %xmm1, %xmm0, %xmm2 {%k1}
; CHECK-NEXT: vscalefss {rn-sae}, %xmm1, %xmm0, %xmm0
; CHECK-NEXT: vaddps %xmm0, %xmm2, %xmm0
; CHECK-NEXT: retq
%res = call <4 x float> @llvm.x86.avx512.mask.scalef.ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x3, i8 %x4, i32 4)
%res1 = call <4 x float> @llvm.x86.avx512.mask.scalef.ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x3, i8 -1, i32 8)
%res2 = fadd <4 x float> %res, %res1
ret <4 x float> %res2
}
declare <2 x double> @llvm.x86.avx512.mask.scalef.sd(<2 x double>, <2 x double>,<2 x double>, i8, i32)
define <2 x double>@test_int_x86_avx512_mask_scalef_sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x3, i8 %x4) {
; CHECK-LABEL: test_int_x86_avx512_mask_scalef_sd:
; CHECK: ## BB#0:
; CHECK-NEXT: andl $1, %edi
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vscalefsd %xmm1, %xmm0, %xmm2 {%k1}
; CHECK-NEXT: vscalefsd {rn-sae}, %xmm1, %xmm0, %xmm0
; CHECK-NEXT: vaddpd %xmm0, %xmm2, %xmm0
; CHECK-NEXT: retq
%res = call <2 x double> @llvm.x86.avx512.mask.scalef.sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x3, i8 %x4, i32 4)
%res1 = call <2 x double> @llvm.x86.avx512.mask.scalef.sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x3, i8 -1, i32 8)
%res2 = fadd <2 x double> %res, %res1
ret <2 x double> %res2
}
declare <4 x float> @llvm.x86.avx512.mask.getexp.ss(<4 x float>, <4 x float>, <4 x float>, i8, i32) nounwind readnone
define <4 x float> @test_getexp_ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) {

View File

@ -0,0 +1,24 @@
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=skx | FileCheck %s
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl | FileCheck %s
; Scalar single-precision scalef intrinsic. As used in this test it takes two
; <4 x float> sources, a third <4 x float> operand (presumably the pass-through
; value for a masked-off result -- TODO confirm against the intrinsic
; definition), an i8 mask and an i32 rounding-control immediate.
declare <4 x float> @llvm.x86.avx512.mask.scalef.ss(<4 x float>, <4 x float>,<4 x float>, i8, i32)
; Verifies that the scalar intrinsic is selected as vscalefss in both the
; masked form and the embedded-rounding form. The function issues the
; intrinsic twice on the same sources and adds the two results so that neither
; call is dead.
define <4 x float>@test_int_x86_avx512_mask_scalef_ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x3, i8 %x4) {
; CHECK-LABEL: test_int_x86_avx512_mask_scalef_ss:
; CHECK: vscalefss %xmm1, %xmm0, %xmm2 {%k1}
; CHECK: vscalefss {rn-sae}, %xmm1, %xmm0, %xmm0
; Call with the caller-supplied mask %x4 and rounding immediate 4; the first
; pattern above expects a write-masked vscalefss with no rounding annotation.
%res = call <4 x float> @llvm.x86.avx512.mask.scalef.ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x3, i8 %x4, i32 4)
; Call with an all-ones mask (i8 -1) and rounding immediate 8; the second
; pattern above expects an unmasked vscalefss carrying {rn-sae}.
%res1 = call <4 x float> @llvm.x86.avx512.mask.scalef.ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x3, i8 -1, i32 8)
; Combine both results so each intrinsic call contributes to the return value.
%res2 = fadd <4 x float> %res, %res1
ret <4 x float> %res2
}
; Scalar double-precision scalef intrinsic. As used in this test it takes two
; <2 x double> sources, a third <2 x double> operand (presumably the
; pass-through value for a masked-off result -- TODO confirm against the
; intrinsic definition), an i8 mask and an i32 rounding-control immediate.
declare <2 x double> @llvm.x86.avx512.mask.scalef.sd(<2 x double>, <2 x double>,<2 x double>, i8, i32)
; Verifies that the scalar intrinsic is selected as vscalefsd in both the
; masked form and the embedded-rounding form. The function issues the
; intrinsic twice on the same sources and adds the two results so that neither
; call is dead.
define <2 x double>@test_int_x86_avx512_mask_scalef_sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x3, i8 %x4) {
; CHECK-LABEL: test_int_x86_avx512_mask_scalef_sd:
; CHECK: vscalefsd %xmm1, %xmm0, %xmm2 {%k1}
; CHECK: vscalefsd {rn-sae}, %xmm1, %xmm0, %xmm0
; Call with the caller-supplied mask %x4 and rounding immediate 4; the first
; pattern above expects a write-masked vscalefsd with no rounding annotation.
%res = call <2 x double> @llvm.x86.avx512.mask.scalef.sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x3, i8 %x4, i32 4)
; Call with an all-ones mask (i8 -1) and rounding immediate 8; the second
; pattern above expects an unmasked vscalefsd carrying {rn-sae}.
%res1 = call <2 x double> @llvm.x86.avx512.mask.scalef.sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x3, i8 -1, i32 8)
; Combine both results so each intrinsic call contributes to the return value.
%res2 = fadd <2 x double> %res, %res1
ret <2 x double> %res2
}

View File

@ -4243,7 +4243,7 @@ define <2 x double>@test_int_x86_avx512_mask_scalef_pd_128(<2 x double> %x0, <2
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; CHECK-NEXT: vscalefpd %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0x2c,0xd1]
; CHECK-NEXT: vscalefsd %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf2,0xfd,0x08,0x2d,0xc1]
; CHECK-NEXT: vscalefpd %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf2,0xfd,0x08,0x2c,0xc1]
; CHECK-NEXT: vaddpd %xmm0, %xmm2, %xmm0 ## encoding: [0x62,0xf1,0xed,0x08,0x58,0xc0]
; CHECK-NEXT: retq ## encoding: [0xc3]
%res = call <2 x double> @llvm.x86.avx512.mask.scalef.pd.128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3)
@ -4275,7 +4275,7 @@ define <4 x float>@test_int_x86_avx512_mask_scalef_ps_128(<4 x float> %x0, <4 x
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; CHECK-NEXT: vscalefps %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0x2c,0xd1]
; CHECK-NEXT: vscalefss %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf2,0x7d,0x08,0x2d,0xc1]
; CHECK-NEXT: vscalefps %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf2,0x7d,0x08,0x2c,0xc1]
; CHECK-NEXT: vaddps %xmm0, %xmm2, %xmm0 ## encoding: [0x62,0xf1,0x6c,0x08,0x58,0xc0]
; CHECK-NEXT: retq ## encoding: [0xc3]
%res = call <4 x float> @llvm.x86.avx512.mask.scalef.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3)