forked from OSchip/llvm-project
[AVX-512] Use sse_load_f32/f64 in place of scalar_to_vector and scalar load in some patterns.
llvm-svn: 295693
This commit is contained in:
parent
63b7d71844
commit
d9fe664868
|
@ -92,6 +92,12 @@ class X86VectorVTInfo<int numelts, ValueType eltvt, RegisterClass rc,
|
|||
|
||||
PatFrag ScalarLdFrag = !cast<PatFrag>("load" # EltVT);
|
||||
|
||||
ComplexPattern ScalarIntMemCPat = !if (!eq (EltTypeName, "f32"),
|
||||
!cast<ComplexPattern>("sse_load_f32"),
|
||||
!if (!eq (EltTypeName, "f64"),
|
||||
!cast<ComplexPattern>("sse_load_f64"),
|
||||
?));
|
||||
|
||||
// The corresponding float type, e.g. v16f32 for v16i32
|
||||
// Note: For EltSize < 32, FloatVT is illegal and TableGen
|
||||
// fails to compile, so we choose FloatVT = VT
|
||||
|
@ -1518,11 +1524,10 @@ multiclass avx512_cmp_scalar<X86VectorVTInfo _, SDNode OpNode, SDNode OpNodeRnd>
|
|||
imm:$cc)>, EVEX_4V;
|
||||
defm rm_Int : AVX512_maskable_cmp<0xC2, MRMSrcMem, _,
|
||||
(outs _.KRC:$dst),
|
||||
(ins _.RC:$src1, _.ScalarMemOp:$src2, AVXCC:$cc),
|
||||
(ins _.RC:$src1, _.IntScalarMemOp:$src2, AVXCC:$cc),
|
||||
"vcmp${cc}"#_.Suffix,
|
||||
"$src2, $src1", "$src1, $src2",
|
||||
(OpNode (_.VT _.RC:$src1),
|
||||
(_.VT (scalar_to_vector (_.ScalarLdFrag addr:$src2))),
|
||||
(OpNode (_.VT _.RC:$src1), _.ScalarIntMemCPat:$src2,
|
||||
imm:$cc)>, EVEX_4V, EVEX_CD8<_.EltSize, CD8VT1>;
|
||||
|
||||
defm rrb_Int : AVX512_maskable_cmp<0xC2, MRMSrcReg, _,
|
||||
|
@ -4140,16 +4145,16 @@ multiclass avx512_fp_scalar<bits<8> opc, string OpcodeStr,X86VectorVTInfo _,
|
|||
defm rr_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
|
||||
(ins _.RC:$src1, _.RC:$src2), OpcodeStr,
|
||||
"$src2, $src1", "$src1, $src2",
|
||||
(VecNode (_.VT _.RC:$src1), (_.VT _.RC:$src2),
|
||||
(i32 FROUND_CURRENT)),
|
||||
(_.VT (VecNode _.RC:$src1, _.RC:$src2,
|
||||
(i32 FROUND_CURRENT))),
|
||||
itins.rr>;
|
||||
|
||||
defm rm_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
|
||||
(ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr,
|
||||
(ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr,
|
||||
"$src2, $src1", "$src1, $src2",
|
||||
(VecNode (_.VT _.RC:$src1),
|
||||
(_.VT (scalar_to_vector (_.ScalarLdFrag addr:$src2))),
|
||||
(i32 FROUND_CURRENT)),
|
||||
(_.VT (VecNode _.RC:$src1,
|
||||
_.ScalarIntMemCPat:$src2,
|
||||
(i32 FROUND_CURRENT))),
|
||||
itins.rm>;
|
||||
let isCodeGenOnly = 1, Predicates = [HasAVX512] in {
|
||||
def rr : I< opc, MRMSrcReg, (outs _.FRC:$dst),
|
||||
|
@ -5512,7 +5517,7 @@ multiclass avx512_fma3s_common<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
|
|||
"$src3, $src2", "$src2, $src3", RHS_VEC_r, 1, 1>, AVX512FMA3Base;
|
||||
|
||||
defm m_Int: AVX512_maskable_3src_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
|
||||
(ins _.RC:$src2, _.ScalarMemOp:$src3), OpcodeStr,
|
||||
(ins _.RC:$src2, _.IntScalarMemOp:$src3), OpcodeStr,
|
||||
"$src3, $src2", "$src2, $src3", RHS_VEC_m, 1, 1>, AVX512FMA3Base;
|
||||
|
||||
defm rb_Int: AVX512_maskable_3src_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
|
||||
|
@ -5544,7 +5549,7 @@ multiclass avx512_fma3s_all<bits<8> opc213, bits<8> opc231, bits<8> opc132,
|
|||
// semantics.
|
||||
(_.VT (OpNodeRnds1 _.RC:$src1, _.RC:$src2, _.RC:$src3, (i32 FROUND_CURRENT))),
|
||||
(_.VT (OpNodeRnds1 _.RC:$src1, _.RC:$src2,
|
||||
(_.VT (scalar_to_vector(_.ScalarLdFrag addr:$src3))), (i32 FROUND_CURRENT))),
|
||||
_.ScalarIntMemCPat:$src3, (i32 FROUND_CURRENT))),
|
||||
(_.VT (OpNodeRnds1 _.RC:$src1, _.RC:$src2, _.RC:$src3,
|
||||
(i32 imm:$rc))),
|
||||
(set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src2, _.FRC:$src1,
|
||||
|
@ -5554,8 +5559,7 @@ multiclass avx512_fma3s_all<bits<8> opc213, bits<8> opc231, bits<8> opc132,
|
|||
|
||||
defm NAME#231#SUFF#Z: avx512_fma3s_common<opc231, OpcodeStr#"231"#_.Suffix , _ ,
|
||||
(_.VT (OpNodeRnds3 _.RC:$src2, _.RC:$src3, _.RC:$src1, (i32 FROUND_CURRENT))),
|
||||
(_.VT (OpNodeRnds3 _.RC:$src2,
|
||||
(_.VT (scalar_to_vector(_.ScalarLdFrag addr:$src3))),
|
||||
(_.VT (OpNodeRnds3 _.RC:$src2, _.ScalarIntMemCPat:$src3,
|
||||
_.RC:$src1, (i32 FROUND_CURRENT))),
|
||||
(_.VT ( OpNodeRnds3 _.RC:$src2, _.RC:$src3, _.RC:$src1,
|
||||
(i32 imm:$rc))),
|
||||
|
@ -5566,8 +5570,7 @@ multiclass avx512_fma3s_all<bits<8> opc213, bits<8> opc231, bits<8> opc132,
|
|||
|
||||
defm NAME#132#SUFF#Z: avx512_fma3s_common<opc132, OpcodeStr#"132"#_.Suffix , _ ,
|
||||
(_.VT (OpNodeRnds1 _.RC:$src1, _.RC:$src3, _.RC:$src2, (i32 FROUND_CURRENT))),
|
||||
(_.VT (OpNodeRnds1 _.RC:$src1,
|
||||
(_.VT (scalar_to_vector(_.ScalarLdFrag addr:$src3))),
|
||||
(_.VT (OpNodeRnds1 _.RC:$src1, _.ScalarIntMemCPat:$src3,
|
||||
_.RC:$src2, (i32 FROUND_CURRENT))),
|
||||
(_.VT (OpNodeRnds1 _.RC:$src1, _.RC:$src3, _.RC:$src2,
|
||||
(i32 imm:$rc))),
|
||||
|
|
|
@ -4,10 +4,9 @@
|
|||
define i8 @test_int_x86_avx512_mask_cmp_ss(<4 x float> %a, float* %b, i8 %mask) {
|
||||
; CHECK-LABEL: test_int_x86_avx512_mask_cmp_ss:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
|
||||
; CHECK-NEXT: andl $1, %esi
|
||||
; CHECK-NEXT: kmovw %esi, %k1
|
||||
; CHECK-NEXT: vcmpunordss %xmm1, %xmm0, %k0 {%k1}
|
||||
; CHECK-NEXT: vcmpunordss (%rdi), %xmm0, %k0 {%k1}
|
||||
; CHECK-NEXT: kmovw %k0, %eax
|
||||
; CHECK-NEXT: andl $1, %eax
|
||||
; CHECK-NEXT: ## kill: %AL<def> %AL<kill> %EAX<kill>
|
||||
|
@ -25,10 +24,9 @@ declare i8 @llvm.x86.avx512.mask.cmp.ss(<4 x float>, <4 x float>, i32, i8, i32)
|
|||
define <4 x float> @test_mask_max_ss(<4 x float> %a, float* %b, i8 %mask) {
|
||||
; CHECK-LABEL: test_mask_max_ss:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
|
||||
; CHECK-NEXT: andl $1, %esi
|
||||
; CHECK-NEXT: kmovw %esi, %k1
|
||||
; CHECK-NEXT: vmaxss %xmm1, %xmm0, %xmm0 {%k1} {z}
|
||||
; CHECK-NEXT: vmaxss (%rdi), %xmm0, %xmm0 {%k1} {z}
|
||||
; CHECK-NEXT: retq
|
||||
%b.val = load float, float* %b
|
||||
%bv0 = insertelement <4 x float> undef, float %b.val, i32 0
|
||||
|
@ -43,10 +41,9 @@ declare <4 x float> @llvm.x86.avx512.mask.max.ss.round(<4 x float>, <4 x float>,
|
|||
define <4 x float> @test_maskz_add_ss(<4 x float> %a, float* %b, i8 %mask) {
|
||||
; CHECK-LABEL: test_maskz_add_ss:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
|
||||
; CHECK-NEXT: andl $1, %esi
|
||||
; CHECK-NEXT: kmovw %esi, %k1
|
||||
; CHECK-NEXT: vaddss %xmm1, %xmm0, %xmm0 {%k1} {z}
|
||||
; CHECK-NEXT: vaddss (%rdi), %xmm0, %xmm0 {%k1} {z}
|
||||
; CHECK-NEXT: retq
|
||||
%b.val = load float, float* %b
|
||||
%bv0 = insertelement <4 x float> undef, float %b.val, i32 0
|
||||
|
@ -64,10 +61,9 @@ declare <2 x double> @llvm.x86.avx512.mask.vfmadd.sd(<2 x double>, <2 x double>,
|
|||
define <2 x double> @test_int_x86_avx512_mask_vfmadd_sd(<2 x double> %a, <2 x double> %b, double* %c, i8 %mask){
|
||||
; CHECK-LABEL: test_int_x86_avx512_mask_vfmadd_sd:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: vmovsd {{.*#+}} xmm2 = mem[0],zero
|
||||
; CHECK-NEXT: andl $1, %esi
|
||||
; CHECK-NEXT: kmovw %esi, %k1
|
||||
; CHECK-NEXT: vfmadd213sd %xmm2, %xmm1, %xmm0 {%k1}
|
||||
; CHECK-NEXT: vfmadd213sd (%rdi), %xmm1, %xmm0 {%k1}
|
||||
; CHECK-NEXT: retq
|
||||
%c.val = load double, double* %c
|
||||
%cv0 = insertelement <2 x double> undef, double %c.val, i32 0
|
||||
|
|
Loading…
Reference in New Issue