From c7de3a101885c5b9b167a82b0be37dfb23551aa2 Mon Sep 17 00:00:00 2001
From: Craig Topper
Date: Fri, 29 Jul 2016 02:49:08 +0000
Subject: [PATCH] [AVX512] Remove the intrinsic forms of VMOVSS/VMOVSD.

We don't need two different forms of 'rr' and 'rm'. This matches SSE/AVX.

I'm not convinced the pattern for rm_Int was correct anyway. It had a
tied source that shouldn't exist for the unmasked version. The load form
of MOVSS always zeros the most significant bits. I've left the patterns
off the masked load instructions as I'm not sure what the correct
pattern should be, and we don't have any tests currently. Nor do we
implement masked scalar load intrinsics in clang currently.

llvm-svn: 277098
---
 llvm/lib/Target/X86/X86InstrAVX512.td      | 72 +++++++++++++---------
 llvm/test/CodeGen/X86/avx512-intrinsics.ll |  2 +-
 llvm/test/CodeGen/X86/avx512-select.ll     |  2 +-
 3 files changed, 45 insertions(+), 31 deletions(-)

diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td
index 07e84c3a17ce..4ba09b2adf62 100644
--- a/llvm/lib/Target/X86/X86InstrAVX512.td
+++ b/llvm/lib/Target/X86/X86InstrAVX512.td
@@ -3025,39 +3025,53 @@ def VMOVQI2PQIZrm : AVX512XSI<0x7E, MRMSrcMem, (outs VR128X:$dst),
 // AVX-512 MOVSS, MOVSD
 //===----------------------------------------------------------------------===//
 
-multiclass avx512_move_scalar <string asm, SDNode OpNode,
-                               X86VectorVTInfo _> {
-  defm rr_Int : AVX512_maskable_scalar<0x10, MRMSrcReg, _, (outs _.RC:$dst),
-                      (ins _.RC:$src1, _.RC:$src2),
-                      asm, "$src2, $src1","$src1, $src2",
-                      (_.VT (OpNode (_.VT _.RC:$src1),
-                                    (_.VT _.RC:$src2))),
-                      IIC_SSE_MOV_S_RR>, EVEX_4V;
-  let Constraints = "$src1 = $dst" in
-  defm rm_Int : AVX512_maskable_3src_scalar<0x10, MRMSrcMem, _,
-                      (outs _.RC:$dst),
-                      (ins _.ScalarMemOp:$src),
-                      asm,"$src","$src",
-                      (_.VT (OpNode (_.VT _.RC:$src1),
-                               (_.VT (scalar_to_vector
-                                     (_.ScalarLdFrag addr:$src)))))>, EVEX;
-  let isCodeGenOnly = 1 in {
-    def rr : AVX512PI<0x10, MRMSrcReg, (outs _.RC:$dst),
-               (ins _.RC:$src1, _.FRC:$src2),
-               !strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
-               [(set _.RC:$dst, (_.VT (OpNode _.RC:$src1,
-                                      (scalar_to_vector _.FRC:$src2))))],
-               _.ExeDomain,IIC_SSE_MOV_S_RR>, EVEX_4V;
-    def rm : AVX512PI<0x10, MRMSrcMem, (outs _.FRC:$dst), (ins _.ScalarMemOp:$src),
-               !strconcat(asm, "\t{$src, $dst|$dst, $src}"),
-               [(set _.FRC:$dst, (_.ScalarLdFrag addr:$src))],
-               _.ExeDomain, IIC_SSE_MOV_S_RM>, EVEX;
-  }
+multiclass avx512_move_scalar<string asm, SDNode OpNode,
+                              X86VectorVTInfo _> {
+  def rr : AVX512PI<0x10, MRMSrcReg, (outs _.RC:$dst),
+             (ins _.RC:$src1, _.FRC:$src2),
+             !strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+             [(set _.RC:$dst, (_.VT (OpNode _.RC:$src1,
+                                    (scalar_to_vector _.FRC:$src2))))],
+             _.ExeDomain,IIC_SSE_MOV_S_RR>, EVEX_4V;
+  def rrkz : AVX512PI<0x10, MRMSrcReg, (outs _.RC:$dst),
+               (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2),
+               !strconcat(asm, "\t{$src2, $src1, $dst {${mask}} {z}|",
+                               "$dst {${mask}} {z}, $src1, $src2}"),
+               [(set _.RC:$dst, (_.VT (X86selects _.KRCWM:$mask,
+                                       (_.VT (OpNode _.RC:$src1, _.RC:$src2)),
+                                       _.ImmAllZerosV)))],
+               _.ExeDomain,IIC_SSE_MOV_S_RR>, EVEX_4V, EVEX_KZ;
+  let Constraints = "$src0 = $dst" in
+  def rrk : AVX512PI<0x10, MRMSrcReg, (outs _.RC:$dst),
+              (ins _.RC:$src0, _.KRCWM:$mask, _.RC:$src1, _.RC:$src2),
+              !strconcat(asm, "\t{$src2, $src1, $dst {${mask}}|",
+                              "$dst {${mask}}, $src1, $src2}"),
+              [(set _.RC:$dst, (_.VT (X86selects _.KRCWM:$mask,
+                                      (_.VT (OpNode _.RC:$src1, _.RC:$src2)),
+                                      (_.VT _.RC:$src0))))],
+              _.ExeDomain,IIC_SSE_MOV_S_RR>, EVEX_4V, EVEX_K;
+  def rm : AVX512PI<0x10, MRMSrcMem, (outs _.FRC:$dst), (ins _.ScalarMemOp:$src),
+             !strconcat(asm, "\t{$src, $dst|$dst, $src}"),
+             [(set _.FRC:$dst, (_.ScalarLdFrag addr:$src))],
+             _.ExeDomain, IIC_SSE_MOV_S_RM>, EVEX;
+  let mayLoad = 1, hasSideEffects = 0 in {
+    let Constraints = "$src0 = $dst" in
+    def rmk : AVX512PI<0x10, MRMSrcMem, (outs _.RC:$dst),
+                (ins _.RC:$src0, _.KRCWM:$mask, _.ScalarMemOp:$src),
+                !strconcat(asm, "\t{$src, $dst {${mask}}|",
+                                "$dst {${mask}}, $src}"),
+                [], _.ExeDomain, IIC_SSE_MOV_S_RM>, EVEX, EVEX_K;
+    def rmkz : AVX512PI<0x10, MRMSrcMem, (outs _.RC:$dst),
+                 (ins _.KRCWM:$mask, _.ScalarMemOp:$src),
+                 !strconcat(asm, "\t{$src, $dst {${mask}} {z}|",
+                                 "$dst {${mask}} {z}, $src}"),
+                 [], _.ExeDomain, IIC_SSE_MOV_S_RM>, EVEX, EVEX_KZ;
+  }
   def mr: AVX512PI<0x11, MRMDestMem, (outs), (ins _.ScalarMemOp:$dst, _.FRC:$src),
              !strconcat(asm, "\t{$src, $dst|$dst, $src}"),
              [(store _.FRC:$src, addr:$dst)], _.ExeDomain,
              IIC_SSE_MOV_S_MR>, EVEX;
-  let mayStore = 1 in
+  let mayStore = 1, hasSideEffects = 0 in
   def mrk: AVX512PI<0x11, MRMDestMem, (outs),
             (ins _.ScalarMemOp:$dst, VK1WM:$mask, _.FRC:$src),
             !strconcat(asm, "\t{$src, $dst {${mask}}|$dst {${mask}}, $src}"),
@@ -3071,11 +3085,11 @@ defm VMOVSDZ : avx512_move_scalar<"vmovsd", X86Movsd, f64x_info>,
                                   VEX_LIG, XD, VEX_W, EVEX_CD8<64, CD8VT1>;
 
 def : Pat<(f32 (X86selects VK1WM:$mask, (f32 FR32X:$src1), (f32 FR32X:$src2))),
-          (COPY_TO_REGCLASS (VMOVSSZrr_Intk (COPY_TO_REGCLASS FR32X:$src2, VR128X),
+          (COPY_TO_REGCLASS (VMOVSSZrrk (COPY_TO_REGCLASS FR32X:$src2, VR128X),
                             VK1WM:$mask, (v4f32 (IMPLICIT_DEF)),(COPY_TO_REGCLASS FR32X:$src1, VR128X)),
                             FR32X)>;
 def : Pat<(f64 (X86selects VK1WM:$mask, (f64 FR64X:$src1), (f64 FR64X:$src2))),
-          (COPY_TO_REGCLASS (VMOVSDZrr_Intk (COPY_TO_REGCLASS FR64X:$src2, VR128X),
+          (COPY_TO_REGCLASS (VMOVSDZrrk (COPY_TO_REGCLASS FR64X:$src2, VR128X),
                             VK1WM:$mask, (v2f64 (IMPLICIT_DEF)),
                             (COPY_TO_REGCLASS FR64X:$src1, VR128X)), FR64X)>;
 def : Pat<(int_x86_avx512_mask_store_ss addr:$dst, VR128X:$src, GR8:$mask),
diff --git a/llvm/test/CodeGen/X86/avx512-intrinsics.ll b/llvm/test/CodeGen/X86/avx512-intrinsics.ll
index cab1aae11424..cc5b7916c0b4 100644
--- a/llvm/test/CodeGen/X86/avx512-intrinsics.ll
+++ b/llvm/test/CodeGen/X86/avx512-intrinsics.ll
@@ -5413,7 +5413,7 @@ define <2 x double>@test_int_x86_avx512_mask_move_sd_rrk(<2 x double> %x0, <2 x
 ; CHECK-NEXT:    andl $1, %edi
 ; CHECK-NEXT:    kmovw %edi, %k1
 ; CHECK-NEXT:    vmovsd %xmm1, %xmm0, %xmm2 {%k1}
-; CHECK-NEXT:    vmovaps %zmm2, %zmm0
+; CHECK-NEXT:    vmovapd %zmm2, %zmm0
 ; CHECK-NEXT:    retq
   %res = call <2 x double> @llvm.x86.avx512.mask.move.sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3)
   ret <2 x double> %res
diff --git a/llvm/test/CodeGen/X86/avx512-select.ll b/llvm/test/CodeGen/X86/avx512-select.ll
index fab6b3dda242..d4b9c19202e4 100644
--- a/llvm/test/CodeGen/X86/avx512-select.ll
+++ b/llvm/test/CodeGen/X86/avx512-select.ll
@@ -52,7 +52,7 @@ define double @select03(double %a, double %b, double %c, double %eps) {
 ; CHECK:       ## BB#0:
 ; CHECK-NEXT:    vcmplesd %xmm0, %xmm3, %k1
 ; CHECK-NEXT:    vmovsd %xmm2, %xmm0, %xmm1 {%k1}
-; CHECK-NEXT:    vmovaps %zmm1, %zmm0
+; CHECK-NEXT:    vmovapd %zmm1, %zmm0
 ; CHECK-NEXT:    retq
   %cmp = fcmp oge double %a, %eps
   %cond = select i1 %cmp, double %c, double %b
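
For reference, a minimal LLVM IR sketch of the pattern the new masked register forms
cover, mirroring the select03 test above (the function name is hypothetical; the body
is taken from that test). A scalar select on an fcmp result lowers to a compare into a
k-register followed by a masked vmovsd, which after this change selects the plain
VMOVSDZrrk instruction instead of the removed rr_Int form:

define double @masked_move_sketch(double %a, double %b, double %c, double %eps) {
; Hypothetical standalone test. Compiled for an avx512f target, this is expected
; to produce a vcmpsd into %k1 followed by vmovsd %xmm2, %xmm0, %xmm1 {%k1},
; matching the CHECK lines of select03 above.
  %cmp = fcmp oge double %a, %eps
  %cond = select i1 %cmp, double %c, double %b
  ret double %cond
}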