diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td
index 07e84c3a17ce..4ba09b2adf62 100644
--- a/llvm/lib/Target/X86/X86InstrAVX512.td
+++ b/llvm/lib/Target/X86/X86InstrAVX512.td
@@ -3025,39 +3025,53 @@ def VMOVQI2PQIZrm : AVX512XSI<0x7E, MRMSrcMem, (outs VR128X:$dst),
 // AVX-512 MOVSS, MOVSD
 //===----------------------------------------------------------------------===//
 
-multiclass avx512_move_scalar <string asm, SDNode OpNode,
-                               X86VectorVTInfo _> {
-  defm rr_Int : AVX512_maskable_scalar<0x10, MRMSrcReg, _, (outs _.RC:$dst),
-                    (ins _.RC:$src1, _.RC:$src2),
-                    asm, "$src2, $src1","$src1, $src2",
-                    (_.VT (OpNode (_.VT _.RC:$src1),
-                                  (_.VT _.RC:$src2))),
-                    IIC_SSE_MOV_S_RR>, EVEX_4V;
-  let Constraints = "$src1 = $dst" in
-  defm rm_Int : AVX512_maskable_3src_scalar<0x10, MRMSrcMem, _,
-                    (outs _.RC:$dst),
-                    (ins _.ScalarMemOp:$src),
-                    asm,"$src","$src",
-                    (_.VT (OpNode (_.VT _.RC:$src1),
-                                  (_.VT (scalar_to_vector
-                                             (_.ScalarLdFrag addr:$src)))))>, EVEX;
-  let isCodeGenOnly = 1 in {
-    def rr : AVX512PI<0x10, MRMSrcReg, (outs _.RC:$dst),
-                      (ins _.RC:$src1, _.FRC:$src2),
-                      !strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
-                      [(set _.RC:$dst, (_.VT (OpNode _.RC:$src1,
-                                              (scalar_to_vector _.FRC:$src2))))],
-                      _.ExeDomain,IIC_SSE_MOV_S_RR>, EVEX_4V;
-    def rm : AVX512PI<0x10, MRMSrcMem, (outs _.FRC:$dst), (ins _.ScalarMemOp:$src),
-                      !strconcat(asm, "\t{$src, $dst|$dst, $src}"),
-                      [(set _.FRC:$dst, (_.ScalarLdFrag addr:$src))],
-                      _.ExeDomain, IIC_SSE_MOV_S_RM>, EVEX;
+multiclass avx512_move_scalar<string asm, SDNode OpNode,
+                              X86VectorVTInfo _> {
+  def rr : AVX512PI<0x10, MRMSrcReg, (outs _.RC:$dst),
+             (ins _.RC:$src1, _.FRC:$src2),
+             !strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+             [(set _.RC:$dst, (_.VT (OpNode _.RC:$src1,
+                               (scalar_to_vector _.FRC:$src2))))],
+             _.ExeDomain,IIC_SSE_MOV_S_RR>, EVEX_4V;
+  def rrkz : AVX512PI<0x10, MRMSrcReg, (outs _.RC:$dst),
+              (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2),
+              !strconcat(asm, "\t{$src2, $src1, $dst {${mask}} {z}|",
+              "$dst {${mask}} {z}, $src1, $src2}"),
+              [(set _.RC:$dst, (_.VT (X86selects _.KRCWM:$mask,
+                                      (_.VT (OpNode _.RC:$src1, _.RC:$src2)),
+                                      _.ImmAllZerosV)))],
+              _.ExeDomain,IIC_SSE_MOV_S_RR>, EVEX_4V, EVEX_KZ;
+  let Constraints = "$src0 = $dst" in
+  def rrk : AVX512PI<0x10, MRMSrcReg, (outs _.RC:$dst),
+             (ins _.RC:$src0, _.KRCWM:$mask, _.RC:$src1, _.RC:$src2),
+             !strconcat(asm, "\t{$src2, $src1, $dst {${mask}}|",
+             "$dst {${mask}}, $src1, $src2}"),
+             [(set _.RC:$dst, (_.VT (X86selects _.KRCWM:$mask,
+                                     (_.VT (OpNode _.RC:$src1, _.RC:$src2)),
+                                     (_.VT _.RC:$src0))))],
+             _.ExeDomain,IIC_SSE_MOV_S_RR>, EVEX_4V, EVEX_K;
+  def rm : AVX512PI<0x10, MRMSrcMem, (outs _.FRC:$dst), (ins _.ScalarMemOp:$src),
+             !strconcat(asm, "\t{$src, $dst|$dst, $src}"),
+             [(set _.FRC:$dst, (_.ScalarLdFrag addr:$src))],
+             _.ExeDomain, IIC_SSE_MOV_S_RM>, EVEX;
+  let mayLoad = 1, hasSideEffects = 0 in {
+    let Constraints = "$src0 = $dst" in
+    def rmk : AVX512PI<0x10, MRMSrcMem, (outs _.RC:$dst),
+                (ins _.RC:$src0, _.KRCWM:$mask, _.ScalarMemOp:$src),
+                !strconcat(asm, "\t{$src, $dst {${mask}}|",
+                "$dst {${mask}}, $src}"),
+                [], _.ExeDomain, IIC_SSE_MOV_S_RM>, EVEX, EVEX_K;
+    def rmkz : AVX512PI<0x10, MRMSrcMem, (outs _.RC:$dst),
+                (ins _.KRCWM:$mask, _.ScalarMemOp:$src),
+                !strconcat(asm, "\t{$src, $dst {${mask}} {z}|",
+                "$dst {${mask}} {z}, $src}"),
+                [], _.ExeDomain, IIC_SSE_MOV_S_RM>, EVEX, EVEX_KZ;
   }
   def mr: AVX512PI<0x11, MRMDestMem, (outs), (ins _.ScalarMemOp:$dst, _.FRC:$src),
              !strconcat(asm, "\t{$src, $dst|$dst, $src}"),
              [(store _.FRC:$src, addr:$dst)], _.ExeDomain, IIC_SSE_MOV_S_MR>,
              EVEX;
-  let mayStore = 1 in
+  let mayStore = 1, hasSideEffects = 0 in
   def mrk: AVX512PI<0x11, MRMDestMem, (outs),
               (ins _.ScalarMemOp:$dst, VK1WM:$mask, _.FRC:$src),
               !strconcat(asm, "\t{$src, $dst {${mask}}|$dst {${mask}}, $src}"),
@@ -3071,11 +3085,11 @@ defm VMOVSDZ : avx512_move_scalar<"vmovsd", X86Movsd, f64x_info>,
                                   VEX_LIG, XD, VEX_W, EVEX_CD8<64, CD8VT1>;
 
 def : Pat<(f32 (X86selects VK1WM:$mask, (f32 FR32X:$src1), (f32 FR32X:$src2))),
-          (COPY_TO_REGCLASS (VMOVSSZrr_Intk (COPY_TO_REGCLASS FR32X:$src2, VR128X),
+          (COPY_TO_REGCLASS (VMOVSSZrrk (COPY_TO_REGCLASS FR32X:$src2, VR128X),
           VK1WM:$mask, (v4f32 (IMPLICIT_DEF)),(COPY_TO_REGCLASS FR32X:$src1, VR128X)), FR32X)>;
 
 def : Pat<(f64 (X86selects VK1WM:$mask, (f64 FR64X:$src1), (f64 FR64X:$src2))),
-          (COPY_TO_REGCLASS (VMOVSDZrr_Intk (COPY_TO_REGCLASS FR64X:$src2, VR128X),
+          (COPY_TO_REGCLASS (VMOVSDZrrk (COPY_TO_REGCLASS FR64X:$src2, VR128X),
           VK1WM:$mask, (v2f64 (IMPLICIT_DEF)), (COPY_TO_REGCLASS FR64X:$src1, VR128X)), FR64X)>;
 
 def : Pat<(int_x86_avx512_mask_store_ss addr:$dst, VR128X:$src, GR8:$mask),
diff --git a/llvm/test/CodeGen/X86/avx512-intrinsics.ll b/llvm/test/CodeGen/X86/avx512-intrinsics.ll
index cab1aae11424..cc5b7916c0b4 100644
--- a/llvm/test/CodeGen/X86/avx512-intrinsics.ll
+++ b/llvm/test/CodeGen/X86/avx512-intrinsics.ll
@@ -5413,7 +5413,7 @@ define <2 x double>@test_int_x86_avx512_mask_move_sd_rrk(<2 x double> %x0, <2 x
 ; CHECK-NEXT:    andl $1, %edi
 ; CHECK-NEXT:    kmovw %edi, %k1
 ; CHECK-NEXT:    vmovsd %xmm1, %xmm0, %xmm2 {%k1}
-; CHECK-NEXT:    vmovaps %zmm2, %zmm0
+; CHECK-NEXT:    vmovapd %zmm2, %zmm0
 ; CHECK-NEXT:    retq
   %res = call <2 x double> @llvm.x86.avx512.mask.move.sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3)
   ret <2 x double> %res
diff --git a/llvm/test/CodeGen/X86/avx512-select.ll b/llvm/test/CodeGen/X86/avx512-select.ll
index fab6b3dda242..d4b9c19202e4 100644
--- a/llvm/test/CodeGen/X86/avx512-select.ll
+++ b/llvm/test/CodeGen/X86/avx512-select.ll
@@ -52,7 +52,7 @@ define double @select03(double %a, double %b, double %c, double %eps) {
 ; CHECK:       ## BB#0:
 ; CHECK-NEXT:    vcmplesd %xmm0, %xmm3, %k1
 ; CHECK-NEXT:    vmovsd %xmm2, %xmm0, %xmm1 {%k1}
-; CHECK-NEXT:    vmovaps %zmm1, %zmm0
+; CHECK-NEXT:    vmovapd %zmm1, %zmm0
 ; CHECK-NEXT:    retq
   %cmp = fcmp oge double %a, %eps
   %cond = select i1 %cmp, double %c, double %b
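
Note: the X86selects Pat entries added in X86InstrAVX512.td are what let a
scalar IR select, whose condition comes from a floating-point compare, fold
into a single write-masked vmovss/vmovsd. A minimal IR sketch exercising the
f64 pattern follows; it is the same shape as the select03 test above,
completed with its return, and should compile with something like
llc -mcpu=knl:

    define double @select03(double %a, double %b, double %c, double %eps) {
      %cmp = fcmp oge double %a, %eps               ; lowers to vcmplesd, writing %k1
      %cond = select i1 %cmp, double %c, double %b  ; folds into vmovsd {%k1}
      ret double %cond
    }

The vmovaps -> vmovapd updates in both tests' CHECK lines follow from the new
defs taking their execution domain from _.ExeDomain: the masked vmovsd result
now lives in the packed-double domain, so the register copy that follows it is
emitted as vmovapd rather than vmovaps.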