diff --git a/llvm/include/llvm/IR/IntrinsicsX86.td b/llvm/include/llvm/IR/IntrinsicsX86.td index 018aa9a87d07..5f8ae27839c3 100644 --- a/llvm/include/llvm/IR/IntrinsicsX86.td +++ b/llvm/include/llvm/IR/IntrinsicsX86.td @@ -3184,6 +3184,18 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". [IntrNoMem]>; } +let TargetPrefix = "x86" in { + def int_x86_avx512_mask_valign_q_512 : GCCBuiltin<"__builtin_ia32_alignq512_mask">, + Intrinsic<[llvm_v8i64_ty], + [llvm_v8i64_ty, llvm_v8i64_ty, llvm_i8_ty, llvm_v8i64_ty, llvm_i8_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_valign_d_512 : GCCBuiltin<"__builtin_ia32_alignd512_mask">, + Intrinsic<[llvm_v16i32_ty], + [llvm_v16i32_ty, llvm_v16i32_ty, llvm_i8_ty, llvm_v16i32_ty, llvm_i16_ty], + [IntrNoMem]>; +} + // Misc. let TargetPrefix = "x86" in { def int_x86_avx512_mask_cmp_ps_512 : GCCBuiltin<"__builtin_ia32_cmpps512_mask">, diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td index 7328d992c1f5..5284c3f65109 100644 --- a/llvm/lib/Target/X86/X86InstrAVX512.td +++ b/llvm/lib/Target/X86/X86InstrAVX512.td @@ -4461,9 +4461,9 @@ def : Pat<(v8i64 (X86Shufp VR512:$src1, (memopv8i64 addr:$src2), (i8 imm:$imm))), (VSHUFPDZrmi VR512:$src1, addr:$src2, imm:$imm)>; -multiclass avx512_valign { +multiclass avx512_valign { def rri : AVX512AIi8<0x03, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2, i8imm:$src3), !strconcat("valign"##Suffix, @@ -4473,10 +4473,39 @@ multiclass avx512_valign, EVEX_4V; + let Constraints = "$src0 = $dst", AddedComplexity=30 in + def rrik : AVX512AIi8<0x03, MRMSrcReg, (outs RC:$dst), + (ins RC:$src0, KRC:$mask, RC:$src1, RC:$src2, i8imm:$src3), + !strconcat("valign"##Suffix, + " \t{$src3, $src2, $src1, $mask, $dst|" + "$dst, $mask, $src1, $src2, $src3}"), + [(set RC:$dst, + (IntVT (vselect KRC:$mask, + (X86VAlign RC:$src2, RC:$src1, + (i8 imm:$src3)), + RC:$src0)))]>, + EVEX_4V, EVEX_K; + // Also match valign of packed floats. def : Pat<(FloatVT (X86VAlign RC:$src1, RC:$src2, (i8 imm:$imm))), (!cast(NAME##rri) RC:$src2, RC:$src1, imm:$imm)>; + // Non-masking intrinsic call. + def : Pat<(IntVT + (!cast("int_x86_avx512_mask_valign_"##Suffix##"_512") + RC:$src1, RC:$src2, imm:$src3, + (IntVT (bitconvert (v16i32 immAllZerosV))), -1)), + (!cast(NAME#rri) RC:$src1, RC:$src2, imm:$src3)>; + + // Masking intrinsic call. + def : Pat<(IntVT + (!cast("int_x86_avx512_mask_valign_"##Suffix##"_512") + RC:$src1, RC:$src2, imm:$src3, + RC:$src4, MRC:$mask)), + (!cast(NAME#rrik) RC:$src4, + (COPY_TO_REGCLASS MRC:$mask, KRC), RC:$src1, + RC:$src2, imm:$src3)>; + let mayLoad = 1 in def rmi : AVX512AIi8<0x03, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2, i8imm:$src3), @@ -4485,9 +4514,9 @@ multiclass avx512_valign, EVEX_4V; } -defm VALIGND : avx512_valign<"d", VR512, i512mem, v16i32, v16f32>, +defm VALIGND : avx512_valign<"d", VR512, VK16WM, GR16, i512mem, v16i32, v16f32>, EVEX_V512, EVEX_CD8<32, CD8VF>; -defm VALIGNQ : avx512_valign<"q", VR512, i512mem, v8i64, v8f64>, +defm VALIGNQ : avx512_valign<"q", VR512, VK8WM, GR8, i512mem, v8i64, v8f64>, VEX_W, EVEX_V512, EVEX_CD8<64, CD8VF>; // Helper fragments to match sext vXi1 to vXiY. diff --git a/llvm/test/CodeGen/X86/avx512-intrinsics.ll b/llvm/test/CodeGen/X86/avx512-intrinsics.ll index 6f34d4596f9f..eaaf915b0cb2 100644 --- a/llvm/test/CodeGen/X86/avx512-intrinsics.ll +++ b/llvm/test/CodeGen/X86/avx512-intrinsics.ll @@ -611,3 +611,19 @@ define <8 x i64> @test_vmovntdqa(i8 *%x) { } declare <8 x i64> @llvm.x86.avx512.movntdqa(i8*) + +define <8 x i64> @test_valign_q(<8 x i64> %a, <8 x i64> %b) { +; CHECK-LABEL: test_valign_q: +; CHECK: valignq $2, %zmm1, %zmm0, %zmm0 + %res = call <8 x i64> @llvm.x86.avx512.mask.valign.q.512(<8 x i64> %a, <8 x i64> %b, i8 2, <8 x i64> zeroinitializer, i8 -1) + ret <8 x i64> %res +} + +define <8 x i64> @test_mask_valign_q(<8 x i64> %a, <8 x i64> %b, <8 x i64> %src, i8 %mask) { +; CHECK-LABEL: test_mask_valign_q: +; CHECK: valignq $2, %zmm1, %zmm0, %k1, %zmm2 + %res = call <8 x i64> @llvm.x86.avx512.mask.valign.q.512(<8 x i64> %a, <8 x i64> %b, i8 2, <8 x i64> %src, i8 %mask) + ret <8 x i64> %res +} + +declare <8 x i64> @llvm.x86.avx512.mask.valign.q.512(<8 x i64>, <8 x i64>, i8, <8 x i64>, i8) diff --git a/llvm/test/MC/X86/avx512-encodings.s b/llvm/test/MC/X86/avx512-encodings.s index e71b83cfd304..e96056d79738 100644 --- a/llvm/test/MC/X86/avx512-encodings.s +++ b/llvm/test/MC/X86/avx512-encodings.s @@ -3799,3 +3799,7 @@ vpermi2q 0x80(%rax,%rbx,2), %zmm2, %zmm26 {%k3} // CHECK: vpermt2d // CHECK: encoding: [0x62,0x32,0x4d,0xc2,0x7e,0x24,0xad,0x05,0x00,0x00,0x00] vpermt2d 5(,%r13,4), %zmm22, %zmm12 {%k2} {z} + +// CHECK: valignq +// CHECK: encoding: [0x62,0xf3,0xfd,0x48,0x03,0x4c,0x24,0x04,0x02] +valignq $2, 0x100(%rsp), %zmm0, %zmm1