[AVX512] Add masking variant and intrinsics for valignd/q

This is similar to what I did with the two-source permutation recently.  (It's
almost too similar; we should consider generating the masking variants with
some tablegen help.)

Both encoding and intrinsic tests are added as well.  For the latter, the IR is
what the intrinsic test on the clang side generates.
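
For illustration (not part of this change), here is a minimal C-level sketch of
how the new masked builtin could be driven from the clang side; the wrapper name
is made up, only __builtin_ia32_alignq512_mask comes from the intrinsic
definitions below, and it assumes a clang of the same vintage with AVX-512
builtin support (-mavx512f):

  #include <immintrin.h>

  // Hypothetical wrapper around the new masked valignq builtin: compute the
  // 2-element alignment of a and b and merge the result into src under the
  // write-mask, following the (src1, src2, imm, passthru, mask) operand order
  // of the llvm.x86.avx512.mask.valign.q.512 intrinsic.
  static inline __m512i mask_valign_q2(__m512i src, __mmask8 mask,
                                       __m512i a, __m512i b) {
    return (__m512i)__builtin_ia32_alignq512_mask((__v8di)a, (__v8di)b, 2,
                                                  (__v8di)src, mask);
  }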

Part of <rdar://problem/17688758>

llvm-svn: 214890
Adam Nemet 2014-08-05 17:23:04 +00:00
parent 4688a2e5cb
commit fd2161b710
4 changed files with 66 additions and 5 deletions

@@ -3184,6 +3184,18 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
                     [IntrNoMem]>;
 }
+
+let TargetPrefix = "x86" in {
+  def int_x86_avx512_mask_valign_q_512 : GCCBuiltin<"__builtin_ia32_alignq512_mask">,
+        Intrinsic<[llvm_v8i64_ty],
+                  [llvm_v8i64_ty, llvm_v8i64_ty, llvm_i8_ty, llvm_v8i64_ty, llvm_i8_ty],
+                  [IntrNoMem]>;
+
+  def int_x86_avx512_mask_valign_d_512 : GCCBuiltin<"__builtin_ia32_alignd512_mask">,
+        Intrinsic<[llvm_v16i32_ty],
+                  [llvm_v16i32_ty, llvm_v16i32_ty, llvm_i8_ty, llvm_v16i32_ty, llvm_i16_ty],
+                  [IntrNoMem]>;
+}
 
 // Misc.
 let TargetPrefix = "x86" in {
   def int_x86_avx512_mask_cmp_ps_512 : GCCBuiltin<"__builtin_ia32_cmpps512_mask">,

@@ -4461,9 +4461,9 @@ def : Pat<(v8i64 (X86Shufp VR512:$src1,
                        (memopv8i64 addr:$src2), (i8 imm:$imm))),
           (VSHUFPDZrmi VR512:$src1, addr:$src2, imm:$imm)>;
 
-multiclass avx512_valign<string Suffix, RegisterClass RC,
-                         X86MemOperand x86memop, ValueType IntVT,
-                         ValueType FloatVT> {
+multiclass avx512_valign<string Suffix, RegisterClass RC, RegisterClass KRC,
+                         RegisterClass MRC, X86MemOperand x86memop,
+                         ValueType IntVT, ValueType FloatVT> {
   def rri : AVX512AIi8<0x03, MRMSrcReg, (outs RC:$dst),
                        (ins RC:$src1, RC:$src2, i8imm:$src3),
                        !strconcat("valign"##Suffix,
@@ -4473,10 +4473,39 @@ multiclass avx512_valign<string Suffix, RegisterClass RC,
                          (IntVT (X86VAlign RC:$src2, RC:$src1,
                                            (i8 imm:$src3))))]>, EVEX_4V;
+
+  let Constraints = "$src0 = $dst", AddedComplexity=30 in
+  def rrik : AVX512AIi8<0x03, MRMSrcReg, (outs RC:$dst),
+                        (ins RC:$src0, KRC:$mask, RC:$src1, RC:$src2, i8imm:$src3),
+                        !strconcat("valign"##Suffix,
+                                   " \t{$src3, $src2, $src1, $mask, $dst|"
+                                   "$dst, $mask, $src1, $src2, $src3}"),
+                        [(set RC:$dst,
+                              (IntVT (vselect KRC:$mask,
+                                              (X86VAlign RC:$src2, RC:$src1,
+                                                         (i8 imm:$src3)),
+                                              RC:$src0)))]>,
+             EVEX_4V, EVEX_K;
 
   // Also match valign of packed floats.
   def : Pat<(FloatVT (X86VAlign RC:$src1, RC:$src2, (i8 imm:$imm))),
             (!cast<Instruction>(NAME##rri) RC:$src2, RC:$src1, imm:$imm)>;
+
+  // Non-masking intrinsic call.
+  def : Pat<(IntVT
+               (!cast<Intrinsic>("int_x86_avx512_mask_valign_"##Suffix##"_512")
+                  RC:$src1, RC:$src2, imm:$src3,
+                  (IntVT (bitconvert (v16i32 immAllZerosV))), -1)),
+            (!cast<Instruction>(NAME#rri) RC:$src1, RC:$src2, imm:$src3)>;
+
+  // Masking intrinsic call.
+  def : Pat<(IntVT
+               (!cast<Intrinsic>("int_x86_avx512_mask_valign_"##Suffix##"_512")
+                  RC:$src1, RC:$src2, imm:$src3,
+                  RC:$src4, MRC:$mask)),
+            (!cast<Instruction>(NAME#rrik) RC:$src4,
+               (COPY_TO_REGCLASS MRC:$mask, KRC), RC:$src1,
+               RC:$src2, imm:$src3)>;
 
   let mayLoad = 1 in
   def rmi : AVX512AIi8<0x03, MRMSrcMem, (outs RC:$dst),
                        (ins RC:$src1, x86memop:$src2, i8imm:$src3),
@@ -4485,9 +4514,9 @@ multiclass avx512_valign<string Suffix, RegisterClass RC,
                        "$dst, $src1, $src2, $src3}"),
                        []>, EVEX_4V;
 }
-defm VALIGND : avx512_valign<"d", VR512, i512mem, v16i32, v16f32>,
+defm VALIGND : avx512_valign<"d", VR512, VK16WM, GR16, i512mem, v16i32, v16f32>,
                EVEX_V512, EVEX_CD8<32, CD8VF>;
-defm VALIGNQ : avx512_valign<"q", VR512, i512mem, v8i64, v8f64>,
+defm VALIGNQ : avx512_valign<"q", VR512, VK8WM, GR8, i512mem, v8i64, v8f64>,
                VEX_W, EVEX_V512, EVEX_CD8<64, CD8VF>;
 
 // Helper fragments to match sext vXi1 to vXiY.

@@ -611,3 +611,19 @@ define <8 x i64> @test_vmovntdqa(i8 *%x) {
 }
 declare <8 x i64> @llvm.x86.avx512.movntdqa(i8*)
+
+define <8 x i64> @test_valign_q(<8 x i64> %a, <8 x i64> %b) {
+; CHECK-LABEL: test_valign_q:
+; CHECK: valignq $2, %zmm1, %zmm0, %zmm0
+  %res = call <8 x i64> @llvm.x86.avx512.mask.valign.q.512(<8 x i64> %a, <8 x i64> %b, i8 2, <8 x i64> zeroinitializer, i8 -1)
+  ret <8 x i64> %res
+}
+
+define <8 x i64> @test_mask_valign_q(<8 x i64> %a, <8 x i64> %b, <8 x i64> %src, i8 %mask) {
+; CHECK-LABEL: test_mask_valign_q:
+; CHECK: valignq $2, %zmm1, %zmm0, %k1, %zmm2
+  %res = call <8 x i64> @llvm.x86.avx512.mask.valign.q.512(<8 x i64> %a, <8 x i64> %b, i8 2, <8 x i64> %src, i8 %mask)
+  ret <8 x i64> %res
+}
+
+declare <8 x i64> @llvm.x86.avx512.mask.valign.q.512(<8 x i64>, <8 x i64>, i8, <8 x i64>, i8)

@@ -3799,3 +3799,7 @@ vpermi2q 0x80(%rax,%rbx,2), %zmm2, %zmm26 {%k3}
 // CHECK: vpermt2d
 // CHECK: encoding: [0x62,0x32,0x4d,0xc2,0x7e,0x24,0xad,0x05,0x00,0x00,0x00]
 vpermt2d 5(,%r13,4), %zmm22, %zmm12 {%k2} {z}
+
+// CHECK: valignq
+// CHECK: encoding: [0x62,0xf3,0xfd,0x48,0x03,0x4c,0x24,0x04,0x02]
+valignq $2, 0x100(%rsp), %zmm0, %zmm1