forked from OSchip/llvm-project
[x86][inline-asm][AVX512][clang][PART-1] Introducing "k" and "Yk" constraints for extended inline assembly, enabling use of AVX512 masked vectorized instructions.
Commit on behalf of mharoush. Extending inline assembly support, compatible with GCC, as follows: the "k" constraint hints the compiler to select any of the AVX512 k0-k7 registers. The "Yk" constraint is a subset of "k" excluding k0, which is not allowed to be used as a mask. Reviewer: 1. rnk Differential Revision: https://reviews.llvm.org/D25063 llvm-svn: 285604
This commit is contained in:
parent
8a92dcc53b
commit
849a6a5e5a
|
@ -3997,6 +3997,7 @@ X86TargetInfo::validateAsmConstraint(const char *&Name,
|
|||
case 't': // Any SSE register, when SSE2 is enabled.
|
||||
case 'i': // Any SSE register, when SSE2 and inter-unit moves enabled.
|
||||
case 'm': // Any MMX register, when inter-unit moves enabled.
|
||||
case 'k': // AVX512 arch mask registers: k1-k7.
|
||||
Info.setAllowsRegister();
|
||||
return true;
|
||||
}
|
||||
|
@ -4018,6 +4019,8 @@ X86TargetInfo::validateAsmConstraint(const char *&Name,
|
|||
case 'q': // Any register accessible as [r]l: a, b, c, and d.
|
||||
case 'y': // Any MMX register.
|
||||
case 'x': // Any SSE register.
|
||||
case 'k': // Any AVX512 mask register (same as Yk, additionally allows k0
|
||||
// for intermediate k reg operations).
|
||||
case 'Q': // Any register accessible as [r]h: a, b, c, and d.
|
||||
case 'R': // "Legacy" registers: ax, bx, cx, dx, di, si, sp, bp.
|
||||
case 'l': // "Index" registers: any general register that can be used as an
|
||||
|
@ -4051,6 +4054,8 @@ bool X86TargetInfo::validateOperandSize(StringRef Constraint,
|
|||
unsigned Size) const {
|
||||
switch (Constraint[0]) {
|
||||
default: break;
|
||||
case 'k':
|
||||
// Registers k0-k7 (AVX512) size limit is 64 bit.
|
||||
case 'y':
|
||||
return Size <= 64;
|
||||
case 'f':
|
||||
|
@ -4071,6 +4076,7 @@ bool X86TargetInfo::validateOperandSize(StringRef Constraint,
|
|||
default: break;
|
||||
case 'm':
|
||||
// 'Ym' is synonymous with 'y'.
|
||||
case 'k':
|
||||
return Size <= 64;
|
||||
case 'i':
|
||||
case 't':
|
||||
|
@ -4102,6 +4108,20 @@ X86TargetInfo::convertConstraint(const char *&Constraint) const {
|
|||
return std::string("{st}");
|
||||
case 'u': // second from top of floating point stack.
|
||||
return std::string("{st(1)}"); // second from top of floating point stack.
|
||||
case 'Y':
|
||||
switch (Constraint[1]) {
|
||||
default:
|
||||
// Break from inner switch and fall through (copy single char),
|
||||
// continue parsing after copying the current constraint into
|
||||
// the return string.
|
||||
break;
|
||||
case 'k':
|
||||
// "^" hints llvm that this is a 2 letter constraint.
|
||||
// "Constraint++" is used to promote the string iterator
|
||||
// to the next constraint.
|
||||
return std::string("^") + std::string(Constraint++, 2);
|
||||
}
|
||||
LLVM_FALLTHROUGH;
|
||||
default:
|
||||
return std::string(1, *Constraint);
|
||||
}
|
||||
|
|
|
@ -0,0 +1,59 @@
|
|||
// RUN: %clang_cc1 %s -triple=x86_64-apple-darwin -target-cpu skylake-avx512 -O0 -emit-llvm -S -o - -Wall -Werror | FileCheck %s
|
||||
// This test checks validity of AT&T/GCC style inline assembly for avx512 k and Yk constraints.
|
||||
// Also checks that mask registers allow operands of flexible type (size <= 64 bits).
|
||||
|
||||
void mask_Yk_i8(char msk){
|
||||
//CHECK: vpaddb %xmm1, %xmm0, %xmm1 {%k1}
|
||||
asm ("vpaddb\t %%xmm1, %%xmm0, %%xmm1 %{%0%}\t"
|
||||
: //output
|
||||
: "Yk" (msk)); //inputs
|
||||
}
|
||||
|
||||
void mask_Yk_i16(short msk){
|
||||
//CHECK: vpaddb %xmm1, %xmm0, %xmm1 {%k1}
|
||||
asm ("vpaddb\t %%xmm1, %%xmm0, %%xmm1 %{%0%}\t"
|
||||
: //output
|
||||
: "Yk" (msk)); //inputs
|
||||
}
|
||||
|
||||
void mask_Yk_i32(int msk){
|
||||
//CHECK: vpaddb %xmm1, %xmm0, %xmm1 {%k1}
|
||||
asm ("vpaddb\t %%xmm1, %%xmm0, %%xmm1 %{%0%}\t"
|
||||
: //output
|
||||
: "Yk" (msk)); //inputs
|
||||
}
|
||||
|
||||
void mask_Yk_i64(long long msk){
|
||||
//CHECK: vpaddb %xmm1, %xmm0, %xmm1 {%k1}
|
||||
asm ("vpaddb\t %%xmm1, %%xmm0, %%xmm1 %{%0%}\t"
|
||||
: //output
|
||||
: "Yk" (msk)); //inputs
|
||||
}
|
||||
|
||||
void k_wise_op_i8(char msk_dst,char msk_src1,char msk_src2){
|
||||
//CHECK: kandw %k1, %k0, %k0
|
||||
asm ("kandw\t%2, %1, %0"
|
||||
: "=k" (msk_dst)
|
||||
: "k" (msk_src1), "k" (msk_src2));
|
||||
}
|
||||
|
||||
void k_wise_op_i16(short msk_dst, short msk_src1, short msk_src2){
|
||||
//CHECK: kandw %k1, %k0, %k0
|
||||
asm ("kandw\t%2, %1, %0"
|
||||
: "=k" (msk_dst)
|
||||
: "k" (msk_src1), "k" (msk_src2));
|
||||
}
|
||||
|
||||
void k_wise_op_i32(int msk_dst, int msk_src1, int msk_src2){
|
||||
//CHECK: kandw %k1, %k0, %k0
|
||||
asm ("kandw\t%2, %1, %0"
|
||||
: "=k" (msk_dst)
|
||||
: "k" (msk_src1), "k" (msk_src2));
|
||||
}
|
||||
|
||||
void k_wise_op_i64(long long msk_dst, long long msk_src1, long long msk_src2){
|
||||
//CHECK: kandw %k1, %k0, %k0
|
||||
asm ("kandw\t%2, %1, %0"
|
||||
: "=k" (msk_dst)
|
||||
: "k" (msk_src1), "k" (msk_src2));
|
||||
}
|
Loading…
Reference in New Issue