[X86] Add kortest intrinsics for 8, 32, and 64 bit masks. Add new intrinsic names for 16 bit masks.

This matches gcc and icc despite not being documented in the Intel Intrinsics Guide.

llvm-svn: 340798
This commit is contained in:
Craig Topper 2018-08-28 06:28:25 +00:00
parent c7506b28c1
commit cb5fd56c7f
8 changed files with 275 additions and 6 deletions

View File

@ -1749,8 +1749,14 @@ TARGET_BUILTIN(__builtin_ia32_korqi, "UcUcUc", "nc", "avx512dq")
TARGET_BUILTIN(__builtin_ia32_korhi, "UsUsUs", "nc", "avx512f")
TARGET_BUILTIN(__builtin_ia32_korsi, "UiUiUi", "nc", "avx512bw")
TARGET_BUILTIN(__builtin_ia32_kordi, "ULLiULLiULLi", "nc", "avx512bw")
TARGET_BUILTIN(__builtin_ia32_kortestcqi, "iUcUc", "nc", "avx512dq")
TARGET_BUILTIN(__builtin_ia32_kortestzqi, "iUcUc", "nc", "avx512dq")
TARGET_BUILTIN(__builtin_ia32_kortestchi, "iUsUs", "nc", "avx512f")
TARGET_BUILTIN(__builtin_ia32_kortestzhi, "iUsUs", "nc", "avx512f")
TARGET_BUILTIN(__builtin_ia32_kortestcsi, "iUiUi", "nc", "avx512bw")
TARGET_BUILTIN(__builtin_ia32_kortestzsi, "iUiUi", "nc", "avx512bw")
TARGET_BUILTIN(__builtin_ia32_kortestcdi, "iULLiULLi", "nc", "avx512bw")
TARGET_BUILTIN(__builtin_ia32_kortestzdi, "iULLiULLi", "nc", "avx512bw")
TARGET_BUILTIN(__builtin_ia32_kunpckhi, "UsUsUs", "nc", "avx512f")
TARGET_BUILTIN(__builtin_ia32_kxnorqi, "UcUcUc", "nc", "avx512dq")
TARGET_BUILTIN(__builtin_ia32_kxnorhi, "UsUsUs", "nc", "avx512f")

View File

@ -10012,14 +10012,21 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
return EmitX86MaskedCompare(*this, CC, false, Ops);
}
case X86::BI__builtin_ia32_kortestcqi:
case X86::BI__builtin_ia32_kortestchi:
case X86::BI__builtin_ia32_kortestzhi: {
case X86::BI__builtin_ia32_kortestcsi:
case X86::BI__builtin_ia32_kortestcdi: {
Value *Or = EmitX86MaskLogic(*this, Instruction::Or, Ops);
Value *C;
if (BuiltinID == X86::BI__builtin_ia32_kortestchi)
C = llvm::Constant::getAllOnesValue(Builder.getInt16Ty());
else
C = llvm::Constant::getNullValue(Builder.getInt16Ty());
Value *C = llvm::Constant::getAllOnesValue(Ops[0]->getType());
Value *Cmp = Builder.CreateICmpEQ(Or, C);
return Builder.CreateZExt(Cmp, ConvertType(E->getType()));
}
case X86::BI__builtin_ia32_kortestzqi:
case X86::BI__builtin_ia32_kortestzhi:
case X86::BI__builtin_ia32_kortestzsi:
case X86::BI__builtin_ia32_kortestzdi: {
Value *Or = EmitX86MaskLogic(*this, Instruction::Or, Ops);
Value *C = llvm::Constant::getNullValue(Ops[0]->getType());
Value *Cmp = Builder.CreateICmpEQ(Or, C);
return Builder.CreateZExt(Cmp, ConvertType(E->getType()));
}

View File

@ -107,6 +107,42 @@ _kxor_mask64(__mmask64 __A, __mmask64 __B)
return (__mmask64)__builtin_ia32_kxordi((__mmask64)__A, (__mmask64)__B);
}
static __inline__ unsigned char __DEFAULT_FN_ATTRS
_kortestc_mask32_u8(__mmask32 __A, __mmask32 __B)
{
return (unsigned char)__builtin_ia32_kortestcsi(__A, __B);
}
static __inline__ unsigned char __DEFAULT_FN_ATTRS
_kortestz_mask32_u8(__mmask32 __A, __mmask32 __B)
{
return (unsigned char)__builtin_ia32_kortestzsi(__A, __B);
}
static __inline__ unsigned char __DEFAULT_FN_ATTRS
_kortest_mask32_u8(__mmask32 __A, __mmask32 __B, unsigned char *__C) {
*__C = (unsigned char)__builtin_ia32_kortestcsi(__A, __B);
return (unsigned char)__builtin_ia32_kortestzsi(__A, __B);
}
static __inline__ unsigned char __DEFAULT_FN_ATTRS
_kortestc_mask64_u8(__mmask64 __A, __mmask64 __B)
{
return (unsigned char)__builtin_ia32_kortestcdi(__A, __B);
}
static __inline__ unsigned char __DEFAULT_FN_ATTRS
_kortestz_mask64_u8(__mmask64 __A, __mmask64 __B)
{
return (unsigned char)__builtin_ia32_kortestzdi(__A, __B);
}
static __inline__ unsigned char __DEFAULT_FN_ATTRS
_kortest_mask64_u8(__mmask64 __A, __mmask64 __B, unsigned char *__C) {
*__C = (unsigned char)__builtin_ia32_kortestcdi(__A, __B);
return (unsigned char)__builtin_ia32_kortestzdi(__A, __B);
}
/* Integer compare */
#define _mm512_cmp_epi8_mask(a, b, p) \

View File

@ -68,6 +68,24 @@ _kxor_mask8(__mmask8 __A, __mmask8 __B)
return (__mmask8)__builtin_ia32_kxorqi((__mmask8)__A, (__mmask8)__B);
}
static __inline__ unsigned char __DEFAULT_FN_ATTRS
_kortestc_mask8_u8(__mmask8 __A, __mmask8 __B)
{
return (unsigned char)__builtin_ia32_kortestcqi(__A, __B);
}
static __inline__ unsigned char __DEFAULT_FN_ATTRS
_kortestz_mask8_u8(__mmask8 __A, __mmask8 __B)
{
return (unsigned char)__builtin_ia32_kortestzqi(__A, __B);
}
static __inline__ unsigned char __DEFAULT_FN_ATTRS
_kortest_mask8_u8(__mmask8 __A, __mmask8 __B, unsigned char *__C) {
*__C = (unsigned char)__builtin_ia32_kortestcqi(__A, __B);
return (unsigned char)__builtin_ia32_kortestzqi(__A, __B);
}
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_mullo_epi64 (__m512i __A, __m512i __B) {
return (__m512i) ((__v8du) __A * (__v8du) __B);

View File

@ -8351,6 +8351,24 @@ _mm512_kortestz (__mmask16 __A, __mmask16 __B)
return __builtin_ia32_kortestzhi ((__mmask16) __A, (__mmask16) __B);
}
static __inline__ unsigned char __DEFAULT_FN_ATTRS
_kortestc_mask16_u8(__mmask16 __A, __mmask16 __B)
{
return (unsigned char)__builtin_ia32_kortestchi(__A, __B);
}
static __inline__ unsigned char __DEFAULT_FN_ATTRS
_kortestz_mask16_u8(__mmask16 __A, __mmask16 __B)
{
return (unsigned char)__builtin_ia32_kortestzhi(__A, __B);
}
static __inline__ unsigned char __DEFAULT_FN_ATTRS
_kortest_mask16_u8(__mmask16 __A, __mmask16 __B, unsigned char *__C) {
*__C = (unsigned char)__builtin_ia32_kortestchi(__A, __B);
return (unsigned char)__builtin_ia32_kortestzhi(__A, __B);
}
static __inline__ __mmask16 __DEFAULT_FN_ATTRS
_mm512_kunpackb (__mmask16 __A, __mmask16 __B)
{

View File

@ -134,6 +134,98 @@ __mmask64 test_kxor_mask64(__m512i __A, __m512i __B, __m512i __C, __m512i __D, _
__E, __F);
}
unsigned char test_kortestz_mask32_u8(__m512i __A, __m512i __B, __m512i __C, __m512i __D) {
// CHECK-LABEL: @test_kortestz_mask32_u8
// CHECK: [[LHS:%.*]] = bitcast i32 %{{.*}} to <32 x i1>
// CHECK: [[RHS:%.*]] = bitcast i32 %{{.*}} to <32 x i1>
// CHECK: [[OR:%.*]] = or <32 x i1> [[LHS]], [[RHS]]
// CHECK: [[CAST:%.*]] = bitcast <32 x i1> [[OR]] to i32
// CHECK: [[CMP:%.*]] = icmp eq i32 [[CAST]], 0
// CHECK: [[ZEXT:%.*]] = zext i1 [[CMP]] to i32
// CHECK: trunc i32 [[ZEXT]] to i8
return _kortestz_mask32_u8(_mm512_cmpneq_epu16_mask(__A, __B),
_mm512_cmpneq_epu16_mask(__C, __D));
}
unsigned char test_kortestc_mask32_u8(__m512i __A, __m512i __B, __m512i __C, __m512i __D) {
// CHECK-LABEL: @test_kortestc_mask32_u8
// CHECK: [[LHS:%.*]] = bitcast i32 %{{.*}} to <32 x i1>
// CHECK: [[RHS:%.*]] = bitcast i32 %{{.*}} to <32 x i1>
// CHECK: [[OR:%.*]] = or <32 x i1> [[LHS]], [[RHS]]
// CHECK: [[CAST:%.*]] = bitcast <32 x i1> [[OR]] to i32
// CHECK: [[CMP:%.*]] = icmp eq i32 [[CAST]], -1
// CHECK: [[ZEXT:%.*]] = zext i1 [[CMP]] to i32
// CHECK: trunc i32 [[ZEXT]] to i8
return _kortestc_mask32_u8(_mm512_cmpneq_epu16_mask(__A, __B),
_mm512_cmpneq_epu16_mask(__C, __D));
}
unsigned char test_kortest_mask32_u8(__m512i __A, __m512i __B, __m512i __C, __m512i __D, unsigned char *CF) {
// CHECK-LABEL: @test_kortest_mask32_u8
// CHECK: [[LHS:%.*]] = bitcast i32 %{{.*}} to <32 x i1>
// CHECK: [[RHS:%.*]] = bitcast i32 %{{.*}} to <32 x i1>
// CHECK: [[OR:%.*]] = or <32 x i1> [[LHS]], [[RHS]]
// CHECK: [[CAST:%.*]] = bitcast <32 x i1> [[OR]] to i32
// CHECK: [[CMP:%.*]] = icmp eq i32 [[CAST]], -1
// CHECK: [[ZEXT:%.*]] = zext i1 [[CMP]] to i32
// CHECK: trunc i32 [[ZEXT]] to i8
// CHECK: [[LHS2:%.*]] = bitcast i32 %{{.*}} to <32 x i1>
// CHECK: [[RHS2:%.*]] = bitcast i32 %{{.*}} to <32 x i1>
// CHECK: [[OR2:%.*]] = or <32 x i1> [[LHS2]], [[RHS2]]
// CHECK: [[CAST2:%.*]] = bitcast <32 x i1> [[OR2]] to i32
// CHECK: [[CMP2:%.*]] = icmp eq i32 [[CAST2]], 0
// CHECK: [[ZEXT2:%.*]] = zext i1 [[CMP2]] to i32
// CHECK: trunc i32 [[ZEXT2]] to i8
return _kortest_mask32_u8(_mm512_cmpneq_epu16_mask(__A, __B),
_mm512_cmpneq_epu16_mask(__C, __D), CF);
}
unsigned char test_kortestz_mask64_u8(__m512i __A, __m512i __B, __m512i __C, __m512i __D) {
// CHECK-LABEL: @test_kortestz_mask64_u8
// CHECK: [[LHS:%.*]] = bitcast i64 %{{.*}} to <64 x i1>
// CHECK: [[RHS:%.*]] = bitcast i64 %{{.*}} to <64 x i1>
// CHECK: [[OR:%.*]] = or <64 x i1> [[LHS]], [[RHS]]
// CHECK: [[CAST:%.*]] = bitcast <64 x i1> [[OR]] to i64
// CHECK: [[CMP:%.*]] = icmp eq i64 [[CAST]], 0
// CHECK: [[ZEXT:%.*]] = zext i1 [[CMP]] to i32
// CHECK: trunc i32 [[ZEXT]] to i8
return _kortestz_mask64_u8(_mm512_cmpneq_epu8_mask(__A, __B),
_mm512_cmpneq_epu8_mask(__C, __D));
}
unsigned char test_kortestc_mask64_u8(__m512i __A, __m512i __B, __m512i __C, __m512i __D) {
// CHECK-LABEL: @test_kortestc_mask64_u8
// CHECK: [[LHS:%.*]] = bitcast i64 %{{.*}} to <64 x i1>
// CHECK: [[RHS:%.*]] = bitcast i64 %{{.*}} to <64 x i1>
// CHECK: [[OR:%.*]] = or <64 x i1> [[LHS]], [[RHS]]
// CHECK: [[CAST:%.*]] = bitcast <64 x i1> [[OR]] to i64
// CHECK: [[CMP:%.*]] = icmp eq i64 [[CAST]], -1
// CHECK: [[ZEXT:%.*]] = zext i1 [[CMP]] to i32
// CHECK: trunc i32 [[ZEXT]] to i8
return _kortestc_mask64_u8(_mm512_cmpneq_epu8_mask(__A, __B),
_mm512_cmpneq_epu8_mask(__C, __D));
}
unsigned char test_kortest_mask64_u8(__m512i __A, __m512i __B, __m512i __C, __m512i __D, unsigned char *CF) {
// CHECK-LABEL: @test_kortest_mask64_u8
// CHECK: [[LHS:%.*]] = bitcast i64 %{{.*}} to <64 x i1>
// CHECK: [[RHS:%.*]] = bitcast i64 %{{.*}} to <64 x i1>
// CHECK: [[OR:%.*]] = or <64 x i1> [[LHS]], [[RHS]]
// CHECK: [[CAST:%.*]] = bitcast <64 x i1> [[OR]] to i64
// CHECK: [[CMP:%.*]] = icmp eq i64 [[CAST]], -1
// CHECK: [[ZEXT:%.*]] = zext i1 [[CMP]] to i32
// CHECK: trunc i32 [[ZEXT]] to i8
// CHECK: [[LHS2:%.*]] = bitcast i64 %{{.*}} to <64 x i1>
// CHECK: [[RHS2:%.*]] = bitcast i64 %{{.*}} to <64 x i1>
// CHECK: [[OR2:%.*]] = or <64 x i1> [[LHS2]], [[RHS2]]
// CHECK: [[CAST2:%.*]] = bitcast <64 x i1> [[OR2]] to i64
// CHECK: [[CMP2:%.*]] = icmp eq i64 [[CAST2]], 0
// CHECK: [[ZEXT2:%.*]] = zext i1 [[CMP2]] to i32
// CHECK: trunc i32 [[ZEXT2]] to i8
return _kortest_mask64_u8(_mm512_cmpneq_epu8_mask(__A, __B),
_mm512_cmpneq_epu8_mask(__C, __D), CF);
}
__mmask64 test_mm512_cmpeq_epi8_mask(__m512i __a, __m512i __b) {
// CHECK-LABEL: @test_mm512_cmpeq_epi8_mask
// CHECK: icmp eq <64 x i8> %{{.*}}, %{{.*}}

View File

@ -68,6 +68,52 @@ __mmask8 test_kxor_mask8(__m512i __A, __m512i __B, __m512i __C, __m512i __D, __m
__E, __F);
}
unsigned char test_kortestz_mask8_u8(__m512i __A, __m512i __B, __m512i __C, __m512i __D) {
// CHECK-LABEL: @test_kortestz_mask8_u8
// CHECK: [[LHS:%.*]] = bitcast i8 %{{.*}} to <8 x i1>
// CHECK: [[RHS:%.*]] = bitcast i8 %{{.*}} to <8 x i1>
// CHECK: [[OR:%.*]] = or <8 x i1> [[LHS]], [[RHS]]
// CHECK: [[CAST:%.*]] = bitcast <8 x i1> [[OR]] to i8
// CHECK: [[CMP:%.*]] = icmp eq i8 [[CAST]], 0
// CHECK: [[ZEXT:%.*]] = zext i1 [[CMP]] to i32
// CHECK: trunc i32 [[ZEXT]] to i8
return _kortestz_mask8_u8(_mm512_cmpneq_epu64_mask(__A, __B),
_mm512_cmpneq_epu64_mask(__C, __D));
}
unsigned char test_kortestc_mask8_u8(__m512i __A, __m512i __B, __m512i __C, __m512i __D) {
// CHECK-LABEL: @test_kortestc_mask8_u8
// CHECK: [[LHS:%.*]] = bitcast i8 %{{.*}} to <8 x i1>
// CHECK: [[RHS:%.*]] = bitcast i8 %{{.*}} to <8 x i1>
// CHECK: [[OR:%.*]] = or <8 x i1> [[LHS]], [[RHS]]
// CHECK: [[CAST:%.*]] = bitcast <8 x i1> [[OR]] to i8
// CHECK: [[CMP:%.*]] = icmp eq i8 [[CAST]], -1
// CHECK: [[ZEXT:%.*]] = zext i1 [[CMP]] to i32
// CHECK: trunc i32 [[ZEXT]] to i8
return _kortestc_mask8_u8(_mm512_cmpneq_epu64_mask(__A, __B),
_mm512_cmpneq_epu64_mask(__C, __D));
}
unsigned char test_kortest_mask8_u8(__m512i __A, __m512i __B, __m512i __C, __m512i __D, unsigned char *CF) {
// CHECK-LABEL: @test_kortest_mask8_u8
// CHECK: [[LHS:%.*]] = bitcast i8 %{{.*}} to <8 x i1>
// CHECK: [[RHS:%.*]] = bitcast i8 %{{.*}} to <8 x i1>
// CHECK: [[OR:%.*]] = or <8 x i1> [[LHS]], [[RHS]]
// CHECK: [[CAST:%.*]] = bitcast <8 x i1> [[OR]] to i8
// CHECK: [[CMP:%.*]] = icmp eq i8 [[CAST]], -1
// CHECK: [[ZEXT:%.*]] = zext i1 [[CMP]] to i32
// CHECK: trunc i32 [[ZEXT]] to i8
// CHECK: [[LHS2:%.*]] = bitcast i8 %{{.*}} to <8 x i1>
// CHECK: [[RHS2:%.*]] = bitcast i8 %{{.*}} to <8 x i1>
// CHECK: [[OR2:%.*]] = or <8 x i1> [[LHS2]], [[RHS2]]
// CHECK: [[CAST2:%.*]] = bitcast <8 x i1> [[OR2]] to i8
// CHECK: [[CMP2:%.*]] = icmp eq i8 [[CAST2]], 0
// CHECK: [[ZEXT2:%.*]] = zext i1 [[CMP2]] to i32
// CHECK: trunc i32 [[ZEXT2]] to i8
return _kortest_mask8_u8(_mm512_cmpneq_epu64_mask(__A, __B),
_mm512_cmpneq_epu64_mask(__C, __D), CF);
}
__m512i test_mm512_mullo_epi64 (__m512i __A, __m512i __B) {
// CHECK-LABEL: @test_mm512_mullo_epi64
// CHECK: mul <8 x i64>

View File

@ -8149,6 +8149,52 @@ int test_mm512_kortestz(__m512i __A, __m512i __B, __m512i __C, __m512i __D) {
_mm512_cmpneq_epu32_mask(__C, __D));
}
unsigned char test_kortestz_mask16_u8(__m512i __A, __m512i __B, __m512i __C, __m512i __D) {
// CHECK-LABEL: @test_kortestz_mask16_u8
// CHECK: [[LHS:%.*]] = bitcast i16 %{{.*}} to <16 x i1>
// CHECK: [[RHS:%.*]] = bitcast i16 %{{.*}} to <16 x i1>
// CHECK: [[OR:%.*]] = or <16 x i1> [[LHS]], [[RHS]]
// CHECK: [[CAST:%.*]] = bitcast <16 x i1> [[OR]] to i16
// CHECK: [[CMP:%.*]] = icmp eq i16 [[CAST]], 0
// CHECK: [[ZEXT:%.*]] = zext i1 [[CMP]] to i32
// CHECK: trunc i32 [[ZEXT]] to i8
return _kortestz_mask16_u8(_mm512_cmpneq_epu32_mask(__A, __B),
_mm512_cmpneq_epu32_mask(__C, __D));
}
unsigned char test_kortestc_mask16_u8(__m512i __A, __m512i __B, __m512i __C, __m512i __D) {
// CHECK-LABEL: @test_kortestc_mask16_u8
// CHECK: [[LHS:%.*]] = bitcast i16 %{{.*}} to <16 x i1>
// CHECK: [[RHS:%.*]] = bitcast i16 %{{.*}} to <16 x i1>
// CHECK: [[OR:%.*]] = or <16 x i1> [[LHS]], [[RHS]]
// CHECK: [[CAST:%.*]] = bitcast <16 x i1> [[OR]] to i16
// CHECK: [[CMP:%.*]] = icmp eq i16 [[CAST]], -1
// CHECK: [[ZEXT:%.*]] = zext i1 [[CMP]] to i32
// CHECK: trunc i32 [[ZEXT]] to i8
return _kortestc_mask16_u8(_mm512_cmpneq_epu32_mask(__A, __B),
_mm512_cmpneq_epu32_mask(__C, __D));
}
unsigned char test_kortest_mask16_u8(__m512i __A, __m512i __B, __m512i __C, __m512i __D, unsigned char *CF) {
// CHECK-LABEL: @test_kortest_mask16_u8
// CHECK: [[LHS:%.*]] = bitcast i16 %{{.*}} to <16 x i1>
// CHECK: [[RHS:%.*]] = bitcast i16 %{{.*}} to <16 x i1>
// CHECK: [[OR:%.*]] = or <16 x i1> [[LHS]], [[RHS]]
// CHECK: [[CAST:%.*]] = bitcast <16 x i1> [[OR]] to i16
// CHECK: [[CMP:%.*]] = icmp eq i16 [[CAST]], -1
// CHECK: [[ZEXT:%.*]] = zext i1 [[CMP]] to i32
// CHECK: trunc i32 [[ZEXT]] to i8
// CHECK: [[LHS2:%.*]] = bitcast i16 %{{.*}} to <16 x i1>
// CHECK: [[RHS2:%.*]] = bitcast i16 %{{.*}} to <16 x i1>
// CHECK: [[OR2:%.*]] = or <16 x i1> [[LHS2]], [[RHS2]]
// CHECK: [[CAST2:%.*]] = bitcast <16 x i1> [[OR2]] to i16
// CHECK: [[CMP2:%.*]] = icmp eq i16 [[CAST2]], 0
// CHECK: [[ZEXT2:%.*]] = zext i1 [[CMP2]] to i32
// CHECK: trunc i32 [[ZEXT2]] to i8
return _kortest_mask16_u8(_mm512_cmpneq_epu32_mask(__A, __B),
_mm512_cmpneq_epu32_mask(__C, __D), CF);
}
__mmask16 test_mm512_kunpackb(__m512i __A, __m512i __B, __m512i __C, __m512i __D, __m512i __E, __m512i __F) {
// CHECK-LABEL: @test_mm512_kunpackb
// CHECK: [[LHS:%.*]] = bitcast i16 %{{.*}} to <16 x i1>