forked from OSchip/llvm-project
[X86] Remove kortest intrinsics and replace with native IR.
llvm-svn: 324646
This commit is contained in:
parent
76eb26aa92
commit
dccf72b583
|
@ -3723,18 +3723,6 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
|
|||
//===----------------------------------------------------------------------===//
|
||||
// AVX512
|
||||
|
||||
// Mask ops
|
||||
let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
|
||||
// Mask instructions
|
||||
// 16-bit mask
|
||||
def int_x86_avx512_kortestz_w : GCCBuiltin<"__builtin_ia32_kortestzhi">,
|
||||
Intrinsic<[llvm_i32_ty], [llvm_i16_ty, llvm_i16_ty],
|
||||
[IntrNoMem]>;
|
||||
def int_x86_avx512_kortestc_w : GCCBuiltin<"__builtin_ia32_kortestchi">,
|
||||
Intrinsic<[llvm_i32_ty], [llvm_i16_ty, llvm_i16_ty],
|
||||
[IntrNoMem]>;
|
||||
}
|
||||
|
||||
// Conversion ops
|
||||
let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
|
||||
def int_x86_avx512_cvttss2si : GCCBuiltin<"__builtin_ia32_vcvttss2si32">,
|
||||
|
|
|
@ -115,6 +115,8 @@ static bool ShouldUpgradeX86Intrinsic(Function *F, StringRef Name) {
|
|||
Name == "avx512.kor.w" || // Added in 7.0
|
||||
Name == "avx512.kxor.w" || // Added in 7.0
|
||||
Name == "avx512.kxnor.w" || // Added in 7.0
|
||||
Name == "avx512.kortestc.w" || // Added in 7.0
|
||||
Name == "avx512.kortestz.w" || // Added in 7.0
|
||||
Name.startswith("avx512.mask.pshuf.b.") || // Added in 4.0
|
||||
Name.startswith("avx2.pmax") || // Added in 3.9
|
||||
Name.startswith("avx2.pmin") || // Added in 3.9
|
||||
|
@ -1156,6 +1158,19 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
|
|||
Rep = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
|
||||
Rep = Builder.CreateNot(Rep);
|
||||
Rep = Builder.CreateBitCast(Rep, CI->getType());
|
||||
} else if (IsX86 &&
|
||||
(Name == "avx512.kortestz.w" || Name == "avx512.kortestc.w")) {
|
||||
Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
|
||||
Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
|
||||
Rep = Builder.CreateOr(LHS, RHS);
|
||||
Rep = Builder.CreateBitCast(Rep, Builder.getInt16Ty());
|
||||
Value *C;
|
||||
if (Name[14] == 'c')
|
||||
C = ConstantInt::getAllOnesValue(Builder.getInt16Ty());
|
||||
else
|
||||
C = ConstantInt::getNullValue(Builder.getInt16Ty());
|
||||
Rep = Builder.CreateICmpEQ(Rep, C);
|
||||
Rep = Builder.CreateZExt(Rep, Builder.getInt32Ty());
|
||||
} else if (IsX86 && (Name == "sse.add.ss" || Name == "sse2.add.sd")) {
|
||||
Type *I32Ty = Type::getInt32Ty(C);
|
||||
Value *Elt0 = Builder.CreateExtractElement(CI->getArgOperand(0),
|
||||
|
|
|
@ -20561,16 +20561,6 @@ SDValue X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
|
|||
SDValue SetCC = getSETCC(X86CC, Test, dl, DAG);
|
||||
return DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, SetCC);
|
||||
}
|
||||
case Intrinsic::x86_avx512_kortestz_w:
|
||||
case Intrinsic::x86_avx512_kortestc_w: {
|
||||
X86::CondCode X86CC =
|
||||
(IntNo == Intrinsic::x86_avx512_kortestz_w) ? X86::COND_E : X86::COND_B;
|
||||
SDValue LHS = DAG.getBitcast(MVT::v16i1, Op.getOperand(1));
|
||||
SDValue RHS = DAG.getBitcast(MVT::v16i1, Op.getOperand(2));
|
||||
SDValue Test = DAG.getNode(X86ISD::KORTEST, dl, MVT::i32, LHS, RHS);
|
||||
SDValue SetCC = getSETCC(X86CC, Test, dl, DAG);
|
||||
return DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, SetCC);
|
||||
}
|
||||
|
||||
case Intrinsic::x86_sse42_pcmpistria128:
|
||||
case Intrinsic::x86_sse42_pcmpestria128:
|
||||
|
|
|
@ -55,6 +55,103 @@ entry:
|
|||
ret i16 %13
|
||||
}
|
||||
|
||||
define i32 @test_mm512_kortestc(<8 x i64> %__A, <8 x i64> %__B, <8 x i64> %__C, <8 x i64> %__D) {
|
||||
; X32-LABEL: test_mm512_kortestc:
|
||||
; X32: # %bb.0: # %entry
|
||||
; X32-NEXT: pushl %ebp
|
||||
; X32-NEXT: .cfi_def_cfa_offset 8
|
||||
; X32-NEXT: .cfi_offset %ebp, -8
|
||||
; X32-NEXT: movl %esp, %ebp
|
||||
; X32-NEXT: .cfi_def_cfa_register %ebp
|
||||
; X32-NEXT: andl $-64, %esp
|
||||
; X32-NEXT: subl $64, %esp
|
||||
; X32-NEXT: vpcmpneqd %zmm1, %zmm0, %k0
|
||||
; X32-NEXT: vpcmpneqd 8(%ebp), %zmm2, %k1
|
||||
; X32-NEXT: korw %k0, %k1, %k0
|
||||
; X32-NEXT: kmovw %k0, %eax
|
||||
; X32-NEXT: cmpw $-1, %ax
|
||||
; X32-NEXT: sete %al
|
||||
; X32-NEXT: andb $1, %al
|
||||
; X32-NEXT: movzbl %al, %eax
|
||||
; X32-NEXT: movl %ebp, %esp
|
||||
; X32-NEXT: popl %ebp
|
||||
; X32-NEXT: vzeroupper
|
||||
; X32-NEXT: retl
|
||||
;
|
||||
; X64-LABEL: test_mm512_kortestc:
|
||||
; X64: # %bb.0: # %entry
|
||||
; X64-NEXT: vpcmpneqd %zmm1, %zmm0, %k0
|
||||
; X64-NEXT: vpcmpneqd %zmm3, %zmm2, %k1
|
||||
; X64-NEXT: korw %k0, %k1, %k0
|
||||
; X64-NEXT: kmovw %k0, %eax
|
||||
; X64-NEXT: cmpw $-1, %ax
|
||||
; X64-NEXT: sete %al
|
||||
; X64-NEXT: andb $1, %al
|
||||
; X64-NEXT: movzbl %al, %eax
|
||||
; X64-NEXT: vzeroupper
|
||||
; X64-NEXT: retq
|
||||
entry:
|
||||
%0 = bitcast <8 x i64> %__A to <16 x i32>
|
||||
%1 = bitcast <8 x i64> %__B to <16 x i32>
|
||||
%2 = icmp ne <16 x i32> %0, %1
|
||||
%3 = bitcast <8 x i64> %__C to <16 x i32>
|
||||
%4 = bitcast <8 x i64> %__D to <16 x i32>
|
||||
%5 = icmp ne <16 x i32> %3, %4
|
||||
%6 = or <16 x i1> %5, %2 %7 = bitcast <16 x i1> %6 to i16
|
||||
%8 = icmp eq i16 %7, -1
|
||||
%9 = zext i1 %8 to i32
|
||||
ret i32 %9
|
||||
}
|
||||
|
||||
define i32 @test_mm512_kortestz(<8 x i64> %__A, <8 x i64> %__B, <8 x i64> %__C, <8 x i64> %__D) {
|
||||
; X32-LABEL: test_mm512_kortestz:
|
||||
; X32: # %bb.0: # %entry
|
||||
; X32-NEXT: pushl %ebp
|
||||
; X32-NEXT: .cfi_def_cfa_offset 8
|
||||
; X32-NEXT: .cfi_offset %ebp, -8
|
||||
; X32-NEXT: movl %esp, %ebp
|
||||
; X32-NEXT: .cfi_def_cfa_register %ebp
|
||||
; X32-NEXT: andl $-64, %esp
|
||||
; X32-NEXT: subl $64, %esp
|
||||
; X32-NEXT: vpcmpneqd %zmm1, %zmm0, %k0
|
||||
; X32-NEXT: vpcmpneqd 8(%ebp), %zmm2, %k1
|
||||
; X32-NEXT: korw %k0, %k1, %k0
|
||||
; X32-NEXT: kmovw %k0, %eax
|
||||
; X32-NEXT: cmpw $0, %ax
|
||||
; X32-NEXT: sete %al
|
||||
; X32-NEXT: andb $1, %al
|
||||
; X32-NEXT: movzbl %al, %eax
|
||||
; X32-NEXT: movl %ebp, %esp
|
||||
; X32-NEXT: popl %ebp
|
||||
; X32-NEXT: vzeroupper
|
||||
; X32-NEXT: retl
|
||||
;
|
||||
; X64-LABEL: test_mm512_kortestz:
|
||||
; X64: # %bb.0: # %entry
|
||||
; X64-NEXT: vpcmpneqd %zmm1, %zmm0, %k0
|
||||
; X64-NEXT: vpcmpneqd %zmm3, %zmm2, %k1
|
||||
; X64-NEXT: korw %k0, %k1, %k0
|
||||
; X64-NEXT: kmovw %k0, %eax
|
||||
; X64-NEXT: cmpw $0, %ax
|
||||
; X64-NEXT: sete %al
|
||||
; X64-NEXT: andb $1, %al
|
||||
; X64-NEXT: movzbl %al, %eax
|
||||
; X64-NEXT: vzeroupper
|
||||
; X64-NEXT: retq
|
||||
entry:
|
||||
%0 = bitcast <8 x i64> %__A to <16 x i32>
|
||||
%1 = bitcast <8 x i64> %__B to <16 x i32>
|
||||
%2 = icmp ne <16 x i32> %0, %1
|
||||
%3 = bitcast <8 x i64> %__C to <16 x i32>
|
||||
%4 = bitcast <8 x i64> %__D to <16 x i32>
|
||||
%5 = icmp ne <16 x i32> %3, %4
|
||||
%6 = or <16 x i1> %5, %2
|
||||
%7 = bitcast <16 x i1> %6 to i16
|
||||
%8 = icmp eq i16 %7, 0
|
||||
%9 = zext i1 %8 to i32
|
||||
ret i32 %9
|
||||
}
|
||||
|
||||
define <16 x float> @test_mm512_shuffle_f32x4(<16 x float> %__A, <16 x float> %__B) {
|
||||
; X32-LABEL: test_mm512_shuffle_f32x4:
|
||||
; X32: # %bb.0: # %entry
|
||||
|
|
|
@ -3832,3 +3832,48 @@ define i16 @test_kxor(i16 %a0, i16 %a1) {
|
|||
ret i16 %t2
|
||||
}
|
||||
|
||||
declare i32 @llvm.x86.avx512.kortestz.w(i16, i16) nounwind readnone
|
||||
define i32 @test_kortestz(<8 x i64> %A, <8 x i64> %B, <8 x i64> %C, <8 x i64> %D) {
|
||||
; CHECK-LABEL: test_kortestz:
|
||||
; CHECK: ## %bb.0: ## %entry
|
||||
; CHECK-NEXT: vpcmpneqd %zmm1, %zmm0, %k0
|
||||
; CHECK-NEXT: vpcmpneqd %zmm3, %zmm2, %k1
|
||||
; CHECK-NEXT: xorl %eax, %eax
|
||||
; CHECK-NEXT: kortestw %k1, %k0
|
||||
; CHECK-NEXT: sete %al
|
||||
; CHECK-NEXT: retq
|
||||
entry:
|
||||
%0 = bitcast <8 x i64> %A to <16 x i32>
|
||||
%1 = bitcast <8 x i64> %B to <16 x i32>
|
||||
%2 = icmp ne <16 x i32> %0, %1
|
||||
%3 = bitcast <8 x i64> %C to <16 x i32>
|
||||
%4 = bitcast <8 x i64> %D to <16 x i32>
|
||||
%5 = icmp ne <16 x i32> %3, %4
|
||||
%6 = bitcast <16 x i1> %2 to i16
|
||||
%7 = bitcast <16 x i1> %5 to i16
|
||||
%res = call i32 @llvm.x86.avx512.kortestz.w(i16 %6, i16 %7)
|
||||
ret i32 %res
|
||||
}
|
||||
|
||||
declare i32 @llvm.x86.avx512.kortestc.w(i16, i16) nounwind readnone
|
||||
define i32 @test_kortestc(<8 x i64> %A, <8 x i64> %B, <8 x i64> %C, <8 x i64> %D) {
|
||||
; CHECK-LABEL: test_kortestc:
|
||||
; CHECK: ## %bb.0: ## %entry
|
||||
; CHECK-NEXT: vpcmpneqd %zmm1, %zmm0, %k0
|
||||
; CHECK-NEXT: vpcmpneqd %zmm3, %zmm2, %k1
|
||||
; CHECK-NEXT: xorl %eax, %eax
|
||||
; CHECK-NEXT: kortestw %k1, %k0
|
||||
; CHECK-NEXT: sete %al
|
||||
; CHECK-NEXT: retq
|
||||
entry:
|
||||
%0 = bitcast <8 x i64> %A to <16 x i32>
|
||||
%1 = bitcast <8 x i64> %B to <16 x i32>
|
||||
%2 = icmp ne <16 x i32> %0, %1
|
||||
%3 = bitcast <8 x i64> %C to <16 x i32>
|
||||
%4 = bitcast <8 x i64> %D to <16 x i32>
|
||||
%5 = icmp ne <16 x i32> %3, %4
|
||||
%6 = bitcast <16 x i1> %2 to i16
|
||||
%7 = bitcast <16 x i1> %5 to i16
|
||||
%res = call i32 @llvm.x86.avx512.kortestz.w(i16 %6, i16 %7)
|
||||
ret i32 %res
|
||||
}
|
||||
|
|
|
@ -1,34 +1,6 @@
|
|||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
||||
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl | FileCheck %s
|
||||
|
||||
declare i32 @llvm.x86.avx512.kortestz.w(i16, i16) nounwind readnone
|
||||
define i32 @test_kortestz(i16 %a0, i16 %a1) {
|
||||
; CHECK-LABEL: test_kortestz:
|
||||
; CHECK: ## %bb.0:
|
||||
; CHECK-NEXT: kmovw %esi, %k0
|
||||
; CHECK-NEXT: kmovw %edi, %k1
|
||||
; CHECK-NEXT: xorl %eax, %eax
|
||||
; CHECK-NEXT: kortestw %k0, %k1
|
||||
; CHECK-NEXT: sete %al
|
||||
; CHECK-NEXT: retq
|
||||
%res = call i32 @llvm.x86.avx512.kortestz.w(i16 %a0, i16 %a1)
|
||||
ret i32 %res
|
||||
}
|
||||
|
||||
declare i32 @llvm.x86.avx512.kortestc.w(i16, i16) nounwind readnone
|
||||
define i32 @test_kortestc(i16 %a0, i16 %a1) {
|
||||
; CHECK-LABEL: test_kortestc:
|
||||
; CHECK: ## %bb.0:
|
||||
; CHECK-NEXT: kmovw %esi, %k0
|
||||
; CHECK-NEXT: kmovw %edi, %k1
|
||||
; CHECK-NEXT: xorl %eax, %eax
|
||||
; CHECK-NEXT: kortestw %k0, %k1
|
||||
; CHECK-NEXT: setb %al
|
||||
; CHECK-NEXT: retq
|
||||
%res = call i32 @llvm.x86.avx512.kortestc.w(i16 %a0, i16 %a1)
|
||||
ret i32 %res
|
||||
}
|
||||
|
||||
define <16 x float> @test_rcp_ps_512(<16 x float> %a0) {
|
||||
; CHECK-LABEL: test_rcp_ps_512:
|
||||
; CHECK: ## %bb.0:
|
||||
|
|
Loading…
Reference in New Issue