forked from OSchip/llvm-project
More replacing of target-dependent intrinsics with target-independent
intrinsics. The second instruction(s) to be handled are the vector versions of count set bits (ctpop). The changes here are to clang so that it generates a target-independent vector ctpop when it sees an ARM-dependent vector bits set count. The changes in llvm are to match the target-independent vector ctpop and in VMCore/AutoUpgrade.cpp to update any existing bc files containing ARM-dependent vector pop counts with target-independent ctpops. There are also changes to an existing test case in llvm for ARM vector count instructions and to a test for the bitcode upgrade. <rdar://problem/11892519> There is deliberately no test for the change to clang, as, so far as I know, no consensus has been reached regarding how to test neon instructions in clang; q.v. <rdar://problem/8762292> llvm-svn: 160410
This commit is contained in:
parent
682150364a
commit
b84f7bea09
|
@ -4829,10 +4829,10 @@ defm VCLZ : N2VInt_QHS<0b11, 0b11, 0b00, 0b01001, 0,
|
||||||
// VCNT : Vector Count One Bits
|
// VCNT : Vector Count One Bits
|
||||||
def VCNTd : N2VDInt<0b11, 0b11, 0b00, 0b00, 0b01010, 0,
|
def VCNTd : N2VDInt<0b11, 0b11, 0b00, 0b00, 0b01010, 0,
|
||||||
IIC_VCNTiD, "vcnt", "8",
|
IIC_VCNTiD, "vcnt", "8",
|
||||||
v8i8, v8i8, int_arm_neon_vcnt>;
|
v8i8, v8i8, ctpop>;
|
||||||
def VCNTq : N2VQInt<0b11, 0b11, 0b00, 0b00, 0b01010, 0,
|
def VCNTq : N2VQInt<0b11, 0b11, 0b00, 0b00, 0b01010, 0,
|
||||||
IIC_VCNTiQ, "vcnt", "8",
|
IIC_VCNTiQ, "vcnt", "8",
|
||||||
v16i8, v16i8, int_arm_neon_vcnt>;
|
v16i8, v16i8, ctpop>;
|
||||||
|
|
||||||
// Vector Swap
|
// Vector Swap
|
||||||
def VSWPd : N2VX<0b11, 0b11, 0b00, 0b10, 0b00000, 0, 0,
|
def VSWPd : N2VX<0b11, 0b11, 0b00, 0b10, 0b00000, 0, 0,
|
||||||
|
|
|
@ -66,6 +66,11 @@ static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) {
|
||||||
"llvm.ctlz." + Name.substr(14), F->getParent());
|
"llvm.ctlz." + Name.substr(14), F->getParent());
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
if (Name.startswith("arm.neon.vcnt")) {
|
||||||
|
NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctpop,
|
||||||
|
F->arg_begin()->getType());
|
||||||
|
return true;
|
||||||
|
}
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case 'c': {
|
case 'c': {
|
||||||
|
@ -314,11 +319,16 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
|
||||||
case Intrinsic::arm_neon_vclz: {
|
case Intrinsic::arm_neon_vclz: {
|
||||||
// Change name from llvm.arm.neon.vclz.* to llvm.ctlz.*
|
// Change name from llvm.arm.neon.vclz.* to llvm.ctlz.*
|
||||||
CI->replaceAllUsesWith(Builder.CreateCall2(NewFn, CI->getArgOperand(0),
|
CI->replaceAllUsesWith(Builder.CreateCall2(NewFn, CI->getArgOperand(0),
|
||||||
Builder.getFalse(),
|
Builder.getFalse(),
|
||||||
"llvm.ctlz." + Name.substr(14)));
|
"llvm.ctlz." + Name.substr(14)));
|
||||||
CI->eraseFromParent();
|
CI->eraseFromParent();
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
case Intrinsic::ctpop: {
|
||||||
|
CI->replaceAllUsesWith(Builder.CreateCall(NewFn, CI->getArgOperand(0)));
|
||||||
|
CI->eraseFromParent();
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
case Intrinsic::x86_xop_vfrcz_ss:
|
case Intrinsic::x86_xop_vfrcz_ss:
|
||||||
case Intrinsic::x86_xop_vfrcz_sd:
|
case Intrinsic::x86_xop_vfrcz_sd:
|
||||||
|
|
|
@ -1,5 +1,5 @@
|
||||||
; RUN: llvm-as < %s | llvm-dis | FileCheck %s
|
; RUN: llvm-as < %s | llvm-dis | FileCheck %s
|
||||||
; NB: currently tests only vclz, should also test vcnt and vcls
|
; Tests vclz and vcnt
|
||||||
|
|
||||||
define <4 x i16> @vclz16(<4 x i16>* %A) nounwind {
|
define <4 x i16> @vclz16(<4 x i16>* %A) nounwind {
|
||||||
;CHECK: @vclz16
|
;CHECK: @vclz16
|
||||||
|
@ -9,4 +9,13 @@ define <4 x i16> @vclz16(<4 x i16>* %A) nounwind {
|
||||||
ret <4 x i16> %tmp2
|
ret <4 x i16> %tmp2
|
||||||
}
|
}
|
||||||
|
|
||||||
|
define <8 x i8> @vcnt8(<8 x i8>* %A) nounwind {
|
||||||
|
;CHECK: @vcnt8
|
||||||
|
%tmp1 = load <8 x i8>* %A
|
||||||
|
%tmp2 = call <8 x i8> @llvm.arm.neon.vcnt.v8i8(<8 x i8> %tmp1)
|
||||||
|
;CHECK: call <8 x i8> @llvm.ctpop.v8i8(<8 x i8>
|
||||||
|
ret <8 x i8> %tmp2
|
||||||
|
}
|
||||||
|
|
||||||
declare <4 x i16> @llvm.arm.neon.vclz.v4i16(<4 x i16>) nounwind readnone
|
declare <4 x i16> @llvm.arm.neon.vclz.v4i16(<4 x i16>) nounwind readnone
|
||||||
|
declare <8 x i8> @llvm.arm.neon.vcnt.v8i8(<8 x i8>) nounwind readnone
|
||||||
|
|
|
@ -3,22 +3,22 @@
|
||||||
|
|
||||||
define <8 x i8> @vcnt8(<8 x i8>* %A) nounwind {
|
define <8 x i8> @vcnt8(<8 x i8>* %A) nounwind {
|
||||||
;CHECK: vcnt8:
|
;CHECK: vcnt8:
|
||||||
;CHECK: vcnt.8
|
;CHECK: vcnt.8 {{d[0-9]+}}, {{d[0-9]+}}
|
||||||
%tmp1 = load <8 x i8>* %A
|
%tmp1 = load <8 x i8>* %A
|
||||||
%tmp2 = call <8 x i8> @llvm.arm.neon.vcnt.v8i8(<8 x i8> %tmp1)
|
%tmp2 = call <8 x i8> @llvm.ctpop.v8i8(<8 x i8> %tmp1)
|
||||||
ret <8 x i8> %tmp2
|
ret <8 x i8> %tmp2
|
||||||
}
|
}
|
||||||
|
|
||||||
define <16 x i8> @vcntQ8(<16 x i8>* %A) nounwind {
|
define <16 x i8> @vcntQ8(<16 x i8>* %A) nounwind {
|
||||||
;CHECK: vcntQ8:
|
;CHECK: vcntQ8:
|
||||||
;CHECK: vcnt.8
|
;CHECK: vcnt.8 {{q[0-9]+}}, {{q[0-9]+}}
|
||||||
%tmp1 = load <16 x i8>* %A
|
%tmp1 = load <16 x i8>* %A
|
||||||
%tmp2 = call <16 x i8> @llvm.arm.neon.vcnt.v16i8(<16 x i8> %tmp1)
|
%tmp2 = call <16 x i8> @llvm.ctpop.v16i8(<16 x i8> %tmp1)
|
||||||
ret <16 x i8> %tmp2
|
ret <16 x i8> %tmp2
|
||||||
}
|
}
|
||||||
|
|
||||||
declare <8 x i8> @llvm.arm.neon.vcnt.v8i8(<8 x i8>) nounwind readnone
|
declare <8 x i8> @llvm.ctpop.v8i8(<8 x i8>) nounwind readnone
|
||||||
declare <16 x i8> @llvm.arm.neon.vcnt.v16i8(<16 x i8>) nounwind readnone
|
declare <16 x i8> @llvm.ctpop.v16i8(<16 x i8>) nounwind readnone
|
||||||
|
|
||||||
define <8 x i8> @vclz8(<8 x i8>* %A) nounwind {
|
define <8 x i8> @vclz8(<8 x i8>* %A) nounwind {
|
||||||
;CHECK: vclz8:
|
;CHECK: vclz8:
|
||||||
|
|
Loading…
Reference in New Issue