forked from OSchip/llvm-project
[X86] Auto-upgrade AVX1 vbroadcast intrinsics
The vbroadcast intrinsics are replaced with the same IR that is generated for the vector initializers in avxintrin.h. The test verifies that we get back the original instruction. I haven't seen this approach (i.e. llc + FileCheck) used in other auto-upgrade tests, but I think it's the most direct way to test this case. I believe this should work because llc upgrades calls during parsing. (Other tests mostly check that assembling and disassembling yields the upgraded IR.) llvm-svn: 209863
This commit is contained in:
parent
1316a0e0e0
commit
39066800e9
|
@ -114,6 +114,9 @@ static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) {
|
||||||
Name == "x86.avx.movnt.pd.256" ||
|
Name == "x86.avx.movnt.pd.256" ||
|
||||||
Name == "x86.avx.movnt.ps.256" ||
|
Name == "x86.avx.movnt.ps.256" ||
|
||||||
Name == "x86.sse42.crc32.64.8" ||
|
Name == "x86.sse42.crc32.64.8" ||
|
||||||
|
Name == "x86.avx.vbroadcast.ss" ||
|
||||||
|
Name == "x86.avx.vbroadcast.ss.256" ||
|
||||||
|
Name == "x86.avx.vbroadcast.sd.256" ||
|
||||||
(Name.startswith("x86.xop.vpcom") && F->arg_size() == 2)) {
|
(Name.startswith("x86.xop.vpcom") && F->arg_size() == 2)) {
|
||||||
NewFn = nullptr;
|
NewFn = nullptr;
|
||||||
return true;
|
return true;
|
||||||
|
@ -335,6 +338,19 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
|
||||||
Value *Trunc0 = Builder.CreateTrunc(CI->getArgOperand(0), Type::getInt32Ty(C));
|
Value *Trunc0 = Builder.CreateTrunc(CI->getArgOperand(0), Type::getInt32Ty(C));
|
||||||
Rep = Builder.CreateCall2(CRC32, Trunc0, CI->getArgOperand(1));
|
Rep = Builder.CreateCall2(CRC32, Trunc0, CI->getArgOperand(1));
|
||||||
Rep = Builder.CreateZExt(Rep, CI->getType(), "");
|
Rep = Builder.CreateZExt(Rep, CI->getType(), "");
|
||||||
|
} else if (Name.startswith("llvm.x86.avx.vbroadcast")) {
|
||||||
|
// Replace broadcasts with a series of insertelements.
|
||||||
|
Type *VecTy = CI->getType();
|
||||||
|
Type *EltTy = VecTy->getVectorElementType();
|
||||||
|
unsigned EltNum = VecTy->getVectorNumElements();
|
||||||
|
Value *Cast = Builder.CreateBitCast(CI->getArgOperand(0),
|
||||||
|
EltTy->getPointerTo());
|
||||||
|
Value *Load = Builder.CreateLoad(Cast);
|
||||||
|
Type *I32Ty = Type::getInt32Ty(C);
|
||||||
|
Rep = UndefValue::get(VecTy);
|
||||||
|
for (unsigned I = 0; I < EltNum; ++I)
|
||||||
|
Rep = Builder.CreateInsertElement(Rep, Load,
|
||||||
|
ConstantInt::get(I32Ty, I));
|
||||||
} else {
|
} else {
|
||||||
bool PD128 = false, PD256 = false, PS128 = false, PS256 = false;
|
bool PD128 = false, PD256 = false, PS128 = false, PS256 = false;
|
||||||
if (Name == "llvm.x86.avx.vpermil.pd.256")
|
if (Name == "llvm.x86.avx.vpermil.pd.256")
|
||||||
|
|
|
@ -0,0 +1,41 @@
|
||||||
|
; RUN: llc -mattr=+avx < %s | FileCheck %s
|
||||||
|
|
||||||
|
; Check that we properly upgrade the AVX vbroadcast intrinsics to IR. The
|
||||||
|
; expectation is that we should still get the original instruction back that
|
||||||
|
; maps to the intrinsic.
|
||||||
|
|
||||||
|
target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
|
||||||
|
target triple = "x86_64-apple-macosx10.9.0"
|
||||||
|
|
||||||
|
; CHECK-LABEL: test_mm_broadcast_ss:
|
||||||
|
define <4 x float> @test_mm_broadcast_ss(float* readonly %__a){
|
||||||
|
entry:
|
||||||
|
%0 = bitcast float* %__a to i8*
|
||||||
|
; CHECK: vbroadcastss (%{{.*}}), %xmm
|
||||||
|
%1 = tail call <4 x float> @llvm.x86.avx.vbroadcast.ss(i8* %0)
|
||||||
|
ret <4 x float> %1
|
||||||
|
}
|
||||||
|
|
||||||
|
; CHECK-LABEL: test_mm256_broadcast_sd:
|
||||||
|
define <4 x double> @test_mm256_broadcast_sd(double* readonly %__a) {
|
||||||
|
entry:
|
||||||
|
%0 = bitcast double* %__a to i8*
|
||||||
|
; CHECK: vbroadcastsd (%{{.*}}), %ymm
|
||||||
|
%1 = tail call <4 x double> @llvm.x86.avx.vbroadcast.sd.256(i8* %0)
|
||||||
|
ret <4 x double> %1
|
||||||
|
}
|
||||||
|
|
||||||
|
; CHECK-LABEL: test_mm256_broadcast_ss:
|
||||||
|
define <8 x float> @test_mm256_broadcast_ss(float* readonly %__a) {
|
||||||
|
entry:
|
||||||
|
%0 = bitcast float* %__a to i8*
|
||||||
|
; CHECK: vbroadcastss (%{{.*}}), %ymm
|
||||||
|
%1 = tail call <8 x float> @llvm.x86.avx.vbroadcast.ss.256(i8* %0)
|
||||||
|
ret <8 x float> %1
|
||||||
|
}
|
||||||
|
|
||||||
|
declare <8 x float> @llvm.x86.avx.vbroadcast.ss.256(i8*)
|
||||||
|
|
||||||
|
declare <4 x double> @llvm.x86.avx.vbroadcast.sd.256(i8*)
|
||||||
|
|
||||||
|
declare <4 x float> @llvm.x86.avx.vbroadcast.ss(i8*)
|
Loading…
Reference in New Issue