[X86] Auto-upgrade AVX1 vbroadcast intrinsics

They are replaced with the same IR that is generated for the vector initializers in avxintrin.h. The test verifies that we get back the original instruction. I haven't seen this approach used in other auto-upgrade tests (i.e. llc + FileCheck), but I think it's the most direct way to test this case. I believe this should work because llc upgrades calls during parsing. (Other tests mostly check that assembling and disassembling yields the upgraded IR.) llvm-svn: 209863
2014-05-29 23:35:33 +00:00 · 2014-05-29 23:35:33 +00:00 · 39066800e9
parent 1316a0e0e0
commit 39066800e9
2 changed files with 57 additions and 0 deletions
--- a/llvm/lib/IR/AutoUpgrade.cpp
+++ b/llvm/lib/IR/AutoUpgrade.cpp
@ -114,6 +114,9 @@ static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) {
        Name == "x86.avx.movnt.pd.256" ||
        Name == "x86.avx.movnt.ps.256" ||
        Name == "x86.sse42.crc32.64.8" ||
        Name == "x86.avx.vbroadcast.ss" ||
        Name == "x86.avx.vbroadcast.ss.256" ||
        Name == "x86.avx.vbroadcast.sd.256" ||
        (Name.startswith("x86.xop.vpcom") && F->arg_size() == 2)) {
      NewFn = nullptr;
      return true;
@ -335,6 +338,19 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
      Value *Trunc0 = Builder.CreateTrunc(CI->getArgOperand(0), Type::getInt32Ty(C));
      Rep = Builder.CreateCall2(CRC32, Trunc0, CI->getArgOperand(1));
      Rep = Builder.CreateZExt(Rep, CI->getType(), "");
    } else if (Name.startswith("llvm.x86.avx.vbroadcast")) {
      // Replace broadcasts with a series of insertelements.
      Type *VecTy = CI->getType();
      Type *EltTy = VecTy->getVectorElementType();
      unsigned EltNum = VecTy->getVectorNumElements();
      Value *Cast = Builder.CreateBitCast(CI->getArgOperand(0),
                                          EltTy->getPointerTo());
      Value *Load = Builder.CreateLoad(Cast);
      Type *I32Ty = Type::getInt32Ty(C);
      Rep = UndefValue::get(VecTy);
      for (unsigned I = 0; I < EltNum; ++I)
        Rep = Builder.CreateInsertElement(Rep, Load,
                                          ConstantInt::get(I32Ty, I));
    } else {
      bool PD128 = false, PD256 = false, PS128 = false, PS256 = false;
      if (Name == "llvm.x86.avx.vpermil.pd.256")
--- a/llvm/test/Bitcode/x86-upgrade-avx-vbroadcast.ll
+++ b/llvm/test/Bitcode/x86-upgrade-avx-vbroadcast.ll
@ -0,0 +1,41 @@
 ; RUN: llc -mattr=+avx < %s | FileCheck %s
 ; Check that we properly upgrade the AVX vbroadcast intrinsics to IR.  The
 ; expectation is that we should still get the original instruction back that
 ; maps to the intrinsic.
 target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
 target triple = "x86_64-apple-macosx10.9.0"
 ; CHECK-LABEL: test_mm_broadcast_ss:
 ; 128-bit single-precision case: passes %__a (bitcast to i8*) to the
 ; @llvm.x86.avx.vbroadcast.ss intrinsic that this test expects to be
 ; auto-upgraded. The FileCheck pattern in the body expects llc to emit a
 ; vbroadcastss with a memory operand and an xmm destination.
 define <4 x float> @test_mm_broadcast_ss(float* readonly %__a){
 entry:
  ; The intrinsic takes an i8* operand, so the float pointer is cast first.
  %0 = bitcast float* %__a to i8*
 ; CHECK: vbroadcastss (%{{.*}}), %xmm
  %1 = tail call <4 x float> @llvm.x86.avx.vbroadcast.ss(i8* %0)
  ret <4 x float> %1
 }
 ; CHECK-LABEL: test_mm256_broadcast_sd:
 ; 256-bit double-precision case: passes %__a (bitcast to i8*) to the
 ; @llvm.x86.avx.vbroadcast.sd.256 intrinsic that this test expects to be
 ; auto-upgraded. The FileCheck pattern in the body expects llc to emit a
 ; vbroadcastsd with a memory operand and a ymm destination.
 define <4 x double> @test_mm256_broadcast_sd(double* readonly %__a) {
 entry:
  ; The intrinsic takes an i8* operand, so the double pointer is cast first.
  %0 = bitcast double* %__a to i8*
 ; CHECK: vbroadcastsd (%{{.*}}), %ymm
  %1 = tail call <4 x double> @llvm.x86.avx.vbroadcast.sd.256(i8* %0)
  ret <4 x double> %1
 }
 ; CHECK-LABEL: test_mm256_broadcast_ss:
 ; 256-bit single-precision case: passes %__a (bitcast to i8*) to the
 ; @llvm.x86.avx.vbroadcast.ss.256 intrinsic that this test expects to be
 ; auto-upgraded. The FileCheck pattern in the body expects llc to emit a
 ; vbroadcastss with a memory operand and a ymm destination.
 define <8 x float> @test_mm256_broadcast_ss(float* readonly %__a) {
 entry:
  ; The intrinsic takes an i8* operand, so the float pointer is cast first.
  %0 = bitcast float* %__a to i8*
 ; CHECK: vbroadcastss (%{{.*}}), %ymm
  %1 = tail call <8 x float> @llvm.x86.avx.vbroadcast.ss.256(i8* %0)
  ret <8 x float> %1
 }
 ; Declarations of the three AVX vbroadcast intrinsics called by the test
 ; functions in this file. Each takes a single i8* operand and returns the
 ; vector type named in its signature.
 declare <8 x float> @llvm.x86.avx.vbroadcast.ss.256(i8*)
 declare <4 x double> @llvm.x86.avx.vbroadcast.sd.256(i8*)
 declare <4 x float> @llvm.x86.avx.vbroadcast.ss(i8*)