Remove the BBVectorize pass.

It served us well, helped kick-start much of the vectorization efforts
in LLVM, etc. Its time has come and passed. Back in 2014:
http://lists.llvm.org/pipermail/llvm-dev/2014-November/079091.html

Time to actually let go and move forward. =]

I've updated the release notes to cover both the removal of the pass and the
deprecation of the corresponding C API.

llvm-svn: 306797
This commit is contained in:
Chandler Carruth 2017-06-30 07:09:08 +00:00
parent e24f434eb2
commit 3545a9e1f9
43 changed files with 18 additions and 6180 deletions

View File

@ -70,7 +70,7 @@ D: Branch weights and BlockFrequencyInfo
N: Hal Finkel
E: hfinkel@anl.gov
D: BBVectorize, the loop reroller, alias analysis and the PowerPC target
D: The loop reroller, alias analysis and the PowerPC target
N: Dan Gohman
E: sunfish@mozilla.com

View File

@ -54,8 +54,9 @@ Non-comprehensive list of changes in this release
its nature as a general purpose PDB manipulation / diagnostics tool that does
more than just dumping contents.
* ... next change ...
* The ``BBVectorize`` pass has been removed. It was fully replaced and no
longer used back in 2014, but we didn't get around to removing it. Now it is
gone. The SLP vectorizer is the suggested non-loop vectorization pass.
.. NOTE
If you would like to document a larger change, then you can add a
@ -111,7 +112,11 @@ Changes to the OCaml bindings
Changes to the C API
--------------------
During this release ...
* Deprecated the ``LLVMAddBBVectorizePass`` interface since the ``BBVectorize``
pass has been removed. It is now a no-op and will be removed in the next
release. Use ``LLVMAddSLPVectorizePass`` instead to get the supported SLP
vectorizer.
External Open Source Projects Using LLVM 5
==========================================

View File

@ -33,7 +33,7 @@ extern "C" {
* @{
*/
/** See llvm::createBBVectorizePass function. */
/** DEPRECATED - Use LLVMAddSLPVectorizePass */
void LLVMAddBBVectorizePass(LLVMPassManagerRef PM);
/** See llvm::createLoopVectorizePass function. */

View File

@ -70,7 +70,6 @@ void initializeAlwaysInlinerLegacyPassPass(PassRegistry&);
void initializeArgPromotionPass(PassRegistry&);
void initializeAssumptionCacheTrackerPass(PassRegistry&);
void initializeAtomicExpandPass(PassRegistry&);
void initializeBBVectorizePass(PassRegistry&);
void initializeBDCELegacyPassPass(PassRegistry&);
void initializeBarrierNoopPass(PassRegistry&);
void initializeBasicAAWrapperPassPass(PassRegistry&);

View File

@ -195,7 +195,6 @@ namespace {
(void) llvm::createLoopVectorizePass();
(void) llvm::createSLPVectorizerPass();
(void) llvm::createLoadStoreVectorizerPass();
(void) llvm::createBBVectorizePass();
(void) llvm::createPartiallyInlineLibCallsPass();
(void) llvm::createScalarizerPass();
(void) llvm::createSeparateConstOffsetFromGEPPass();

View File

@ -145,7 +145,6 @@ public:
bool DisableTailCalls;
bool DisableUnitAtATime;
bool DisableUnrollLoops;
bool BBVectorize;
bool SLPVectorize;
bool LoopVectorize;
bool RerollLoops;

View File

@ -106,13 +106,6 @@ struct VectorizeConfig {
VectorizeConfig();
};
//===----------------------------------------------------------------------===//
//
// BBVectorize - A basic-block vectorization pass.
//
BasicBlockPass *
createBBVectorizePass(const VectorizeConfig &C = VectorizeConfig());
//===----------------------------------------------------------------------===//
//
// LoopVectorize - Create a loop vectorization pass.

View File

@ -55,10 +55,6 @@ static cl::opt<bool>
RunSLPVectorization("vectorize-slp", cl::Hidden,
cl::desc("Run the SLP vectorization passes"));
static cl::opt<bool>
RunBBVectorization("vectorize-slp-aggressive", cl::Hidden,
cl::desc("Run the BB vectorization passes"));
static cl::opt<bool>
UseGVNAfterVectorization("use-gvn-after-vectorization",
cl::init(false), cl::Hidden,
@ -166,7 +162,6 @@ PassManagerBuilder::PassManagerBuilder() {
Inliner = nullptr;
DisableUnitAtATime = false;
DisableUnrollLoops = false;
BBVectorize = RunBBVectorization;
SLPVectorize = RunSLPVectorization;
LoopVectorize = RunLoopVectorization;
RerollLoops = RunLoopRerolling;
@ -384,26 +379,8 @@ void PassManagerBuilder::addFunctionSimplificationPasses(
if (RerollLoops)
MPM.add(createLoopRerollPass());
if (!RunSLPAfterLoopVectorization) {
if (SLPVectorize)
MPM.add(createSLPVectorizerPass()); // Vectorize parallel scalar chains.
if (BBVectorize) {
MPM.add(createBBVectorizePass());
addInstructionCombiningPass(MPM);
addExtensionsToPM(EP_Peephole, MPM);
if (OptLevel > 1 && UseGVNAfterVectorization)
MPM.add(NewGVN
? createNewGVNPass()
: createGVNPass(DisableGVNLoadPRE)); // Remove redundancies
else
MPM.add(createEarlyCSEPass()); // Catch trivial redundancies
// BBVectorize may have significantly shortened a loop body; unroll again.
if (!DisableUnrollLoops)
MPM.add(createLoopUnrollPass(OptLevel));
}
}
if (!RunSLPAfterLoopVectorization && SLPVectorize)
MPM.add(createSLPVectorizerPass()); // Vectorize parallel scalar chains.
MPM.add(createAggressiveDCEPass()); // Delete dead instructions
MPM.add(createCFGSimplificationPass()); // Merge & remove BBs
@ -635,28 +612,10 @@ void PassManagerBuilder::populateModulePassManager(
addInstructionCombiningPass(MPM);
}
if (RunSLPAfterLoopVectorization) {
if (SLPVectorize) {
MPM.add(createSLPVectorizerPass()); // Vectorize parallel scalar chains.
if (OptLevel > 1 && ExtraVectorizerPasses) {
MPM.add(createEarlyCSEPass());
}
}
if (BBVectorize) {
MPM.add(createBBVectorizePass());
addInstructionCombiningPass(MPM);
addExtensionsToPM(EP_Peephole, MPM);
if (OptLevel > 1 && UseGVNAfterVectorization)
MPM.add(NewGVN
? createNewGVNPass()
: createGVNPass(DisableGVNLoadPRE)); // Remove redundancies
else
MPM.add(createEarlyCSEPass()); // Catch trivial redundancies
// BBVectorize may have significantly shortened a loop body; unroll again.
if (!DisableUnrollLoops)
MPM.add(createLoopUnrollPass(OptLevel));
if (RunSLPAfterLoopVectorization && SLPVectorize) {
MPM.add(createSLPVectorizerPass()); // Vectorize parallel scalar chains.
if (OptLevel > 1 && ExtraVectorizerPasses) {
MPM.add(createEarlyCSEPass());
}
}

File diff suppressed because it is too large Load Diff

View File

@ -1,5 +1,4 @@
add_llvm_library(LLVMVectorize
BBVectorize.cpp
LoadStoreVectorizer.cpp
LoopVectorize.cpp
SLPVectorizer.cpp

View File

@ -26,7 +26,6 @@ using namespace llvm;
/// initializeVectorizationPasses - Initialize all passes linked into the
/// Vectorization library.
void llvm::initializeVectorization(PassRegistry &Registry) {
initializeBBVectorizePass(Registry);
initializeLoopVectorizePass(Registry);
initializeSLPVectorizerPass(Registry);
initializeLoadStoreVectorizerPass(Registry);
@ -36,8 +35,8 @@ void LLVMInitializeVectorization(LLVMPassRegistryRef R) {
initializeVectorization(*unwrap(R));
}
// DEPRECATED: Remove after the LLVM 5 release.
void LLVMAddBBVectorizePass(LLVMPassManagerRef PM) {
unwrap(PM)->add(createBBVectorizePass());
}
void LLVMAddLoopVectorizePass(LLVMPassManagerRef PM) {

View File

@ -2,7 +2,7 @@
; RUN: opt -O1 -S -debug %s 2>&1 | FileCheck %s --check-prefix=OPT-O1
; RUN: opt -O2 -S -debug %s 2>&1 | FileCheck %s --check-prefix=OPT-O1 --check-prefix=OPT-O2O3
; RUN: opt -O3 -S -debug %s 2>&1 | FileCheck %s --check-prefix=OPT-O1 --check-prefix=OPT-O2O3
; RUN: opt -bb-vectorize -dce -die -gvn-hoist -loweratomic -S -debug %s 2>&1 | FileCheck %s --check-prefix=OPT-MORE
; RUN: opt -dce -die -gvn-hoist -loweratomic -S -debug %s 2>&1 | FileCheck %s --check-prefix=OPT-MORE
; RUN: opt -indvars -licm -loop-deletion -loop-extract -loop-idiom -loop-instsimplify -loop-reduce -loop-reroll -loop-rotate -loop-unroll -loop-unswitch -S -debug %s 2>&1 | FileCheck %s --check-prefix=OPT-LOOP
; REQUIRES: asserts
@ -55,7 +55,6 @@ attributes #0 = { optnone noinline }
; OPT-O2O3-DAG: Skipping pass 'SLP Vectorizer'
; Additional IR passes that opt doesn't turn on by default.
; OPT-MORE-DAG: Skipping pass 'Basic-Block Vectorization'
; OPT-MORE-DAG: Skipping pass 'Dead Code Elimination'
; OPT-MORE-DAG: Skipping pass 'Dead Instruction Elimination'
; OPT-MORE-DAG: Skipping pass 'Lower atomic intrinsics

View File

@ -1,16 +0,0 @@
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"
; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7 -bb-vectorize -S | FileCheck %s
%"struct.btSoftBody" = type { float, float, float*, i8 }
define void @test1(%"struct.btSoftBody"* %n1, %"struct.btSoftBody"* %n2) uwtable align 2 {
entry:
%tobool15 = icmp ne %"struct.btSoftBody"* %n1, null
%cond16 = zext i1 %tobool15 to i32
%tobool21 = icmp ne %"struct.btSoftBody"* %n2, null
%cond22 = zext i1 %tobool21 to i32
ret void
; CHECK-LABEL: @test1(
}

View File

@ -1,61 +0,0 @@
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"
; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7 -bb-vectorize -bb-vectorize-req-chain-depth=3 -instcombine -gvn -S | FileCheck %s
; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7 -basicaa -loop-unroll -unroll-partial-threshold=45 -unroll-allow-partial -bb-vectorize -bb-vectorize-req-chain-depth=3 -instcombine -gvn -S | FileCheck %s -check-prefix=CHECK-UNRL
; The second check covers the use of alias analysis (with loop unrolling).
define void @test1(double* noalias %out, double* noalias %in1, double* noalias %in2) nounwind uwtable {
entry:
br label %for.body
; CHECK-LABEL: @test1(
; CHECK-UNRL-LABEL: @test1(
for.body: ; preds = %for.body, %entry
%indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
%arrayidx = getelementptr inbounds double, double* %in1, i64 %indvars.iv
%0 = load double, double* %arrayidx, align 8
%arrayidx2 = getelementptr inbounds double, double* %in2, i64 %indvars.iv
%1 = load double, double* %arrayidx2, align 8
%mul = fmul double %0, %0
%mul3 = fmul double %0, %1
%add = fadd double %mul, %mul3
%add4 = fadd double %1, %1
%add5 = fadd double %add4, %0
%mul6 = fmul double %0, %add5
%add7 = fadd double %add, %mul6
%mul8 = fmul double %1, %1
%add9 = fadd double %0, %0
%add10 = fadd double %add9, %0
%mul11 = fmul double %mul8, %add10
%add12 = fadd double %add7, %mul11
%arrayidx14 = getelementptr inbounds double, double* %out, i64 %indvars.iv
store double %add12, double* %arrayidx14, align 8
%indvars.iv.next = add i64 %indvars.iv, 1
%lftr.wideiv = trunc i64 %indvars.iv.next to i32
%exitcond = icmp eq i32 %lftr.wideiv, 10
br i1 %exitcond, label %for.end, label %for.body
; CHECK: insertelement
; CHECK-NEXT: insertelement
; CHECK-NEXT: fadd <2 x double>
; CHECK-NEXT: insertelement
; CHECK-NEXT: shufflevector
; CHECK-NEXT: fadd <2 x double>
; CHECK-NEXT: insertelement
; CHECK-NEXT: fmul <2 x double>
; CHECK-UNRL: %mul = fmul <2 x double> %2, %2
; CHECK-UNRL: %mul3 = fmul <2 x double> %2, %3
; CHECK-UNRL: %add = fadd <2 x double> %mul, %mul3
; CHECK-UNRL: %add4 = fadd <2 x double> %3, %3
; CHECK-UNRL: %add5 = fadd <2 x double> %add4, %2
; CHECK-UNRL: %mul6 = fmul <2 x double> %2, %add5
; CHECK-UNRL: %add7 = fadd <2 x double> %add, %mul6
; CHECK-UNRL: %mul8 = fmul <2 x double> %3, %3
; CHECK-UNRL: %add9 = fadd <2 x double> %2, %2
; CHECK-UNRL: %add10 = fadd <2 x double> %add9, %2
; CHECK-UNRL: %mul11 = fmul <2 x double> %mul8, %add10
; CHECK-UNRL: %add12 = fadd <2 x double> %add7, %mul11
for.end: ; preds = %for.body
ret void
}

View File

@ -1,95 +0,0 @@
; RUN: opt < %s -basicaa -bb-vectorize -disable-output
; This is a bugpoint-reduced test case. It did not always assert, but does reproduce the bug
; and running under valgrind (or some similar tool) will catch the error.
target datalayout = "e-p:64:64:64-S128-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f16:16:16-f32:32:32-f64:64:64-f128:128:128-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
target triple = "x86_64-apple-darwin12.2.0"
%0 = type { [10 x { float, float }], [10 x { float, float }], [10 x { float, float }], [10 x { float, float }], [10 x { float, float }] }
%1 = type { [10 x [8 x i8]] }
%2 = type { i64, i64 }
%3 = type { [10 x i64], i64, i64, i64, i64, i64 }
%4 = type { i64, i64, i64, i64, i64, i64 }
%5 = type { [10 x i64] }
%6 = type { [10 x float], [10 x float], [10 x float], [10 x float] }
%struct.__st_parameter_dt.1.3.5.7 = type { %struct.__st_parameter_common.0.2.4.6, i64, i64*, i64*, i8*, i8*, i32, i32, i8*, i8*, i32, i32, i8*, [256 x i8], i32*, i64, i8*, i32, i32, i8*, i8*, i32, i32, i8*, i8*, i32, i32, i8*, i8*, i32, [4 x i8] }
%struct.__st_parameter_common.0.2.4.6 = type { i32, i32, i8*, i32, i32, i8*, i32* }
@cctenso_ = external unnamed_addr global %0, align 32
@ctenso_ = external unnamed_addr global %1, align 32
@i_dim_ = external unnamed_addr global %2, align 16
@itenso1_ = external unnamed_addr global %3, align 32
@itenso2_ = external unnamed_addr global %4, align 32
@ltenso_ = external unnamed_addr global %5, align 32
@rtenso_ = external unnamed_addr global %6, align 32
@.cst = external unnamed_addr constant [8 x i8], align 8
@.cst1 = external unnamed_addr constant [3 x i8], align 8
@.cst2 = external unnamed_addr constant [29 x i8], align 8
@.cst3 = external unnamed_addr constant [32 x i8], align 64
define void @cart_to_dc2y_(double* noalias nocapture %xx, double* noalias nocapture %yy, double* noalias nocapture %zz, [5 x { double, double }]* noalias nocapture %c2ten) nounwind uwtable {
entry:
%0 = fmul double undef, undef
%1 = fmul double undef, undef
%2 = fadd double undef, undef
%3 = fmul double undef, 0x3FE8B8B76E3E9919
%4 = fsub double %0, %1
%5 = fsub double -0.000000e+00, undef
%6 = fmul double undef, undef
%7 = fmul double %4, %6
%8 = fmul double undef, 2.000000e+00
%9 = fmul double %8, undef
%10 = fmul double undef, %9
%11 = fmul double %10, undef
%12 = fsub double undef, %7
%13 = fmul double %3, %12
%14 = fmul double %3, undef
%15 = getelementptr inbounds [5 x { double, double }], [5 x { double, double }]* %c2ten, i64 0, i64 0, i32 0
store double %13, double* %15, align 8
%16 = getelementptr inbounds [5 x { double, double }], [5 x { double, double }]* %c2ten, i64 0, i64 0, i32 1
%17 = fmul double undef, %8
%18 = fmul double %17, undef
%19 = fmul double undef, %18
%20 = fadd double undef, undef
%21 = fmul double %3, %19
%22 = fsub double -0.000000e+00, %21
%23 = getelementptr inbounds [5 x { double, double }], [5 x { double, double }]* %c2ten, i64 0, i64 1, i32 0
store double %22, double* %23, align 8
%24 = getelementptr inbounds [5 x { double, double }], [5 x { double, double }]* %c2ten, i64 0, i64 1, i32 1
%25 = fmul double undef, 0x3FE42F601A8C6794
%26 = fmul double undef, 2.000000e+00
%27 = fsub double %26, %0
%28 = fmul double %6, undef
%29 = fsub double undef, %28
%30 = getelementptr inbounds [5 x { double, double }], [5 x { double, double }]* %c2ten, i64 0, i64 2, i32 0
store double undef, double* %30, align 8
%31 = getelementptr inbounds [5 x { double, double }], [5 x { double, double }]* %c2ten, i64 0, i64 2, i32 1
%32 = fmul double undef, %17
%33 = fmul double undef, %17
%34 = fmul double undef, %32
%35 = fmul double undef, %33
%36 = fsub double undef, %35
%37 = fmul double %3, %34
%38 = getelementptr inbounds [5 x { double, double }], [5 x { double, double }]* %c2ten, i64 0, i64 3, i32 0
store double %37, double* %38, align 8
%39 = getelementptr inbounds [5 x { double, double }], [5 x { double, double }]* %c2ten, i64 0, i64 3, i32 1
%40 = fmul double undef, %8
%41 = fmul double undef, %40
%42 = fmul double undef, %41
%43 = fsub double undef, %42
%44 = fmul double %3, %43
%45 = getelementptr inbounds [5 x { double, double }], [5 x { double, double }]* %c2ten, i64 0, i64 4, i32 0
store double %13, double* %45, align 8
%46 = getelementptr inbounds [5 x { double, double }], [5 x { double, double }]* %c2ten, i64 0, i64 4, i32 1
%47 = fsub double -0.000000e+00, %14
store double %47, double* %16, align 8
store double undef, double* %24, align 8
store double -0.000000e+00, double* %31, align 8
store double undef, double* %39, align 8
store double undef, double* %46, align 8
ret void
}
attributes #0 = { nounwind uwtable }
attributes #1 = { nounwind readnone }
attributes #2 = { nounwind }

View File

@ -1,54 +0,0 @@
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"
; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7 -bb-vectorize -S | FileCheck %s
define void @ptoa() nounwind uwtable {
entry:
%call = call i8* @malloc() nounwind
br i1 undef, label %return, label %if.end10
if.end10: ; preds = %entry
%incdec.ptr = getelementptr inbounds i8, i8* %call, i64 undef
%call17 = call i32 @ptou() nounwind
%incdec.ptr26.1 = getelementptr inbounds i8, i8* %incdec.ptr, i64 -2
store i8 undef, i8* %incdec.ptr26.1, align 1
%div27.1 = udiv i32 %call17, 100
%rem.2 = urem i32 %div27.1, 10
%add2230.2 = or i32 %rem.2, 48
%conv25.2 = trunc i32 %add2230.2 to i8
%incdec.ptr26.2 = getelementptr inbounds i8, i8* %incdec.ptr, i64 -3
store i8 %conv25.2, i8* %incdec.ptr26.2, align 1
%incdec.ptr26.3 = getelementptr inbounds i8, i8* %incdec.ptr, i64 -4
store i8 undef, i8* %incdec.ptr26.3, align 1
%div27.3 = udiv i32 %call17, 10000
%rem.4 = urem i32 %div27.3, 10
%add2230.4 = or i32 %rem.4, 48
%conv25.4 = trunc i32 %add2230.4 to i8
%incdec.ptr26.4 = getelementptr inbounds i8, i8* %incdec.ptr, i64 -5
store i8 %conv25.4, i8* %incdec.ptr26.4, align 1
%div27.4 = udiv i32 %call17, 100000
%rem.5 = urem i32 %div27.4, 10
%add2230.5 = or i32 %rem.5, 48
%conv25.5 = trunc i32 %add2230.5 to i8
%incdec.ptr26.5 = getelementptr inbounds i8, i8* %incdec.ptr, i64 -6
store i8 %conv25.5, i8* %incdec.ptr26.5, align 1
%incdec.ptr26.6 = getelementptr inbounds i8, i8* %incdec.ptr, i64 -7
store i8 0, i8* %incdec.ptr26.6, align 1
%incdec.ptr26.7 = getelementptr inbounds i8, i8* %incdec.ptr, i64 -8
store i8 undef, i8* %incdec.ptr26.7, align 1
%div27.7 = udiv i32 %call17, 100000000
%rem.8 = urem i32 %div27.7, 10
%add2230.8 = or i32 %rem.8, 48
%conv25.8 = trunc i32 %add2230.8 to i8
%incdec.ptr26.8 = getelementptr inbounds i8, i8* %incdec.ptr, i64 -9
store i8 %conv25.8, i8* %incdec.ptr26.8, align 1
unreachable
return: ; preds = %entry
ret void
; CHECK-LABEL: @ptoa(
}
declare noalias i8* @malloc() nounwind
declare i32 @ptou()

View File

@ -1,85 +0,0 @@
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"
; RUN: opt < %s -basicaa -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7 -bb-vectorize -S | FileCheck %s
%struct.gsm_state.2.8.14.15.16.17.19.22.23.25.26.28.29.31.32.33.35.36.37.38.40.41.42.44.45.47.48.50.52.53.54.56.57.58.59.60.61.62.63.66.73.83.84.89.90.91.92.93.94.95.96.99.100.101.102.103.104.106.107.114.116.121.122.129.130.135.136.137.138.139.140.141.142.143.144.147.148.149.158.159.160.161.164.165.166.167.168.169.172.179.181.182.183.188.195.200.201.202.203.204.205.208.209.210.212.213.214.215.222.223.225.226.230.231.232.233.234.235.236.237.238.239.240.241.242.243.244.352 = type { [280 x i16], i16, i64, i32, [8 x i16], [2 x [8 x i16]], i16, i16, [9 x i16], i16, i8, i8 }
define void @gsm_encode(%struct.gsm_state.2.8.14.15.16.17.19.22.23.25.26.28.29.31.32.33.35.36.37.38.40.41.42.44.45.47.48.50.52.53.54.56.57.58.59.60.61.62.63.66.73.83.84.89.90.91.92.93.94.95.96.99.100.101.102.103.104.106.107.114.116.121.122.129.130.135.136.137.138.139.140.141.142.143.144.147.148.149.158.159.160.161.164.165.166.167.168.169.172.179.181.182.183.188.195.200.201.202.203.204.205.208.209.210.212.213.214.215.222.223.225.226.230.231.232.233.234.235.236.237.238.239.240.241.242.243.244.352* %s, i16* %source, i8* %c) nounwind uwtable {
entry:
%xmc = alloca [52 x i16], align 16
%arraydecay5 = getelementptr inbounds [52 x i16], [52 x i16]* %xmc, i64 0, i64 0
call void @Gsm_Coder(%struct.gsm_state.2.8.14.15.16.17.19.22.23.25.26.28.29.31.32.33.35.36.37.38.40.41.42.44.45.47.48.50.52.53.54.56.57.58.59.60.61.62.63.66.73.83.84.89.90.91.92.93.94.95.96.99.100.101.102.103.104.106.107.114.116.121.122.129.130.135.136.137.138.139.140.141.142.143.144.147.148.149.158.159.160.161.164.165.166.167.168.169.172.179.181.182.183.188.195.200.201.202.203.204.205.208.209.210.212.213.214.215.222.223.225.226.230.231.232.233.234.235.236.237.238.239.240.241.242.243.244.352* %s, i16* %source, i16* undef, i16* null, i16* undef, i16* undef, i16* undef, i16* %arraydecay5) nounwind
%incdec.ptr136 = getelementptr inbounds i8, i8* %c, i64 10
%incdec.ptr157 = getelementptr inbounds i8, i8* %c, i64 11
store i8 0, i8* %incdec.ptr136, align 1
%arrayidx162 = getelementptr inbounds [52 x i16], [52 x i16]* %xmc, i64 0, i64 11
%0 = load i16, i16* %arrayidx162, align 2
%conv1631 = trunc i16 %0 to i8
%and164 = shl i8 %conv1631, 3
%shl165 = and i8 %and164, 56
%incdec.ptr172 = getelementptr inbounds i8, i8* %c, i64 12
store i8 %shl165, i8* %incdec.ptr157, align 1
%1 = load i16, i16* inttoptr (i64 2 to i16*), align 2
%conv1742 = trunc i16 %1 to i8
%and175 = shl i8 %conv1742, 1
%incdec.ptr183 = getelementptr inbounds i8, i8* %c, i64 13
store i8 %and175, i8* %incdec.ptr172, align 1
%incdec.ptr199 = getelementptr inbounds i8, i8* %c, i64 14
store i8 0, i8* %incdec.ptr183, align 1
%arrayidx214 = getelementptr inbounds [52 x i16], [52 x i16]* %xmc, i64 0, i64 15
%incdec.ptr220 = getelementptr inbounds i8, i8* %c, i64 15
store i8 0, i8* %incdec.ptr199, align 1
%2 = load i16, i16* %arrayidx214, align 2
%conv2223 = trunc i16 %2 to i8
%and223 = shl i8 %conv2223, 6
%incdec.ptr235 = getelementptr inbounds i8, i8* %c, i64 16
store i8 %and223, i8* %incdec.ptr220, align 1
%arrayidx240 = getelementptr inbounds [52 x i16], [52 x i16]* %xmc, i64 0, i64 19
%3 = load i16, i16* %arrayidx240, align 2
%conv2414 = trunc i16 %3 to i8
%and242 = shl i8 %conv2414, 2
%shl243 = and i8 %and242, 28
%incdec.ptr251 = getelementptr inbounds i8, i8* %c, i64 17
store i8 %shl243, i8* %incdec.ptr235, align 1
%incdec.ptr272 = getelementptr inbounds i8, i8* %c, i64 18
store i8 0, i8* %incdec.ptr251, align 1
%arrayidx282 = getelementptr inbounds [52 x i16], [52 x i16]* %xmc, i64 0, i64 25
%4 = load i16, i16* %arrayidx282, align 2
%conv2835 = trunc i16 %4 to i8
%and284 = and i8 %conv2835, 7
%incdec.ptr287 = getelementptr inbounds i8, i8* %c, i64 19
store i8 %and284, i8* %incdec.ptr272, align 1
%incdec.ptr298 = getelementptr inbounds i8, i8* %c, i64 20
store i8 0, i8* %incdec.ptr287, align 1
%incdec.ptr314 = getelementptr inbounds i8, i8* %c, i64 21
store i8 0, i8* %incdec.ptr298, align 1
%arrayidx319 = getelementptr inbounds [52 x i16], [52 x i16]* %xmc, i64 0, i64 26
%5 = load i16, i16* %arrayidx319, align 4
%conv3206 = trunc i16 %5 to i8
%and321 = shl i8 %conv3206, 4
%shl322 = and i8 %and321, 112
%incdec.ptr335 = getelementptr inbounds i8, i8* %c, i64 22
store i8 %shl322, i8* %incdec.ptr314, align 1
%arrayidx340 = getelementptr inbounds [52 x i16], [52 x i16]* %xmc, i64 0, i64 29
%6 = load i16, i16* %arrayidx340, align 2
%conv3417 = trunc i16 %6 to i8
%and342 = shl i8 %conv3417, 3
%shl343 = and i8 %and342, 56
%incdec.ptr350 = getelementptr inbounds i8, i8* %c, i64 23
store i8 %shl343, i8* %incdec.ptr335, align 1
%incdec.ptr366 = getelementptr inbounds i8, i8* %c, i64 24
store i8 0, i8* %incdec.ptr350, align 1
%arrayidx381 = getelementptr inbounds [52 x i16], [52 x i16]* %xmc, i64 0, i64 36
%incdec.ptr387 = getelementptr inbounds i8, i8* %c, i64 25
store i8 0, i8* %incdec.ptr366, align 1
%7 = load i16, i16* %arrayidx381, align 8
%conv3898 = trunc i16 %7 to i8
%and390 = shl i8 %conv3898, 6
store i8 %and390, i8* %incdec.ptr387, align 1
unreachable
; CHECK-LABEL: @gsm_encode(
}
declare void @Gsm_Coder(%struct.gsm_state.2.8.14.15.16.17.19.22.23.25.26.28.29.31.32.33.35.36.37.38.40.41.42.44.45.47.48.50.52.53.54.56.57.58.59.60.61.62.63.66.73.83.84.89.90.91.92.93.94.95.96.99.100.101.102.103.104.106.107.114.116.121.122.129.130.135.136.137.138.139.140.141.142.143.144.147.148.149.158.159.160.161.164.165.166.167.168.169.172.179.181.182.183.188.195.200.201.202.203.204.205.208.209.210.212.213.214.215.222.223.225.226.230.231.232.233.234.235.236.237.238.239.240.241.242.243.244.352*, i16*, i16*, i16*, i16*, i16*, i16*, i16*)
declare void @llvm.trap() noreturn nounwind

View File

@ -1,170 +0,0 @@
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"
; RUN: opt < %s -basicaa -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7 -bb-vectorize -S | FileCheck %s
%struct.gsm_state.2.8.39.44.45.55.56.57.58.59.62.63.64.65.74.75.76.77.80.87.92.93.94.95.96.97.110.111.112.113.114.128.130.135.136.137.138.139.140.141.142.143.144.145.148.149.150.151.152.169.170.177.178.179.184.185.186.187.188.201.208.209.219.220.221.223.224.225.230.231.232.233.235.236.237.238.245.246.248.249.272.274.279.280.281.282.283.286.293.298.299.314.315.316.317.318.319.320.321.322.323.324.325.326.327.328.329.330.331.332.333.334.335.336.337.338.339.340.341.342.343.344.345.346.347.348.349.350.351.352.353.565 = type { [280 x i16], i16, i64, i32, [8 x i16], [2 x [8 x i16]], i16, i16, [9 x i16], i16, i8, i8 }
define void @gsm_encode(%struct.gsm_state.2.8.39.44.45.55.56.57.58.59.62.63.64.65.74.75.76.77.80.87.92.93.94.95.96.97.110.111.112.113.114.128.130.135.136.137.138.139.140.141.142.143.144.145.148.149.150.151.152.169.170.177.178.179.184.185.186.187.188.201.208.209.219.220.221.223.224.225.230.231.232.233.235.236.237.238.245.246.248.249.272.274.279.280.281.282.283.286.293.298.299.314.315.316.317.318.319.320.321.322.323.324.325.326.327.328.329.330.331.332.333.334.335.336.337.338.339.340.341.342.343.344.345.346.347.348.349.350.351.352.353.565* %s, i16* %source, i8* %c) nounwind uwtable {
entry:
%LARc28 = alloca [2 x i64], align 16
%LARc28.sub = getelementptr inbounds [2 x i64], [2 x i64]* %LARc28, i64 0, i64 0
%tmpcast = bitcast [2 x i64]* %LARc28 to [8 x i16]*
%Nc = alloca [4 x i16], align 2
%Mc = alloca [4 x i16], align 2
%bc = alloca [4 x i16], align 2
%xmc = alloca [52 x i16], align 16
%arraydecay = bitcast [2 x i64]* %LARc28 to i16*
%arraydecay1 = getelementptr inbounds [4 x i16], [4 x i16]* %Nc, i64 0, i64 0
%arraydecay2 = getelementptr inbounds [4 x i16], [4 x i16]* %bc, i64 0, i64 0
%arraydecay3 = getelementptr inbounds [4 x i16], [4 x i16]* %Mc, i64 0, i64 0
%arraydecay5 = getelementptr inbounds [52 x i16], [52 x i16]* %xmc, i64 0, i64 0
call void @Gsm_Coder(%struct.gsm_state.2.8.39.44.45.55.56.57.58.59.62.63.64.65.74.75.76.77.80.87.92.93.94.95.96.97.110.111.112.113.114.128.130.135.136.137.138.139.140.141.142.143.144.145.148.149.150.151.152.169.170.177.178.179.184.185.186.187.188.201.208.209.219.220.221.223.224.225.230.231.232.233.235.236.237.238.245.246.248.249.272.274.279.280.281.282.283.286.293.298.299.314.315.316.317.318.319.320.321.322.323.324.325.326.327.328.329.330.331.332.333.334.335.336.337.338.339.340.341.342.343.344.345.346.347.348.349.350.351.352.353.565* %s, i16* %source, i16* %arraydecay, i16* %arraydecay1, i16* %arraydecay2, i16* %arraydecay3, i16* undef, i16* %arraydecay5) nounwind
%0 = load i64, i64* %LARc28.sub, align 16
%1 = trunc i64 %0 to i32
%conv1 = lshr i32 %1, 2
%and = and i32 %conv1, 15
%or = or i32 %and, 208
%conv6 = trunc i32 %or to i8
%incdec.ptr = getelementptr inbounds i8, i8* %c, i64 1
store i8 %conv6, i8* %c, align 1
%conv84 = trunc i64 %0 to i8
%and9 = shl i8 %conv84, 6
%incdec.ptr15 = getelementptr inbounds i8, i8* %c, i64 2
store i8 %and9, i8* %incdec.ptr, align 1
%2 = lshr i64 %0, 50
%shr226.tr = trunc i64 %2 to i8
%conv25 = and i8 %shr226.tr, 7
%incdec.ptr26 = getelementptr inbounds i8, i8* %c, i64 3
store i8 %conv25, i8* %incdec.ptr15, align 1
%incdec.ptr42 = getelementptr inbounds i8, i8* %c, i64 4
store i8 0, i8* %incdec.ptr26, align 1
%arrayidx52 = getelementptr inbounds [8 x i16], [8 x i16]* %tmpcast, i64 0, i64 7
%3 = load i16, i16* %arrayidx52, align 2
%conv537 = trunc i16 %3 to i8
%and54 = and i8 %conv537, 7
%incdec.ptr57 = getelementptr inbounds i8, i8* %c, i64 5
store i8 %and54, i8* %incdec.ptr42, align 1
%incdec.ptr68 = getelementptr inbounds i8, i8* %c, i64 6
store i8 0, i8* %incdec.ptr57, align 1
%4 = load i16, i16* %arraydecay3, align 2
%conv748 = trunc i16 %4 to i8
%and75 = shl i8 %conv748, 5
%shl76 = and i8 %and75, 96
%incdec.ptr84 = getelementptr inbounds i8, i8* %c, i64 7
store i8 %shl76, i8* %incdec.ptr68, align 1
%arrayidx94 = getelementptr inbounds [52 x i16], [52 x i16]* %xmc, i64 0, i64 1
%5 = load i16, i16* %arrayidx94, align 2
%conv959 = trunc i16 %5 to i8
%and96 = shl i8 %conv959, 1
%shl97 = and i8 %and96, 14
%or103 = or i8 %shl97, 1
%incdec.ptr105 = getelementptr inbounds i8, i8* %c, i64 8
store i8 %or103, i8* %incdec.ptr84, align 1
%arrayidx115 = getelementptr inbounds [52 x i16], [52 x i16]* %xmc, i64 0, i64 4
%6 = bitcast i16* %arrayidx115 to i32*
%7 = load i32, i32* %6, align 8
%conv11610 = trunc i32 %7 to i8
%and117 = and i8 %conv11610, 7
%incdec.ptr120 = getelementptr inbounds i8, i8* %c, i64 9
store i8 %and117, i8* %incdec.ptr105, align 1
%8 = lshr i32 %7, 16
%and12330 = shl nuw nsw i32 %8, 5
%and123 = trunc i32 %and12330 to i8
%incdec.ptr136 = getelementptr inbounds i8, i8* %c, i64 10
store i8 %and123, i8* %incdec.ptr120, align 1
%incdec.ptr157 = getelementptr inbounds i8, i8* %c, i64 11
store i8 0, i8* %incdec.ptr136, align 1
%incdec.ptr172 = getelementptr inbounds i8, i8* %c, i64 12
store i8 0, i8* %incdec.ptr157, align 1
%arrayidx173 = getelementptr inbounds [4 x i16], [4 x i16]* %Nc, i64 0, i64 1
%9 = load i16, i16* %arrayidx173, align 2
%conv17412 = zext i16 %9 to i32
%and175 = shl nuw nsw i32 %conv17412, 1
%arrayidx177 = getelementptr inbounds [4 x i16], [4 x i16]* %bc, i64 0, i64 1
%10 = load i16, i16* %arrayidx177, align 2
%conv17826 = zext i16 %10 to i32
%shr17913 = lshr i32 %conv17826, 1
%and180 = and i32 %shr17913, 1
%or181 = or i32 %and175, %and180
%conv182 = trunc i32 %or181 to i8
%incdec.ptr183 = getelementptr inbounds i8, i8* %c, i64 13
store i8 %conv182, i8* %incdec.ptr172, align 1
%arrayidx188 = getelementptr inbounds [4 x i16], [4 x i16]* %Mc, i64 0, i64 1
%11 = load i16, i16* %arrayidx188, align 2
%conv18914 = trunc i16 %11 to i8
%and190 = shl i8 %conv18914, 5
%shl191 = and i8 %and190, 96
%incdec.ptr199 = getelementptr inbounds i8, i8* %c, i64 14
store i8 %shl191, i8* %incdec.ptr183, align 1
%arrayidx209 = getelementptr inbounds [52 x i16], [52 x i16]* %xmc, i64 0, i64 14
%12 = load i16, i16* %arrayidx209, align 4
%conv21015 = trunc i16 %12 to i8
%and211 = shl i8 %conv21015, 1
%shl212 = and i8 %and211, 14
%or218 = or i8 %shl212, 1
%incdec.ptr220 = getelementptr inbounds i8, i8* %c, i64 15
store i8 %or218, i8* %incdec.ptr199, align 1
%arrayidx225 = getelementptr inbounds [52 x i16], [52 x i16]* %xmc, i64 0, i64 16
%13 = bitcast i16* %arrayidx225 to i64*
%14 = load i64, i64* %13, align 16
%conv22616 = trunc i64 %14 to i8
%and227 = shl i8 %conv22616, 3
%shl228 = and i8 %and227, 56
%incdec.ptr235 = getelementptr inbounds i8, i8* %c, i64 16
store i8 %shl228, i8* %incdec.ptr220, align 1
%15 = lshr i64 %14, 32
%and23832 = shl nuw nsw i64 %15, 5
%and238 = trunc i64 %and23832 to i8
%incdec.ptr251 = getelementptr inbounds i8, i8* %c, i64 17
store i8 %and238, i8* %incdec.ptr235, align 1
%arrayidx266 = getelementptr inbounds [52 x i16], [52 x i16]* %xmc, i64 0, i64 23
%incdec.ptr272 = getelementptr inbounds i8, i8* %c, i64 18
store i8 0, i8* %incdec.ptr251, align 1
%16 = load i16, i16* %arrayidx266, align 2
%conv27418 = trunc i16 %16 to i8
%and275 = shl i8 %conv27418, 6
%incdec.ptr287 = getelementptr inbounds i8, i8* %c, i64 19
store i8 %and275, i8* %incdec.ptr272, align 1
%arrayidx288 = getelementptr inbounds [4 x i16], [4 x i16]* %Nc, i64 0, i64 2
%17 = load i16, i16* %arrayidx288, align 2
%conv28919 = zext i16 %17 to i32
%and290 = shl nuw nsw i32 %conv28919, 1
%arrayidx292 = getelementptr inbounds [4 x i16], [4 x i16]* %bc, i64 0, i64 2
%18 = load i16, i16* %arrayidx292, align 2
%conv29327 = zext i16 %18 to i32
%shr29420 = lshr i32 %conv29327, 1
%and295 = and i32 %shr29420, 1
%or296 = or i32 %and290, %and295
%conv297 = trunc i32 %or296 to i8
%incdec.ptr298 = getelementptr inbounds i8, i8* %c, i64 20
store i8 %conv297, i8* %incdec.ptr287, align 1
%conv30021 = trunc i16 %18 to i8
%and301 = shl i8 %conv30021, 7
%incdec.ptr314 = getelementptr inbounds i8, i8* %c, i64 21
store i8 %and301, i8* %incdec.ptr298, align 1
%incdec.ptr335 = getelementptr inbounds i8, i8* %c, i64 22
store i8 0, i8* %incdec.ptr314, align 1
%arrayidx340 = getelementptr inbounds [52 x i16], [52 x i16]* %xmc, i64 0, i64 29
%19 = load i16, i16* %arrayidx340, align 2
%conv34122 = trunc i16 %19 to i8
%and342 = shl i8 %conv34122, 3
%shl343 = and i8 %and342, 56
%incdec.ptr350 = getelementptr inbounds i8, i8* %c, i64 23
store i8 %shl343, i8* %incdec.ptr335, align 1
%arrayidx355 = getelementptr inbounds [52 x i16], [52 x i16]* %xmc, i64 0, i64 32
%20 = bitcast i16* %arrayidx355 to i32*
%21 = load i32, i32* %20, align 16
%conv35623 = shl i32 %21, 2
%shl358 = and i32 %conv35623, 28
%22 = lshr i32 %21, 17
%and363 = and i32 %22, 3
%or364 = or i32 %shl358, %and363
%conv365 = trunc i32 %or364 to i8
store i8 %conv365, i8* %incdec.ptr350, align 1
unreachable
; CHECK-LABEL: @gsm_encode(
}
declare void @Gsm_Coder(%struct.gsm_state.2.8.39.44.45.55.56.57.58.59.62.63.64.65.74.75.76.77.80.87.92.93.94.95.96.97.110.111.112.113.114.128.130.135.136.137.138.139.140.141.142.143.144.145.148.149.150.151.152.169.170.177.178.179.184.185.186.187.188.201.208.209.219.220.221.223.224.225.230.231.232.233.235.236.237.238.245.246.248.249.272.274.279.280.281.282.283.286.293.298.299.314.315.316.317.318.319.320.321.322.323.324.325.326.327.328.329.330.331.332.333.334.335.336.337.338.339.340.341.342.343.344.345.346.347.348.349.350.351.352.353.565*, i16*, i16*, i16*, i16*, i16*, i16*, i16*)
declare void @llvm.trap() noreturn nounwind

View File

@ -1,25 +0,0 @@
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7 -bb-vectorize -S | FileCheck %s
define <4 x float> @test7(<4 x float> %A1, <4 x float> %B1, double %C1, double %C2, double %D1, double %D2) {
; Two independent chains: a <4 x float> shuffle chain feeding %R, and a
; scalar double chain (%M*/%N*/%Z*) whose results are never used.  The
; CHECK-NOT below verifies the shuffles are not widened into <8 x float>.
%A2 = shufflevector <4 x float> %A1, <4 x float> undef, <4 x i32> <i32 2, i32 1, i32 0, i32 3>
%B2 = shufflevector <4 x float> %B1, <4 x float> undef, <4 x i32> <i32 2, i32 1, i32 0, i32 3>
%X1 = shufflevector <4 x float> %A2, <4 x float> undef, <2 x i32> <i32 0, i32 1>
%X2 = shufflevector <4 x float> %B2, <4 x float> undef, <2 x i32> <i32 2, i32 3>
%Y1 = shufflevector <2 x float> %X1, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
%Y2 = shufflevector <2 x float> %X2, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
; Dead scalar chain follows (only %R is returned).
%M1 = fsub double %C1, %D1
%M2 = fsub double %C2, %D2
%N1 = fmul double %M1, %C1
%N2 = fmul double %M2, %C2
%Z1 = fadd double %N1, %D1
%Z2 = fadd double %N2, %D2
%R = fmul <4 x float> %Y1, %Y2
ret <4 x float> %R
; CHECK-LABEL: @test7(
; CHECK-NOT: <8 x float>
; CHECK: ret <4 x float>
}

View File

@ -1,127 +0,0 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7 -bb-vectorize -bb-vectorize-req-chain-depth=3 -instcombine -gvn -S | FileCheck %s
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
declare double @llvm.fma.f64(double, double, double)
declare double @llvm.fmuladd.f64(double, double, double)
declare double @llvm.cos.f64(double)
declare double @llvm.powi.f64(double, i32)
; Basic depth-3 chain with fma
define double @test1(double %A1, double %A2, double %B1, double %B2, double %C1, double %C2) {
; Two parallel fsub/llvm.fma/fadd chains joined by a final fmul.  The
; autogenerated checks record that this fma chain is left in scalar form.
; CHECK-LABEL: @test1(
; CHECK-NEXT: [[X1:%.*]] = fsub double [[A1:%.*]], [[B1:%.*]]
; CHECK-NEXT: [[X2:%.*]] = fsub double [[A2:%.*]], [[B2:%.*]]
; CHECK-NEXT: [[Y1:%.*]] = call double @llvm.fma.f64(double [[X1]], double [[A1]], double [[C1:%.*]])
; CHECK-NEXT: [[Y2:%.*]] = call double @llvm.fma.f64(double [[X2]], double [[A2]], double [[C2:%.*]])
; CHECK-NEXT: [[Z1:%.*]] = fadd double [[Y1]], [[B1]]
; CHECK-NEXT: [[Z2:%.*]] = fadd double [[Y2]], [[B2]]
; CHECK-NEXT: [[R:%.*]] = fmul double [[Z1]], [[Z2]]
; CHECK-NEXT: ret double [[R]]
;
%X1 = fsub double %A1, %B1
%X2 = fsub double %A2, %B2
%Y1 = call double @llvm.fma.f64(double %X1, double %A1, double %C1)
%Y2 = call double @llvm.fma.f64(double %X2, double %A2, double %C2)
%Z1 = fadd double %Y1, %B1
%Z2 = fadd double %Y2, %B2
%R = fmul double %Z1, %Z2
ret double %R
}
; Basic depth-3 chain with fmuladd
define double @test1a(double %A1, double %A2, double %B1, double %B2, double %C1, double %C2) {
; Same chain shape as @test1 but using llvm.fmuladd; here the checks show
; the pair of chains fused into <2 x double> ops (insertelement the scalar
; inputs, one vector fsub/fmuladd/fadd, then extractelement the two lanes).
; CHECK-LABEL: @test1a(
; CHECK-NEXT: [[X1_V_I1_1:%.*]] = insertelement <2 x double> undef, double [[B1:%.*]], i32 0
; CHECK-NEXT: [[X1_V_I1_2:%.*]] = insertelement <2 x double> [[X1_V_I1_1]], double [[B2:%.*]], i32 1
; CHECK-NEXT: [[X1_V_I0_1:%.*]] = insertelement <2 x double> undef, double [[A1:%.*]], i32 0
; CHECK-NEXT: [[X1_V_I0_2:%.*]] = insertelement <2 x double> [[X1_V_I0_1]], double [[A2:%.*]], i32 1
; CHECK-NEXT: [[X1:%.*]] = fsub <2 x double> [[X1_V_I0_2]], [[X1_V_I1_2]]
; CHECK-NEXT: [[Y1_V_I2_1:%.*]] = insertelement <2 x double> undef, double [[C1:%.*]], i32 0
; CHECK-NEXT: [[Y1_V_I2_2:%.*]] = insertelement <2 x double> [[Y1_V_I2_1]], double [[C2:%.*]], i32 1
; CHECK-NEXT: [[Y1:%.*]] = call <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[X1]], <2 x double> [[X1_V_I0_2]], <2 x double> [[Y1_V_I2_2]])
; CHECK-NEXT: [[Z1:%.*]] = fadd <2 x double> [[Y1]], [[X1_V_I1_2]]
; CHECK-NEXT: [[Z1_V_R1:%.*]] = extractelement <2 x double> [[Z1]], i32 0
; CHECK-NEXT: [[Z1_V_R2:%.*]] = extractelement <2 x double> [[Z1]], i32 1
; CHECK-NEXT: [[R:%.*]] = fmul double [[Z1_V_R1]], [[Z1_V_R2]]
; CHECK-NEXT: ret double [[R]]
;
%X1 = fsub double %A1, %B1
%X2 = fsub double %A2, %B2
%Y1 = call double @llvm.fmuladd.f64(double %X1, double %A1, double %C1)
%Y2 = call double @llvm.fmuladd.f64(double %X2, double %A2, double %C2)
%Z1 = fadd double %Y1, %B1
%Z2 = fadd double %Y2, %B2
%R = fmul double %Z1, %Z2
ret double %R
}
; Basic depth-3 chain with cos
define double @test2(double %A1, double %A2, double %B1, double %B2) {
; Depth-3 chain through llvm.cos; the checks record it staying scalar.
; CHECK-LABEL: @test2(
; CHECK-NEXT: [[X1:%.*]] = fsub double [[A1:%.*]], [[B1:%.*]]
; CHECK-NEXT: [[X2:%.*]] = fsub double [[A2:%.*]], [[B2:%.*]]
; CHECK-NEXT: [[Y1:%.*]] = call double @llvm.cos.f64(double [[X1]])
; CHECK-NEXT: [[Y2:%.*]] = call double @llvm.cos.f64(double [[X2]])
; CHECK-NEXT: [[Z1:%.*]] = fadd double [[Y1]], [[B1]]
; CHECK-NEXT: [[Z2:%.*]] = fadd double [[Y2]], [[B2]]
; CHECK-NEXT: [[R:%.*]] = fmul double [[Z1]], [[Z2]]
; CHECK-NEXT: ret double [[R]]
;
%X1 = fsub double %A1, %B1
%X2 = fsub double %A2, %B2
%Y1 = call double @llvm.cos.f64(double %X1)
%Y2 = call double @llvm.cos.f64(double %X2)
%Z1 = fadd double %Y1, %B1
%Z2 = fadd double %Y2, %B2
%R = fmul double %Z1, %Z2
ret double %R
}
; Basic depth-3 chain with powi
define double @test3(double %A1, double %A2, double %B1, double %B2, i32 %P) {
; Depth-3 chain through llvm.powi where both calls share the same power %P;
; the autogenerated checks record the current (scalar) output.
; CHECK-LABEL: @test3(
; CHECK-NEXT: [[X1:%.*]] = fsub double [[A1:%.*]], [[B1:%.*]]
; CHECK-NEXT: [[X2:%.*]] = fsub double [[A2:%.*]], [[B2:%.*]]
; CHECK-NEXT: [[Y1:%.*]] = call double @llvm.powi.f64(double [[X1]], i32 [[P:%.*]])
; CHECK-NEXT: [[Y2:%.*]] = call double @llvm.powi.f64(double [[X2]], i32 [[P]])
; CHECK-NEXT: [[Z1:%.*]] = fadd double [[Y1]], [[B1]]
; CHECK-NEXT: [[Z2:%.*]] = fadd double [[Y2]], [[B2]]
; CHECK-NEXT: [[R:%.*]] = fmul double [[Z1]], [[Z2]]
; CHECK-NEXT: ret double [[R]]
;
%X1 = fsub double %A1, %B1
%X2 = fsub double %A2, %B2
%Y1 = call double @llvm.powi.f64(double %X1, i32 %P)
%Y2 = call double @llvm.powi.f64(double %X2, i32 %P)
%Z1 = fadd double %Y1, %B1
%Z2 = fadd double %Y2, %B2
%R = fmul double %Z1, %Z2
ret double %R
}
; Basic depth-3 chain with powi (different powers: should not vectorize)
define double @test4(double %A1, double %A2, double %B1, double %B2, i32 %P) {
; Like @test3, but %Y1 and %Y2 use different powers (%P vs %P2 = %P + 1), so
; the two powi calls must not be paired; the checks confirm all-scalar output.
; CHECK-LABEL: @test4(
; CHECK-NEXT: [[X1:%.*]] = fsub double [[A1:%.*]], [[B1:%.*]]
; CHECK-NEXT: [[X2:%.*]] = fsub double [[A2:%.*]], [[B2:%.*]]
; CHECK-NEXT: [[P2:%.*]] = add i32 [[P:%.*]], 1
; CHECK-NEXT: [[Y1:%.*]] = call double @llvm.powi.f64(double [[X1]], i32 [[P]])
; CHECK-NEXT: [[Y2:%.*]] = call double @llvm.powi.f64(double [[X2]], i32 [[P2]])
; CHECK-NEXT: [[Z1:%.*]] = fadd double [[Y1]], [[B1]]
; CHECK-NEXT: [[Z2:%.*]] = fadd double [[Y2]], [[B2]]
; CHECK-NEXT: [[R:%.*]] = fmul double [[Z1]], [[Z2]]
; CHECK-NEXT: ret double [[R]]
;
%X1 = fsub double %A1, %B1
%X2 = fsub double %A2, %B2
%P2 = add i32 %P, 1
%Y1 = call double @llvm.powi.f64(double %X1, i32 %P)
%Y2 = call double @llvm.powi.f64(double %X2, i32 %P2)
%Z1 = fadd double %Y1, %B1
%Z2 = fadd double %Y2, %B2
%R = fmul double %Z1, %Z2
ret double %R
}

View File

@ -1,33 +0,0 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7 -bb-vectorize -bb-vectorize-req-chain-depth=3 -instcombine -gvn -S | FileCheck %s
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
; Simple 3-pair chain with loads and stores
define void @test1(double* %a, double* %b, double* %c) nounwind uwtable readonly {
; Simple 3-pair chain: two adjacent scalar loads from %a and %b, two fmuls,
; and two adjacent stores to %c.  The checks show each pair fused into a
; single <2 x double> load/fmul/store.
; CHECK-LABEL: @test1(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[I0_V_I0:%.*]] = bitcast double* [[A:%.*]] to <2 x double>*
; CHECK-NEXT: [[I1_V_I0:%.*]] = bitcast double* [[B:%.*]] to <2 x double>*
; CHECK-NEXT: [[I0:%.*]] = load <2 x double>, <2 x double>* [[I0_V_I0]], align 8
; CHECK-NEXT: [[I1:%.*]] = load <2 x double>, <2 x double>* [[I1_V_I0]], align 8
; CHECK-NEXT: [[MUL:%.*]] = fmul <2 x double> [[I0]], [[I1]]
; CHECK-NEXT: [[TMP0:%.*]] = bitcast double* [[C:%.*]] to <2 x double>*
; CHECK-NEXT: store <2 x double> [[MUL]], <2 x double>* [[TMP0]], align 8
; CHECK-NEXT: ret void
;
entry:
%i0 = load double, double* %a, align 8
%i1 = load double, double* %b, align 8
%mul = fmul double %i0, %i1
%arrayidx3 = getelementptr inbounds double, double* %a, i64 1
%i3 = load double, double* %arrayidx3, align 8
%arrayidx4 = getelementptr inbounds double, double* %b, i64 1
%i4 = load double, double* %arrayidx4, align 8
%mul5 = fmul double %i3, %i4
store double %mul, double* %c, align 8
%arrayidx5 = getelementptr inbounds double, double* %c, i64 1
store double %mul5, double* %arrayidx5, align 8
ret void
}

View File

@ -1,149 +0,0 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7 -bb-vectorize -bb-vectorize-req-chain-depth=3 -instcombine -gvn -S | FileCheck %s
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
; Basic depth-3 chain
define double @test1(double %A1, double %A2, double %B1, double %B2) {
; Basic depth-3 fsub/fmul/fadd chain; the checks show it fused into one
; <2 x double> chain (scalar inputs inserted, lanes extracted at the end).
; CHECK-LABEL: @test1(
; CHECK-NEXT: [[X1_V_I1_1:%.*]] = insertelement <2 x double> undef, double [[B1:%.*]], i32 0
; CHECK-NEXT: [[X1_V_I1_2:%.*]] = insertelement <2 x double> [[X1_V_I1_1]], double [[B2:%.*]], i32 1
; CHECK-NEXT: [[X1_V_I0_1:%.*]] = insertelement <2 x double> undef, double [[A1:%.*]], i32 0
; CHECK-NEXT: [[X1_V_I0_2:%.*]] = insertelement <2 x double> [[X1_V_I0_1]], double [[A2:%.*]], i32 1
; CHECK-NEXT: [[X1:%.*]] = fsub <2 x double> [[X1_V_I0_2]], [[X1_V_I1_2]]
; CHECK-NEXT: [[Y1:%.*]] = fmul <2 x double> [[X1]], [[X1_V_I0_2]]
; CHECK-NEXT: [[Z1:%.*]] = fadd <2 x double> [[Y1]], [[X1_V_I1_2]]
; CHECK-NEXT: [[Z1_V_R1:%.*]] = extractelement <2 x double> [[Z1]], i32 0
; CHECK-NEXT: [[Z1_V_R2:%.*]] = extractelement <2 x double> [[Z1]], i32 1
; CHECK-NEXT: [[R:%.*]] = fmul double [[Z1_V_R1]], [[Z1_V_R2]]
; CHECK-NEXT: ret double [[R]]
;
%X1 = fsub double %A1, %B1
%X2 = fsub double %A2, %B2
%Y1 = fmul double %X1, %A1
%Y2 = fmul double %X2, %A2
%Z1 = fadd double %Y1, %B1
%Z2 = fadd double %Y2, %B2
%R = fmul double %Z1, %Z2
ret double %R
}
; Basic chain
define double @test1a(double %A1, double %A2, double %B1, double %B2) {
; Longer chain (X/Y/Z/W/V/Q/S); the checks show the whole thing vectorized
; to <2 x double>, with only the final reduction fmul left scalar.
; CHECK-LABEL: @test1a(
; CHECK-NEXT: [[X1_V_I1_1:%.*]] = insertelement <2 x double> undef, double [[B1:%.*]], i32 0
; CHECK-NEXT: [[X1_V_I1_2:%.*]] = insertelement <2 x double> [[X1_V_I1_1]], double [[B2:%.*]], i32 1
; CHECK-NEXT: [[X1_V_I0_1:%.*]] = insertelement <2 x double> undef, double [[A1:%.*]], i32 0
; CHECK-NEXT: [[X1_V_I0_2:%.*]] = insertelement <2 x double> [[X1_V_I0_1]], double [[A2:%.*]], i32 1
; CHECK-NEXT: [[X1:%.*]] = fsub <2 x double> [[X1_V_I0_2]], [[X1_V_I1_2]]
; CHECK-NEXT: [[Y1:%.*]] = fmul <2 x double> [[X1]], [[X1_V_I0_2]]
; CHECK-NEXT: [[Z1:%.*]] = fadd <2 x double> [[Y1]], [[X1_V_I1_2]]
; CHECK-NEXT: [[W1:%.*]] = fadd <2 x double> [[Y1]], [[Z1]]
; CHECK-NEXT: [[V1:%.*]] = fadd <2 x double> [[W1]], [[Z1]]
; CHECK-NEXT: [[Q1:%.*]] = fadd <2 x double> [[W1]], [[V1]]
; CHECK-NEXT: [[S1:%.*]] = fadd <2 x double> [[W1]], [[Q1]]
; CHECK-NEXT: [[S1_V_R1:%.*]] = extractelement <2 x double> [[S1]], i32 0
; CHECK-NEXT: [[S1_V_R2:%.*]] = extractelement <2 x double> [[S1]], i32 1
; CHECK-NEXT: [[R:%.*]] = fmul double [[S1_V_R1]], [[S1_V_R2]]
; CHECK-NEXT: ret double [[R]]
;
%X1 = fsub double %A1, %B1
%X2 = fsub double %A2, %B2
%Y1 = fmul double %X1, %A1
%Y2 = fmul double %X2, %A2
%Z1 = fadd double %Y1, %B1
%Z2 = fadd double %Y2, %B2
%W1 = fadd double %Y1, %Z1
%W2 = fadd double %Y2, %Z2
%V1 = fadd double %W1, %Z1
%V2 = fadd double %W2, %Z2
%Q1 = fadd double %W1, %V1
%Q2 = fadd double %W2, %V2
%S1 = fadd double %W1, %Q1
%S2 = fadd double %W2, %Q2
%R = fmul double %S1, %S2
ret double %R
}
; Basic depth-3 chain (last pair permuted)
define double @test2(double %A1, double %A2, double %B1, double %B2) {
; Same depth-3 chain but with the final fadd pair permuted (%Z1 uses %Y2 and
; %Z2 uses %Y1); the checks show the B operands inserted in swapped order.
; CHECK-LABEL: @test2(
; CHECK-NEXT: [[X1_V_I1_1:%.*]] = insertelement <2 x double> undef, double [[B1:%.*]], i32 0
; CHECK-NEXT: [[X1_V_I1_2:%.*]] = insertelement <2 x double> [[X1_V_I1_1]], double [[B2:%.*]], i32 1
; CHECK-NEXT: [[X1_V_I0_1:%.*]] = insertelement <2 x double> undef, double [[A1:%.*]], i32 0
; CHECK-NEXT: [[X1_V_I0_2:%.*]] = insertelement <2 x double> [[X1_V_I0_1]], double [[A2:%.*]], i32 1
; CHECK-NEXT: [[X1:%.*]] = fsub <2 x double> [[X1_V_I0_2]], [[X1_V_I1_2]]
; CHECK-NEXT: [[Y1:%.*]] = fmul <2 x double> [[X1]], [[X1_V_I0_2]]
; CHECK-NEXT: [[Z1_V_I1_1:%.*]] = insertelement <2 x double> undef, double [[B2]], i32 0
; CHECK-NEXT: [[Z1_V_I1_2:%.*]] = insertelement <2 x double> [[Z1_V_I1_1]], double [[B1]], i32 1
; CHECK-NEXT: [[Z2:%.*]] = fadd <2 x double> [[Y1]], [[Z1_V_I1_2]]
; CHECK-NEXT: [[Z2_V_R1:%.*]] = extractelement <2 x double> [[Z2]], i32 0
; CHECK-NEXT: [[Z2_V_R2:%.*]] = extractelement <2 x double> [[Z2]], i32 1
; CHECK-NEXT: [[R:%.*]] = fmul double [[Z2_V_R2]], [[Z2_V_R1]]
; CHECK-NEXT: ret double [[R]]
;
%X1 = fsub double %A1, %B1
%X2 = fsub double %A2, %B2
%Y1 = fmul double %X1, %A1
%Y2 = fmul double %X2, %A2
%Z1 = fadd double %Y2, %B1
%Z2 = fadd double %Y1, %B2
%R = fmul double %Z1, %Z2
ret double %R
}
; Basic depth-4 chain (internal permutation)
define double @test4(double %A1, double %A2, double %B1, double %B2) {
; Depth-4 variant with an internal permutation.  Note %W1/%W2 are dead (%R
; uses only %Z1/%Z2), so the checks match @test2's output with no W ops.
; CHECK-LABEL: @test4(
; CHECK-NEXT: [[X1_V_I1_1:%.*]] = insertelement <2 x double> undef, double [[B1:%.*]], i32 0
; CHECK-NEXT: [[X1_V_I1_2:%.*]] = insertelement <2 x double> [[X1_V_I1_1]], double [[B2:%.*]], i32 1
; CHECK-NEXT: [[X1_V_I0_1:%.*]] = insertelement <2 x double> undef, double [[A1:%.*]], i32 0
; CHECK-NEXT: [[X1_V_I0_2:%.*]] = insertelement <2 x double> [[X1_V_I0_1]], double [[A2:%.*]], i32 1
; CHECK-NEXT: [[X1:%.*]] = fsub <2 x double> [[X1_V_I0_2]], [[X1_V_I1_2]]
; CHECK-NEXT: [[Y1:%.*]] = fmul <2 x double> [[X1]], [[X1_V_I0_2]]
; CHECK-NEXT: [[Z1_V_I1_1:%.*]] = insertelement <2 x double> undef, double [[B2]], i32 0
; CHECK-NEXT: [[Z1_V_I1_2:%.*]] = insertelement <2 x double> [[Z1_V_I1_1]], double [[B1]], i32 1
; CHECK-NEXT: [[Z2:%.*]] = fadd <2 x double> [[Y1]], [[Z1_V_I1_2]]
; CHECK-NEXT: [[Z2_V_R1:%.*]] = extractelement <2 x double> [[Z2]], i32 0
; CHECK-NEXT: [[Z2_V_R2:%.*]] = extractelement <2 x double> [[Z2]], i32 1
; CHECK-NEXT: [[R:%.*]] = fmul double [[Z2_V_R2]], [[Z2_V_R1]]
; CHECK-NEXT: ret double [[R]]
;
%X1 = fsub double %A1, %B1
%X2 = fsub double %A2, %B2
%Y1 = fmul double %X1, %A1
%Y2 = fmul double %X2, %A2
%Z1 = fadd double %Y2, %B1
%Z2 = fadd double %Y1, %B2
%W1 = fadd double %Y2, %Z1
%W2 = fadd double %Y1, %Z2
%R = fmul double %Z1, %Z2
ret double %R
}
; Basic chain with shuffles
define <8 x i8> @test6(<8 x i8> %A1, <8 x i8> %A2, <8 x i8> %B1, <8 x i8> %B2) {
; Inputs are already <8 x i8> vectors; the checks show the chain unchanged.
; In %Q2 the mask only reads the first operand, so the checks show its second
; operand as undef (presumably instcombine canonicalization — see RUN line).
; CHECK-LABEL: @test6(
; CHECK-NEXT: [[X1:%.*]] = sub <8 x i8> [[A1:%.*]], [[B1:%.*]]
; CHECK-NEXT: [[X2:%.*]] = sub <8 x i8> [[A2:%.*]], [[B2:%.*]]
; CHECK-NEXT: [[Y1:%.*]] = mul <8 x i8> [[X1]], [[A1]]
; CHECK-NEXT: [[Y2:%.*]] = mul <8 x i8> [[X2]], [[A2]]
; CHECK-NEXT: [[Z1:%.*]] = add <8 x i8> [[Y1]], [[B1]]
; CHECK-NEXT: [[Z2:%.*]] = add <8 x i8> [[Y2]], [[B2]]
; CHECK-NEXT: [[Q1:%.*]] = shufflevector <8 x i8> [[Z1]], <8 x i8> [[Z2]], <8 x i32> <i32 15, i32 8, i32 6, i32 1, i32 13, i32 10, i32 4, i32 3>
; CHECK-NEXT: [[Q2:%.*]] = shufflevector <8 x i8> [[Z2]], <8 x i8> undef, <8 x i32> <i32 6, i32 7, i32 0, i32 1, i32 2, i32 4, i32 4, i32 1>
; CHECK-NEXT: [[R:%.*]] = mul <8 x i8> [[Q1]], [[Q2]]
; CHECK-NEXT: ret <8 x i8> [[R]]
;
%X1 = sub <8 x i8> %A1, %B1
%X2 = sub <8 x i8> %A2, %B2
%Y1 = mul <8 x i8> %X1, %A1
%Y2 = mul <8 x i8> %X2, %A2
%Z1 = add <8 x i8> %Y1, %B1
%Z2 = add <8 x i8> %Y2, %B2
%Q1 = shufflevector <8 x i8> %Z1, <8 x i8> %Z2, <8 x i32> <i32 15, i32 8, i32 6, i32 1, i32 13, i32 10, i32 4, i32 3>
%Q2 = shufflevector <8 x i8> %Z2, <8 x i8> %Z2, <8 x i32> <i32 6, i32 7, i32 0, i32 1, i32 2, i32 4, i32 4, i32 1>
%R = mul <8 x i8> %Q1, %Q2
ret <8 x i8> %R
}

View File

@ -1,19 +0,0 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7 -bb-vectorize -S | FileCheck %s
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"
define void @main() nounwind uwtable {
; Two dead <2 x i64> -> i128 bitcasts; the checks verify they are preserved
; as-is (not paired/merged) and the function still just returns.
; CHECK-LABEL: @main(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> undef to i128
; CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> undef to i128
; CHECK-NEXT: ret void
;
entry:
%0 = bitcast <2 x i64> undef to i128
%1 = bitcast <2 x i64> undef to i128
ret void
}

View File

@ -1,144 +0,0 @@
; RUN: opt -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7-avx -disable-basicaa -bb-vectorize -S < %s | FileCheck %s
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"
%class.QBezier.15 = type { double, double, double, double, double, double, double, double }
; Function Attrs: nounwind
declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture readonly, i64, i32, i1) #0
; Function Attrs: uwtable
declare fastcc void @_ZL12printQBezier7QBezier(%class.QBezier.15* byval nocapture readonly align 8) #1
; Function Attrs: nounwind
declare void @llvm.lifetime.start(i64, i8* nocapture) #0
; Function Attrs: nounwind
declare void @llvm.lifetime.end.p0i8(i64, i8* nocapture) #0
define void @main_arrayctor.cont([10 x %class.QBezier.15]* %beziers, %class.QBezier.15* %agg.tmp.i, %class.QBezier.15* %agg.tmp55.i, %class.QBezier.15* %agg.tmp56.i) {
; Extracted from a QBezier-splitting routine (note the %class.QBezier.15
; fields and the @_ZL12printQBezier7QBezier calls).  The CHECK lines verify
; that <2 x double> ops appear, the printQBezier calls survive, and the
; scalar store/load of %x3.i keep their form and 16-byte alignment.
newFuncRoot:
br label %arrayctor.cont
arrayctor.cont.ret.exitStub: ; preds = %arrayctor.cont
ret void
; CHECK-LABEL: @main_arrayctor.cont
; CHECK: <2 x double>
; CHECK: @_ZL12printQBezier7QBezier
; CHECK: store double %mul8.i, double* %x3.i, align 16
; CHECK: load double, double* %x3.i, align 16
; CHECK: ret
arrayctor.cont: ; preds = %newFuncRoot
; Initialize the first QBezier's eight double fields with 10..80.
%ref.tmp.sroa.0.0.idx = getelementptr inbounds [10 x %class.QBezier.15], [10 x %class.QBezier.15]* %beziers, i64 0, i64 0, i32 0
store double 1.000000e+01, double* %ref.tmp.sroa.0.0.idx, align 16
%ref.tmp.sroa.2.0.idx1 = getelementptr inbounds [10 x %class.QBezier.15], [10 x %class.QBezier.15]* %beziers, i64 0, i64 0, i32 1
store double 2.000000e+01, double* %ref.tmp.sroa.2.0.idx1, align 8
%ref.tmp.sroa.3.0.idx2 = getelementptr inbounds [10 x %class.QBezier.15], [10 x %class.QBezier.15]* %beziers, i64 0, i64 0, i32 2
store double 3.000000e+01, double* %ref.tmp.sroa.3.0.idx2, align 16
%ref.tmp.sroa.4.0.idx3 = getelementptr inbounds [10 x %class.QBezier.15], [10 x %class.QBezier.15]* %beziers, i64 0, i64 0, i32 3
store double 4.000000e+01, double* %ref.tmp.sroa.4.0.idx3, align 8
%ref.tmp.sroa.5.0.idx4 = getelementptr inbounds [10 x %class.QBezier.15], [10 x %class.QBezier.15]* %beziers, i64 0, i64 0, i32 4
store double 5.000000e+01, double* %ref.tmp.sroa.5.0.idx4, align 16
%ref.tmp.sroa.6.0.idx5 = getelementptr inbounds [10 x %class.QBezier.15], [10 x %class.QBezier.15]* %beziers, i64 0, i64 0, i32 5
store double 6.000000e+01, double* %ref.tmp.sroa.6.0.idx5, align 8
%ref.tmp.sroa.7.0.idx6 = getelementptr inbounds [10 x %class.QBezier.15], [10 x %class.QBezier.15]* %beziers, i64 0, i64 0, i32 6
store double 7.000000e+01, double* %ref.tmp.sroa.7.0.idx6, align 16
%ref.tmp.sroa.8.0.idx7 = getelementptr inbounds [10 x %class.QBezier.15], [10 x %class.QBezier.15]* %beziers, i64 0, i64 0, i32 7
store double 8.000000e+01, double* %ref.tmp.sroa.8.0.idx7, align 8
%add.ptr = getelementptr inbounds [10 x %class.QBezier.15], [10 x %class.QBezier.15]* %beziers, i64 0, i64 1
%v0 = bitcast %class.QBezier.15* %agg.tmp.i to i8*
call void @llvm.lifetime.start(i64 64, i8* %v0)
%v1 = bitcast %class.QBezier.15* %agg.tmp55.i to i8*
call void @llvm.lifetime.start(i64 64, i8* %v1)
%v2 = bitcast %class.QBezier.15* %agg.tmp56.i to i8*
call void @llvm.lifetime.start(i64 64, i8* %v2)
%v3 = bitcast [10 x %class.QBezier.15]* %beziers to i8*
call void @llvm.memcpy.p0i8.p0i8.i64(i8* %v0, i8* %v3, i64 64, i32 8, i1 false)
call fastcc void @_ZL12printQBezier7QBezier(%class.QBezier.15* byval align 8 %agg.tmp.i)
; Midpoint-averaging updates on the x fields (0, 2, 4, 6).
%x2.i = getelementptr inbounds [10 x %class.QBezier.15], [10 x %class.QBezier.15]* %beziers, i64 0, i64 0, i32 2
%v4 = load double, double* %x2.i, align 16
%x3.i = getelementptr inbounds [10 x %class.QBezier.15], [10 x %class.QBezier.15]* %beziers, i64 0, i64 0, i32 4
%v5 = load double, double* %x3.i, align 16
%add.i = fadd double %v4, %v5
%mul.i = fmul double 5.000000e-01, %add.i
%x1.i = getelementptr inbounds [10 x %class.QBezier.15], [10 x %class.QBezier.15]* %beziers, i64 0, i64 0, i32 0
%v6 = load double, double* %x1.i, align 16
%add3.i = fadd double %v4, %v6
%mul4.i = fmul double 5.000000e-01, %add3.i
%x25.i = getelementptr inbounds [10 x %class.QBezier.15], [10 x %class.QBezier.15]* %beziers, i64 0, i64 1, i32 2
store double %mul4.i, double* %x25.i, align 16
%v7 = load double, double* %x3.i, align 16
%x4.i = getelementptr inbounds [10 x %class.QBezier.15], [10 x %class.QBezier.15]* %beziers, i64 0, i64 0, i32 6
%v8 = load double, double* %x4.i, align 16
%add7.i = fadd double %v7, %v8
%mul8.i = fmul double 5.000000e-01, %add7.i
store double %mul8.i, double* %x3.i, align 16
%v9 = load double, double* %x1.i, align 16
%x111.i = getelementptr inbounds %class.QBezier.15, %class.QBezier.15* %add.ptr, i64 0, i32 0
store double %v9, double* %x111.i, align 16
%v10 = load double, double* %x25.i, align 16
%add15.i = fadd double %mul.i, %v10
%mul16.i = fmul double 5.000000e-01, %add15.i
%x317.i = getelementptr inbounds [10 x %class.QBezier.15], [10 x %class.QBezier.15]* %beziers, i64 0, i64 1, i32 4
store double %mul16.i, double* %x317.i, align 16
%v11 = load double, double* %x3.i, align 16
%add19.i = fadd double %mul.i, %v11
%mul20.i = fmul double 5.000000e-01, %add19.i
store double %mul20.i, double* %x2.i, align 16
%v12 = load double, double* %x317.i, align 16
%add24.i = fadd double %v12, %mul20.i
%mul25.i = fmul double 5.000000e-01, %add24.i
store double %mul25.i, double* %x1.i, align 16
%x427.i = getelementptr inbounds [10 x %class.QBezier.15], [10 x %class.QBezier.15]* %beziers, i64 0, i64 1, i32 6
store double %mul25.i, double* %x427.i, align 16
; Same averaging pattern on the y fields (1, 3, 5, 7).
%y2.i = getelementptr inbounds [10 x %class.QBezier.15], [10 x %class.QBezier.15]* %beziers, i64 0, i64 0, i32 3
%v13 = load double, double* %y2.i, align 8
%y3.i = getelementptr inbounds [10 x %class.QBezier.15], [10 x %class.QBezier.15]* %beziers, i64 0, i64 0, i32 5
%v14 = load double, double* %y3.i, align 8
%add28.i = fadd double %v13, %v14
%div.i = fmul double 5.000000e-01, %add28.i
%y1.i = getelementptr inbounds [10 x %class.QBezier.15], [10 x %class.QBezier.15]* %beziers, i64 0, i64 0, i32 1
%v15 = load double, double* %y1.i, align 8
%add30.i = fadd double %v13, %v15
%mul31.i = fmul double 5.000000e-01, %add30.i
%y232.i = getelementptr inbounds [10 x %class.QBezier.15], [10 x %class.QBezier.15]* %beziers, i64 0, i64 1, i32 3
store double %mul31.i, double* %y232.i, align 8
%v16 = load double, double* %y3.i, align 8
%y4.i = getelementptr inbounds [10 x %class.QBezier.15], [10 x %class.QBezier.15]* %beziers, i64 0, i64 0, i32 7
%v17 = load double, double* %y4.i, align 8
%add34.i = fadd double %v16, %v17
%mul35.i = fmul double 5.000000e-01, %add34.i
store double %mul35.i, double* %y3.i, align 8
%v18 = load double, double* %y1.i, align 8
%y138.i = getelementptr inbounds [10 x %class.QBezier.15], [10 x %class.QBezier.15]* %beziers, i64 0, i64 1, i32 1
store double %v18, double* %y138.i, align 8
%v19 = load double, double* %y232.i, align 8
%add42.i = fadd double %div.i, %v19
%mul43.i = fmul double 5.000000e-01, %add42.i
%y344.i = getelementptr inbounds [10 x %class.QBezier.15], [10 x %class.QBezier.15]* %beziers, i64 0, i64 1, i32 5
store double %mul43.i, double* %y344.i, align 8
%v20 = load double, double* %y3.i, align 8
%add46.i = fadd double %div.i, %v20
%mul47.i = fmul double 5.000000e-01, %add46.i
store double %mul47.i, double* %y2.i, align 8
%v21 = load double, double* %y344.i, align 8
%add51.i = fadd double %v21, %mul47.i
%mul52.i = fmul double 5.000000e-01, %add51.i
store double %mul52.i, double* %y1.i, align 8
%y454.i = getelementptr inbounds [10 x %class.QBezier.15], [10 x %class.QBezier.15]* %beziers, i64 0, i64 1, i32 7
store double %mul52.i, double* %y454.i, align 8
%v22 = bitcast %class.QBezier.15* %add.ptr to i8*
call void @llvm.memcpy.p0i8.p0i8.i64(i8* %v1, i8* %v22, i64 64, i32 8, i1 false)
call fastcc void @_ZL12printQBezier7QBezier(%class.QBezier.15* byval align 8 %agg.tmp55.i)
call void @llvm.memcpy.p0i8.p0i8.i64(i8* %v2, i8* %v3, i64 64, i32 8, i1 false)
call fastcc void @_ZL12printQBezier7QBezier(%class.QBezier.15* byval align 8 %agg.tmp56.i)
call void @llvm.lifetime.end.p0i8(i64 64, i8* %v0)
call void @llvm.lifetime.end.p0i8(i64 64, i8* %v1)
call void @llvm.lifetime.end.p0i8(i64 64, i8* %v2)
br label %arrayctor.cont.ret.exitStub
}
attributes #0 = { nounwind }
attributes #1 = { uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf"="true" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }

View File

@ -1,112 +0,0 @@
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
; RUN: opt < %s -bb-vectorize -bb-vectorize-req-chain-depth=3 -bb-vectorize-ignore-target-info -instcombine -gvn -S | FileCheck %s
; This test checks the non-trivial pairing-induced cycle avoidance. Without this cycle avoidance, the algorithm would otherwise
; want to select the pairs:
; %div77 = fdiv double %sub74, %mul76.v.r1 <-> %div125 = fdiv double %mul121, %mul76.v.r2 (div125 depends on mul117)
; %add84 = fadd double %sub83, 2.000000e+00 <-> %add127 = fadd double %mul126, 1.000000e+00 (add127 depends on div77)
; %mul95 = fmul double %sub45.v.r1, %sub36.v.r1 <-> %mul88 = fmul double %sub36.v.r1, %sub87 (mul88 depends on add84)
; %mul117 = fmul double %sub39.v.r1, %sub116 <-> %mul97 = fmul double %mul96, %sub39.v.r1 (mul97 depends on mul95)
; and so a dependency cycle would be created.
declare double @fabs(double) nounwind readnone
define void @test1(double %a, double %b, double %c, double %add80, double %mul1, double %mul2.v.r1, double %mul73, double %sub, double %sub65, double %F.0, i32 %n.0, double %Bnm3.0, double %Bnm2.0, double %Bnm1.0, double %Anm3.0, double %Anm2.0, double %Anm1.0) {
; Regression test for pairing-induced dependency cycles; see the comment
; block above the function for the exact pairs that must NOT be selected.
; The final CHECK lines only pin the start of the output (see FIXME below).
entry:
br label %go
go:
%conv = sitofp i32 %n.0 to double
%add35 = fadd double %conv, %a
%sub36 = fadd double %add35, -1.000000e+00
%add38 = fadd double %conv, %b
%sub39 = fadd double %add38, -1.000000e+00
%add41 = fadd double %conv, %c
%sub42 = fadd double %add41, -1.000000e+00
%sub45 = fadd double %add35, -2.000000e+00
%sub48 = fadd double %add38, -2.000000e+00
%sub51 = fadd double %add41, -2.000000e+00
%mul52 = shl nsw i32 %n.0, 1
%sub53 = add nsw i32 %mul52, -1
%conv54 = sitofp i32 %sub53 to double
%sub56 = add nsw i32 %mul52, -3
%conv57 = sitofp i32 %sub56 to double
%sub59 = add nsw i32 %mul52, -5
%conv60 = sitofp i32 %sub59 to double
%mul61 = mul nsw i32 %n.0, %n.0
%conv62 = sitofp i32 %mul61 to double
%mul63 = fmul double %conv62, 3.000000e+00
%mul67 = fmul double %sub65, %conv
%add68 = fadd double %mul63, %mul67
%add69 = fadd double %add68, 2.000000e+00
%sub71 = fsub double %add69, %mul2.v.r1
%sub74 = fsub double %sub71, %mul73
%mul75 = fmul double %conv57, 2.000000e+00
%mul76 = fmul double %mul75, %sub42
%div77 = fdiv double %sub74, %mul76
%mul82 = fmul double %add80, %conv
%sub83 = fsub double %mul63, %mul82
%add84 = fadd double %sub83, 2.000000e+00
%sub86 = fsub double %add84, %mul2.v.r1
%sub87 = fsub double -0.000000e+00, %sub86
%mul88 = fmul double %sub36, %sub87
%mul89 = fmul double %mul88, %sub39
%mul90 = fmul double %conv54, 4.000000e+00
%mul91 = fmul double %mul90, %conv57
%mul92 = fmul double %mul91, %sub51
%mul93 = fmul double %mul92, %sub42
%div94 = fdiv double %mul89, %mul93
%mul95 = fmul double %sub45, %sub36
%mul96 = fmul double %mul95, %sub48
%mul97 = fmul double %mul96, %sub39
%sub99 = fsub double %conv, %a
%sub100 = fadd double %sub99, -2.000000e+00
%mul101 = fmul double %mul97, %sub100
%sub103 = fsub double %conv, %b
%sub104 = fadd double %sub103, -2.000000e+00
%mul105 = fmul double %mul101, %sub104
%mul106 = fmul double %conv57, 8.000000e+00
%mul107 = fmul double %mul106, %conv57
%mul108 = fmul double %mul107, %conv60
%sub111 = fadd double %add41, -3.000000e+00
%mul112 = fmul double %mul108, %sub111
%mul113 = fmul double %mul112, %sub51
%mul114 = fmul double %mul113, %sub42
%div115 = fdiv double %mul105, %mul114
%sub116 = fsub double -0.000000e+00, %sub36
%mul117 = fmul double %sub39, %sub116
%sub119 = fsub double %conv, %c
%sub120 = fadd double %sub119, -1.000000e+00
%mul121 = fmul double %mul117, %sub120
%mul123 = fmul double %mul75, %sub51
%mul124 = fmul double %mul123, %sub42
%div125 = fdiv double %mul121, %mul124
%mul126 = fmul double %div77, %sub
%add127 = fadd double %mul126, 1.000000e+00
%mul128 = fmul double %add127, %Anm1.0
%mul129 = fmul double %div94, %sub
%add130 = fadd double %div125, %mul129
%mul131 = fmul double %add130, %sub
%mul132 = fmul double %mul131, %Anm2.0
%add133 = fadd double %mul128, %mul132
%mul134 = fmul double %div115, %mul1
%mul135 = fmul double %mul134, %Anm3.0
%add136 = fadd double %add133, %mul135
%mul139 = fmul double %add127, %Bnm1.0
%mul143 = fmul double %mul131, %Bnm2.0
%add144 = fadd double %mul139, %mul143
%mul146 = fmul double %mul134, %Bnm3.0
%add147 = fadd double %add144, %mul146
%div148 = fdiv double %add136, %add147
%sub149 = fsub double %F.0, %div148
%div150 = fdiv double %sub149, %F.0
%call = tail call double @fabs(double %div150) nounwind readnone
%cmp = fcmp olt double %call, 0x3CB0000000000000
%cmp152 = icmp sgt i32 %n.0, 20000
%or.cond = or i1 %cmp, %cmp152
br i1 %or.cond, label %done, label %go
done:
ret void
; CHECK-LABEL: @test1(
; CHECK: go:
; CHECK: %conv.v.i0.1 = insertelement <2 x i32> undef, i32 %n.0, i32 0
; FIXME: When tree pruning is deterministic, include the entire output.
}

View File

@ -1,244 +0,0 @@
target datalayout = "e-p:64:64:64-S128-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f16:16:16-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-f128:128:128-n8:16:32:64"
target triple = "x86_64-unknown-linux-gnu"
; RUN: opt < %s -basicaa -bb-vectorize -bb-vectorize-req-chain-depth=2 -instcombine -gvn -S | FileCheck %s
; The chain length is set to 2 so that this will do some vectorization; check that the order of the functions is unchanged.
%struct.descriptor_dimension = type { i64, i64, i64 }
%struct.__st_parameter_common = type { i32, i32, i8*, i32, i32, i8*, i32* }
%struct.__st_parameter_dt = type { %struct.__st_parameter_common, i64, i64*, i64*, i8*, i8*, i32, i32, i8*, i8*, i32, i32, i8*, [256 x i8], i32*, i64, i8*, i32, i32, i8*, i8*, i32, i32, i8*, i8*, i32, i32, i8*, i8*, i32, [4 x i8] }
%"struct.array4_real(kind=4)" = type { i8*, i64, i64, [4 x %struct.descriptor_dimension] }
%"struct.array4_integer(kind=4).73" = type { i8*, i64, i64, [4 x %struct.descriptor_dimension] }
%struct.array4_unknown = type { i8*, i64, i64, [4 x %struct.descriptor_dimension] }
@.cst4 = external unnamed_addr constant [11 x i8], align 8
@.cst823 = external unnamed_addr constant [214 x i8], align 64
@j.4580 = external global i32
@j1.4581 = external global i32
@nty1.4590 = external global [2 x i8]
@nty2.4591 = external global [2 x i8]
@xr1.4592 = external global float
@xr2.4593 = external global float
@yr1.4594 = external global float
@yr2.4595 = external global float
@__main1_MOD_iave = external unnamed_addr global i32
@__main1_MOD_igrp = external global i32
@__main1_MOD_iounit = external global i32
@__main1_MOD_ityp = external global i32
@__main1_MOD_mclmsg = external unnamed_addr global %struct.array4_unknown, align 32
@__main1_MOD_mxdate = external unnamed_addr global %"struct.array4_integer(kind=4).73", align 32
@__main1_MOD_rmxval = external unnamed_addr global %"struct.array4_real(kind=4)", align 32
declare void @_gfortran_st_write(%struct.__st_parameter_dt*)
declare void @_gfortran_st_write_done(%struct.__st_parameter_dt*)
declare void @_gfortran_transfer_character_write(%struct.__st_parameter_dt*, i8*, i32)
declare void @_gfortran_transfer_integer_write(%struct.__st_parameter_dt*, i8*, i32)
declare void @_gfortran_transfer_real_write(%struct.__st_parameter_dt*, i8*, i32)
define i1 @"prtmax__<bb 3>_<bb 34>"(%struct.__st_parameter_dt* %memtmp3, i32 %D.4627_188.reload) nounwind {
; CHECK: prtmax__
newFuncRoot:
br label %"<bb 34>"
codeRepl80.exitStub: ; preds = %"<bb 34>"
ret i1 true
"<bb 34>.<bb 25>_crit_edge.exitStub": ; preds = %"<bb 34>"
ret i1 false
"<bb 34>": ; preds = %newFuncRoot
%tmp128 = getelementptr inbounds %struct.__st_parameter_dt, %struct.__st_parameter_dt* %memtmp3, i32 0, i32 0
%tmp129 = getelementptr inbounds %struct.__st_parameter_common, %struct.__st_parameter_common* %tmp128, i32 0, i32 2
store i8* getelementptr inbounds ([11 x i8], [11 x i8]* @.cst4, i64 0, i64 0), i8** %tmp129, align 8
%tmp130 = getelementptr inbounds %struct.__st_parameter_dt, %struct.__st_parameter_dt* %memtmp3, i32 0, i32 0
%tmp131 = getelementptr inbounds %struct.__st_parameter_common, %struct.__st_parameter_common* %tmp130, i32 0, i32 3
store i32 31495, i32* %tmp131, align 4
%tmp132 = getelementptr inbounds %struct.__st_parameter_dt, %struct.__st_parameter_dt* %memtmp3, i32 0, i32 5
store i8* getelementptr inbounds ([214 x i8], [214 x i8]* @.cst823, i64 0, i64 0), i8** %tmp132, align 8
%tmp133 = getelementptr inbounds %struct.__st_parameter_dt, %struct.__st_parameter_dt* %memtmp3, i32 0, i32 6
store i32 214, i32* %tmp133, align 4
%tmp134 = getelementptr inbounds %struct.__st_parameter_dt, %struct.__st_parameter_dt* %memtmp3, i32 0, i32 0
%tmp135 = getelementptr inbounds %struct.__st_parameter_common, %struct.__st_parameter_common* %tmp134, i32 0, i32 0
store i32 4096, i32* %tmp135, align 4
%iounit.8748_288 = load i32, i32* @__main1_MOD_iounit, align 4
%tmp136 = getelementptr inbounds %struct.__st_parameter_dt, %struct.__st_parameter_dt* %memtmp3, i32 0, i32 0
%tmp137 = getelementptr inbounds %struct.__st_parameter_common, %struct.__st_parameter_common* %tmp136, i32 0, i32 1
store i32 %iounit.8748_288, i32* %tmp137, align 4
call void @_gfortran_st_write(%struct.__st_parameter_dt* %memtmp3) nounwind
call void bitcast (void (%struct.__st_parameter_dt*, i8*, i32)* @_gfortran_transfer_integer_write to void (%struct.__st_parameter_dt*, i32*, i32)*)(%struct.__st_parameter_dt* %memtmp3, i32* @j.4580, i32 4) nounwind
; CHECK: @_gfortran_transfer_integer_write
%D.75807_289 = load i8*, i8** getelementptr inbounds (%"struct.array4_real(kind=4)", %"struct.array4_real(kind=4)"* @__main1_MOD_rmxval, i64 0, i32 0), align 8
%j.8758_290 = load i32, i32* @j.4580, align 4
%D.75760_291 = sext i32 %j.8758_290 to i64
%iave.8736_292 = load i32, i32* @__main1_MOD_iave, align 4
%D.75620_293 = sext i32 %iave.8736_292 to i64
%D.75808_294 = load i64, i64* getelementptr inbounds (%"struct.array4_real(kind=4)", %"struct.array4_real(kind=4)"* @__main1_MOD_rmxval, i64 0, i32 3, i64 2, i32 0), align 8
%D.75809_295 = mul nsw i64 %D.75620_293, %D.75808_294
%igrp.8737_296 = load i32, i32* @__main1_MOD_igrp, align 4
%D.75635_297 = sext i32 %igrp.8737_296 to i64
%D.75810_298 = load i64, i64* getelementptr inbounds (%"struct.array4_real(kind=4)", %"struct.array4_real(kind=4)"* @__main1_MOD_rmxval, i64 0, i32 3, i64 1, i32 0), align 8
%D.75811_299 = mul nsw i64 %D.75635_297, %D.75810_298
%D.75812_300 = add nsw i64 %D.75809_295, %D.75811_299
%D.75813_301 = add nsw i64 %D.75760_291, %D.75812_300
%ityp.8750_302 = load i32, i32* @__main1_MOD_ityp, align 4
%D.75704_303 = sext i32 %ityp.8750_302 to i64
%D.75814_304 = load i64, i64* getelementptr inbounds (%"struct.array4_real(kind=4)", %"struct.array4_real(kind=4)"* @__main1_MOD_rmxval, i64 0, i32 3, i64 3, i32 0), align 8
%D.75815_305 = mul nsw i64 %D.75704_303, %D.75814_304
%D.75816_306 = add nsw i64 %D.75813_301, %D.75815_305
%D.75817_307 = load i64, i64* getelementptr inbounds (%"struct.array4_real(kind=4)", %"struct.array4_real(kind=4)"* @__main1_MOD_rmxval, i64 0, i32 1), align 8
%D.75818_308 = add nsw i64 %D.75816_306, %D.75817_307
%tmp138 = bitcast i8* %D.75807_289 to [0 x float]*
%tmp139 = bitcast [0 x float]* %tmp138 to float*
%D.75819_309 = getelementptr inbounds float, float* %tmp139, i64 %D.75818_308
call void bitcast (void (%struct.__st_parameter_dt*, i8*, i32)* @_gfortran_transfer_real_write to void (%struct.__st_parameter_dt*, float*, i32)*)(%struct.__st_parameter_dt* %memtmp3, float* %D.75819_309, i32 4) nounwind
; CHECK: @_gfortran_transfer_real_write
%D.75820_310 = load i8*, i8** getelementptr inbounds (%struct.array4_unknown, %struct.array4_unknown* @__main1_MOD_mclmsg, i64 0, i32 0), align 8
%j.8758_311 = load i32, i32* @j.4580, align 4
%D.75760_312 = sext i32 %j.8758_311 to i64
%iave.8736_313 = load i32, i32* @__main1_MOD_iave, align 4
%D.75620_314 = sext i32 %iave.8736_313 to i64
%D.75821_315 = load i64, i64* getelementptr inbounds (%struct.array4_unknown, %struct.array4_unknown* @__main1_MOD_mclmsg, i64 0, i32 3, i64 2, i32 0), align 8
%D.75822_316 = mul nsw i64 %D.75620_314, %D.75821_315
%igrp.8737_317 = load i32, i32* @__main1_MOD_igrp, align 4
%D.75635_318 = sext i32 %igrp.8737_317 to i64
%D.75823_319 = load i64, i64* getelementptr inbounds (%struct.array4_unknown, %struct.array4_unknown* @__main1_MOD_mclmsg, i64 0, i32 3, i64 1, i32 0), align 8
%D.75824_320 = mul nsw i64 %D.75635_318, %D.75823_319
%D.75825_321 = add nsw i64 %D.75822_316, %D.75824_320
%D.75826_322 = add nsw i64 %D.75760_312, %D.75825_321
%ityp.8750_323 = load i32, i32* @__main1_MOD_ityp, align 4
%D.75704_324 = sext i32 %ityp.8750_323 to i64
%D.75827_325 = load i64, i64* getelementptr inbounds (%struct.array4_unknown, %struct.array4_unknown* @__main1_MOD_mclmsg, i64 0, i32 3, i64 3, i32 0), align 8
%D.75828_326 = mul nsw i64 %D.75704_324, %D.75827_325
%D.75829_327 = add nsw i64 %D.75826_322, %D.75828_326
%D.75830_328 = load i64, i64* getelementptr inbounds (%struct.array4_unknown, %struct.array4_unknown* @__main1_MOD_mclmsg, i64 0, i32 1), align 8
%D.75831_329 = add nsw i64 %D.75829_327, %D.75830_328
%tmp140 = bitcast i8* %D.75820_310 to [0 x [1 x i8]]*
%tmp141 = bitcast [0 x [1 x i8]]* %tmp140 to [1 x i8]*
%D.75832_330 = getelementptr inbounds [1 x i8], [1 x i8]* %tmp141, i64 %D.75831_329
call void bitcast (void (%struct.__st_parameter_dt*, i8*, i32)* @_gfortran_transfer_character_write to void (%struct.__st_parameter_dt*, [1 x i8]*, i32)*)(%struct.__st_parameter_dt* %memtmp3, [1 x i8]* %D.75832_330, i32 1) nounwind
; CHECK: @_gfortran_transfer_character_write
%D.75833_331 = load i8*, i8** getelementptr inbounds (%"struct.array4_integer(kind=4).73", %"struct.array4_integer(kind=4).73"* @__main1_MOD_mxdate, i64 0, i32 0), align 8
%j.8758_332 = load i32, i32* @j.4580, align 4
%D.75760_333 = sext i32 %j.8758_332 to i64
%iave.8736_334 = load i32, i32* @__main1_MOD_iave, align 4
%D.75620_335 = sext i32 %iave.8736_334 to i64
%D.75834_336 = load i64, i64* getelementptr inbounds (%"struct.array4_integer(kind=4).73", %"struct.array4_integer(kind=4).73"* @__main1_MOD_mxdate, i64 0, i32 3, i64 2, i32 0), align 8
%D.75835_337 = mul nsw i64 %D.75620_335, %D.75834_336
%igrp.8737_338 = load i32, i32* @__main1_MOD_igrp, align 4
%D.75635_339 = sext i32 %igrp.8737_338 to i64
%D.75836_340 = load i64, i64* getelementptr inbounds (%"struct.array4_integer(kind=4).73", %"struct.array4_integer(kind=4).73"* @__main1_MOD_mxdate, i64 0, i32 3, i64 1, i32 0), align 8
%D.75837_341 = mul nsw i64 %D.75635_339, %D.75836_340
%D.75838_342 = add nsw i64 %D.75835_337, %D.75837_341
%D.75839_343 = add nsw i64 %D.75760_333, %D.75838_342
%ityp.8750_344 = load i32, i32* @__main1_MOD_ityp, align 4
%D.75704_345 = sext i32 %ityp.8750_344 to i64
%D.75840_346 = load i64, i64* getelementptr inbounds (%"struct.array4_integer(kind=4).73", %"struct.array4_integer(kind=4).73"* @__main1_MOD_mxdate, i64 0, i32 3, i64 3, i32 0), align 8
%D.75841_347 = mul nsw i64 %D.75704_345, %D.75840_346
%D.75842_348 = add nsw i64 %D.75839_343, %D.75841_347
%D.75843_349 = load i64, i64* getelementptr inbounds (%"struct.array4_integer(kind=4).73", %"struct.array4_integer(kind=4).73"* @__main1_MOD_mxdate, i64 0, i32 1), align 8
%D.75844_350 = add nsw i64 %D.75842_348, %D.75843_349
%tmp142 = bitcast i8* %D.75833_331 to [0 x i32]*
%tmp143 = bitcast [0 x i32]* %tmp142 to i32*
%D.75845_351 = getelementptr inbounds i32, i32* %tmp143, i64 %D.75844_350
call void bitcast (void (%struct.__st_parameter_dt*, i8*, i32)* @_gfortran_transfer_integer_write to void (%struct.__st_parameter_dt*, i32*, i32)*)(%struct.__st_parameter_dt* %memtmp3, i32* %D.75845_351, i32 4) nounwind
; CHECK: @_gfortran_transfer_integer_write
call void bitcast (void (%struct.__st_parameter_dt*, i8*, i32)* @_gfortran_transfer_real_write to void (%struct.__st_parameter_dt*, float*, i32)*)(%struct.__st_parameter_dt* %memtmp3, float* @xr1.4592, i32 4) nounwind
; CHECK: @_gfortran_transfer_real_write
call void bitcast (void (%struct.__st_parameter_dt*, i8*, i32)* @_gfortran_transfer_real_write to void (%struct.__st_parameter_dt*, float*, i32)*)(%struct.__st_parameter_dt* %memtmp3, float* @yr1.4594, i32 4) nounwind
; CHECK: @_gfortran_transfer_real_write
call void bitcast (void (%struct.__st_parameter_dt*, i8*, i32)* @_gfortran_transfer_character_write to void (%struct.__st_parameter_dt*, [2 x i8]*, i32)*)(%struct.__st_parameter_dt* %memtmp3, [2 x i8]* @nty1.4590, i32 2) nounwind
; CHECK: @_gfortran_transfer_character_write
call void bitcast (void (%struct.__st_parameter_dt*, i8*, i32)* @_gfortran_transfer_integer_write to void (%struct.__st_parameter_dt*, i32*, i32)*)(%struct.__st_parameter_dt* %memtmp3, i32* @j1.4581, i32 4) nounwind
; CHECK: @_gfortran_transfer_integer_write
%D.75807_352 = load i8*, i8** getelementptr inbounds (%"struct.array4_real(kind=4)", %"struct.array4_real(kind=4)"* @__main1_MOD_rmxval, i64 0, i32 0), align 8
%j1.8760_353 = load i32, i32* @j1.4581, align 4
%D.75773_354 = sext i32 %j1.8760_353 to i64
%iave.8736_355 = load i32, i32* @__main1_MOD_iave, align 4
%D.75620_356 = sext i32 %iave.8736_355 to i64
%D.75808_357 = load i64, i64* getelementptr inbounds (%"struct.array4_real(kind=4)", %"struct.array4_real(kind=4)"* @__main1_MOD_rmxval, i64 0, i32 3, i64 2, i32 0), align 8
%D.75809_358 = mul nsw i64 %D.75620_356, %D.75808_357
%igrp.8737_359 = load i32, i32* @__main1_MOD_igrp, align 4
%D.75635_360 = sext i32 %igrp.8737_359 to i64
%D.75810_361 = load i64, i64* getelementptr inbounds (%"struct.array4_real(kind=4)", %"struct.array4_real(kind=4)"* @__main1_MOD_rmxval, i64 0, i32 3, i64 1, i32 0), align 8
%D.75811_362 = mul nsw i64 %D.75635_360, %D.75810_361
%D.75812_363 = add nsw i64 %D.75809_358, %D.75811_362
%D.75846_364 = add nsw i64 %D.75773_354, %D.75812_363
%ityp.8750_365 = load i32, i32* @__main1_MOD_ityp, align 4
%D.75704_366 = sext i32 %ityp.8750_365 to i64
%D.75814_367 = load i64, i64* getelementptr inbounds (%"struct.array4_real(kind=4)", %"struct.array4_real(kind=4)"* @__main1_MOD_rmxval, i64 0, i32 3, i64 3, i32 0), align 8
%D.75815_368 = mul nsw i64 %D.75704_366, %D.75814_367
%D.75847_369 = add nsw i64 %D.75846_364, %D.75815_368
%D.75817_370 = load i64, i64* getelementptr inbounds (%"struct.array4_real(kind=4)", %"struct.array4_real(kind=4)"* @__main1_MOD_rmxval, i64 0, i32 1), align 8
%D.75848_371 = add nsw i64 %D.75847_369, %D.75817_370
%tmp144 = bitcast i8* %D.75807_352 to [0 x float]*
%tmp145 = bitcast [0 x float]* %tmp144 to float*
%D.75849_372 = getelementptr inbounds float, float* %tmp145, i64 %D.75848_371
call void bitcast (void (%struct.__st_parameter_dt*, i8*, i32)* @_gfortran_transfer_real_write to void (%struct.__st_parameter_dt*, float*, i32)*)(%struct.__st_parameter_dt* %memtmp3, float* %D.75849_372, i32 4) nounwind
; CHECK: @_gfortran_transfer_real_write
%D.75820_373 = load i8*, i8** getelementptr inbounds (%struct.array4_unknown, %struct.array4_unknown* @__main1_MOD_mclmsg, i64 0, i32 0), align 8
%j1.8760_374 = load i32, i32* @j1.4581, align 4
%D.75773_375 = sext i32 %j1.8760_374 to i64
%iave.8736_376 = load i32, i32* @__main1_MOD_iave, align 4
%D.75620_377 = sext i32 %iave.8736_376 to i64
%D.75821_378 = load i64, i64* getelementptr inbounds (%struct.array4_unknown, %struct.array4_unknown* @__main1_MOD_mclmsg, i64 0, i32 3, i64 2, i32 0), align 8
%D.75822_379 = mul nsw i64 %D.75620_377, %D.75821_378
%igrp.8737_380 = load i32, i32* @__main1_MOD_igrp, align 4
%D.75635_381 = sext i32 %igrp.8737_380 to i64
%D.75823_382 = load i64, i64* getelementptr inbounds (%struct.array4_unknown, %struct.array4_unknown* @__main1_MOD_mclmsg, i64 0, i32 3, i64 1, i32 0), align 8
%D.75824_383 = mul nsw i64 %D.75635_381, %D.75823_382
%D.75825_384 = add nsw i64 %D.75822_379, %D.75824_383
%D.75850_385 = add nsw i64 %D.75773_375, %D.75825_384
%ityp.8750_386 = load i32, i32* @__main1_MOD_ityp, align 4
%D.75704_387 = sext i32 %ityp.8750_386 to i64
%D.75827_388 = load i64, i64* getelementptr inbounds (%struct.array4_unknown, %struct.array4_unknown* @__main1_MOD_mclmsg, i64 0, i32 3, i64 3, i32 0), align 8
%D.75828_389 = mul nsw i64 %D.75704_387, %D.75827_388
%D.75851_390 = add nsw i64 %D.75850_385, %D.75828_389
%D.75830_391 = load i64, i64* getelementptr inbounds (%struct.array4_unknown, %struct.array4_unknown* @__main1_MOD_mclmsg, i64 0, i32 1), align 8
%D.75852_392 = add nsw i64 %D.75851_390, %D.75830_391
%tmp146 = bitcast i8* %D.75820_373 to [0 x [1 x i8]]*
%tmp147 = bitcast [0 x [1 x i8]]* %tmp146 to [1 x i8]*
%D.75853_393 = getelementptr inbounds [1 x i8], [1 x i8]* %tmp147, i64 %D.75852_392
call void bitcast (void (%struct.__st_parameter_dt*, i8*, i32)* @_gfortran_transfer_character_write to void (%struct.__st_parameter_dt*, [1 x i8]*, i32)*)(%struct.__st_parameter_dt* %memtmp3, [1 x i8]* %D.75853_393, i32 1) nounwind
; CHECK: @_gfortran_transfer_character_write
%D.75833_394 = load i8*, i8** getelementptr inbounds (%"struct.array4_integer(kind=4).73", %"struct.array4_integer(kind=4).73"* @__main1_MOD_mxdate, i64 0, i32 0), align 8
%j1.8760_395 = load i32, i32* @j1.4581, align 4
%D.75773_396 = sext i32 %j1.8760_395 to i64
%iave.8736_397 = load i32, i32* @__main1_MOD_iave, align 4
%D.75620_398 = sext i32 %iave.8736_397 to i64
%D.75834_399 = load i64, i64* getelementptr inbounds (%"struct.array4_integer(kind=4).73", %"struct.array4_integer(kind=4).73"* @__main1_MOD_mxdate, i64 0, i32 3, i64 2, i32 0), align 8
%D.75835_400 = mul nsw i64 %D.75620_398, %D.75834_399
%igrp.8737_401 = load i32, i32* @__main1_MOD_igrp, align 4
%D.75635_402 = sext i32 %igrp.8737_401 to i64
%D.75836_403 = load i64, i64* getelementptr inbounds (%"struct.array4_integer(kind=4).73", %"struct.array4_integer(kind=4).73"* @__main1_MOD_mxdate, i64 0, i32 3, i64 1, i32 0), align 8
%D.75837_404 = mul nsw i64 %D.75635_402, %D.75836_403
%D.75838_405 = add nsw i64 %D.75835_400, %D.75837_404
%D.75854_406 = add nsw i64 %D.75773_396, %D.75838_405
%ityp.8750_407 = load i32, i32* @__main1_MOD_ityp, align 4
%D.75704_408 = sext i32 %ityp.8750_407 to i64
%D.75840_409 = load i64, i64* getelementptr inbounds (%"struct.array4_integer(kind=4).73", %"struct.array4_integer(kind=4).73"* @__main1_MOD_mxdate, i64 0, i32 3, i64 3, i32 0), align 8
%D.75841_410 = mul nsw i64 %D.75704_408, %D.75840_409
%D.75855_411 = add nsw i64 %D.75854_406, %D.75841_410
%D.75843_412 = load i64, i64* getelementptr inbounds (%"struct.array4_integer(kind=4).73", %"struct.array4_integer(kind=4).73"* @__main1_MOD_mxdate, i64 0, i32 1), align 8
%D.75856_413 = add nsw i64 %D.75855_411, %D.75843_412
%tmp148 = bitcast i8* %D.75833_394 to [0 x i32]*
%tmp149 = bitcast [0 x i32]* %tmp148 to i32*
%D.75857_414 = getelementptr inbounds i32, i32* %tmp149, i64 %D.75856_413
call void bitcast (void (%struct.__st_parameter_dt*, i8*, i32)* @_gfortran_transfer_integer_write to void (%struct.__st_parameter_dt*, i32*, i32)*)(%struct.__st_parameter_dt* %memtmp3, i32* %D.75857_414, i32 4) nounwind
; CHECK: @_gfortran_transfer_integer_write
call void bitcast (void (%struct.__st_parameter_dt*, i8*, i32)* @_gfortran_transfer_real_write to void (%struct.__st_parameter_dt*, float*, i32)*)(%struct.__st_parameter_dt* %memtmp3, float* @xr2.4593, i32 4) nounwind
; CHECK: @_gfortran_transfer_real_write
call void bitcast (void (%struct.__st_parameter_dt*, i8*, i32)* @_gfortran_transfer_real_write to void (%struct.__st_parameter_dt*, float*, i32)*)(%struct.__st_parameter_dt* %memtmp3, float* @yr2.4595, i32 4) nounwind
; CHECK: @_gfortran_transfer_real_write
call void bitcast (void (%struct.__st_parameter_dt*, i8*, i32)* @_gfortran_transfer_character_write to void (%struct.__st_parameter_dt*, [2 x i8]*, i32)*)(%struct.__st_parameter_dt* %memtmp3, [2 x i8]* @nty2.4591, i32 2) nounwind
; CHECK: @_gfortran_transfer_character_write
call void @_gfortran_st_write_done(%struct.__st_parameter_dt* %memtmp3) nounwind
; CHECK: @_gfortran_st_write_done
%j.8758_415 = load i32, i32* @j.4580, align 4
%D.4634_416 = icmp eq i32 %j.8758_415, %D.4627_188.reload
%j.8758_417 = load i32, i32* @j.4580, align 4
%j.8770_418 = add nsw i32 %j.8758_417, 1
store i32 %j.8770_418, i32* @j.4580, align 4
%tmp150 = icmp ne i1 %D.4634_416, false
br i1 %tmp150, label %codeRepl80.exitStub, label %"<bb 34>.<bb 25>_crit_edge.exitStub"
}

View File

@ -1,41 +0,0 @@
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
; RUN: opt < %s -bb-vectorize -bb-vectorize-req-chain-depth=3 -bb-vectorize-ignore-target-info -instcombine -gvn -S | FileCheck %s
define double @test1(double* %a, double* %b, double* %c) nounwind uwtable readonly {
entry:
%i0 = load double, double* %a, align 8
%i1 = load double, double* %b, align 8
%mul = fmul double %i0, %i1
%i2 = load double, double* %c, align 8
%add = fadd double %mul, %i2
%arrayidx3 = getelementptr inbounds double, double* %a, i64 1
%i3 = load double, double* %arrayidx3, align 8
%arrayidx4 = getelementptr inbounds double, double* %b, i64 1
%i4 = load double, double* %arrayidx4, align 8
%mul5 = fmul double %i3, %i4
%arrayidx6 = getelementptr inbounds double, double* %c, i64 1
%i5 = load double, double* %arrayidx6, align 8
%add7 = fadd double %mul5, %i5
%mul9 = fmul double %add, %i1
%add11 = fadd double %mul9, %i2
%mul13 = fmul double %add7, %i4
%add15 = fadd double %mul13, %i5
%mul16 = fmul double %add11, %add15
ret double %mul16
; CHECK-LABEL: @test1(
; CHECK: %i0.v.i0 = bitcast double* %a to <2 x double>*
; CHECK: %i1.v.i0 = bitcast double* %b to <2 x double>*
; CHECK: %i2.v.i0 = bitcast double* %c to <2 x double>*
; CHECK: %i0 = load <2 x double>, <2 x double>* %i0.v.i0, align 8
; CHECK: %i1 = load <2 x double>, <2 x double>* %i1.v.i0, align 8
; CHECK: %mul = fmul <2 x double> %i0, %i1
; CHECK: %i2 = load <2 x double>, <2 x double>* %i2.v.i0, align 8
; CHECK: %add = fadd <2 x double> %mul, %i2
; CHECK: %mul9 = fmul <2 x double> %add, %i1
; CHECK: %add11 = fadd <2 x double> %mul9, %i2
; CHECK: %add11.v.r1 = extractelement <2 x double> %add11, i32 0
; CHECK: %add11.v.r2 = extractelement <2 x double> %add11, i32 1
; CHECK: %mul16 = fmul double %add11.v.r1, %add11.v.r2
; CHECK: ret double %mul16
}

View File

@ -1,3 +0,0 @@
if not 'X86' in config.root.targets:
config.unsupported = True

View File

@ -1,93 +0,0 @@
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"
; RUN: opt < %s -bb-vectorize -bb-vectorize-req-chain-depth=3 -bb-vectorize-ignore-target-info -instcombine -gvn -S | FileCheck %s
; RUN: opt < %s -dont-improve-non-negative-phi-bits=false -basicaa -loop-unroll -unroll-threshold=45 -unroll-partial-threshold=45 -unroll-allow-partial -bb-vectorize -bb-vectorize-req-chain-depth=3 -bb-vectorize-ignore-target-info -instcombine -gvn -S | FileCheck %s -check-prefix=CHECK-UNRL
; The second check covers the use of alias analysis (with loop unrolling).
define void @test1(double* noalias %out, double* noalias %in1, double* noalias %in2) nounwind uwtable {
entry:
br label %for.body
; CHECK-LABEL: @test1(
; CHECK-UNRL-LABEL: @test1(
for.body: ; preds = %for.body, %entry
%indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
%arrayidx = getelementptr inbounds double, double* %in1, i64 %indvars.iv
%0 = load double, double* %arrayidx, align 8
%arrayidx2 = getelementptr inbounds double, double* %in2, i64 %indvars.iv
%1 = load double, double* %arrayidx2, align 8
%mul = fmul double %0, %0
%mul3 = fmul double %0, %1
%add = fadd double %mul, %mul3
%add4 = fadd double %1, %1
%add5 = fadd double %add4, %0
%mul6 = fmul double %0, %add5
%add7 = fadd double %add, %mul6
%mul8 = fmul double %1, %1
%add9 = fadd double %0, %0
%add10 = fadd double %add9, %0
%mul11 = fmul double %mul8, %add10
%add12 = fadd double %add7, %mul11
%arrayidx14 = getelementptr inbounds double, double* %out, i64 %indvars.iv
store double %add12, double* %arrayidx14, align 8
%indvars.iv.next = add i64 %indvars.iv, 1
%lftr.wideiv = trunc i64 %indvars.iv.next to i32
%exitcond = icmp eq i32 %lftr.wideiv, 10
br i1 %exitcond, label %for.end, label %for.body
; CHECK: %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
; CHECK: %arrayidx = getelementptr inbounds double, double* %in1, i64 %indvars.iv
; CHECK: %0 = load double, double* %arrayidx, align 8
; CHECK: %arrayidx2 = getelementptr inbounds double, double* %in2, i64 %indvars.iv
; CHECK: %1 = load double, double* %arrayidx2, align 8
; CHECK: %mul = fmul double %0, %0
; CHECK: %mul3 = fmul double %0, %1
; CHECK: %add = fadd double %mul, %mul3
; CHECK: %mul8 = fmul double %1, %1
; CHECK: %add4.v.i1.1 = insertelement <2 x double> undef, double %1, i32 0
; CHECK: %add4.v.i1.2 = insertelement <2 x double> %add4.v.i1.1, double %0, i32 1
; CHECK: %add4 = fadd <2 x double> %add4.v.i1.2, %add4.v.i1.2
; CHECK: %2 = insertelement <2 x double> undef, double %0, i32 0
; CHECK: %add5.v.i1.2 = shufflevector <2 x double> %2, <2 x double> undef, <2 x i32> zeroinitializer
; CHECK: %add5 = fadd <2 x double> %add4, %add5.v.i1.2
; CHECK: %mul6.v.i0.2 = insertelement <2 x double> %2, double %mul8, i32 1
; CHECK: %mul6 = fmul <2 x double> %mul6.v.i0.2, %add5
; CHECK: %mul6.v.r1 = extractelement <2 x double> %mul6, i32 0
; CHECK: %mul6.v.r2 = extractelement <2 x double> %mul6, i32 1
; CHECK: %add7 = fadd double %add, %mul6.v.r1
; CHECK: %add12 = fadd double %add7, %mul6.v.r2
; CHECK: %arrayidx14 = getelementptr inbounds double, double* %out, i64 %indvars.iv
; CHECK: store double %add12, double* %arrayidx14, align 8
; CHECK: %indvars.iv.next = add i64 %indvars.iv, 1
; CHECK: %lftr.wideiv = trunc i64 %indvars.iv.next to i32
; CHECK: %exitcond = icmp eq i32 %lftr.wideiv, 10
; CHECK: br i1 %exitcond, label %for.end, label %for.body
; CHECK-UNRL: %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next.1, %for.body ]
; CHECK-UNRL: %arrayidx = getelementptr inbounds double, double* %in1, i64 %indvars.iv
; CHECK-UNRL: %0 = bitcast double* %arrayidx to <2 x double>*
; CHECK-UNRL: %arrayidx2 = getelementptr inbounds double, double* %in2, i64 %indvars.iv
; CHECK-UNRL: %1 = bitcast double* %arrayidx2 to <2 x double>*
; CHECK-UNRL: %arrayidx14 = getelementptr inbounds double, double* %out, i64 %indvars.iv
; CHECK-UNRL: %2 = load <2 x double>, <2 x double>* %0, align 8
; CHECK-UNRL: %3 = load <2 x double>, <2 x double>* %1, align 8
; CHECK-UNRL: %mul = fmul <2 x double> %2, %2
; CHECK-UNRL: %mul3 = fmul <2 x double> %2, %3
; CHECK-UNRL: %add = fadd <2 x double> %mul, %mul3
; CHECK-UNRL: %add4 = fadd <2 x double> %3, %3
; CHECK-UNRL: %add5 = fadd <2 x double> %add4, %2
; CHECK-UNRL: %mul6 = fmul <2 x double> %2, %add5
; CHECK-UNRL: %add7 = fadd <2 x double> %add, %mul6
; CHECK-UNRL: %mul8 = fmul <2 x double> %3, %3
; CHECK-UNRL: %add9 = fadd <2 x double> %2, %2
; CHECK-UNRL: %add10 = fadd <2 x double> %add9, %2
; CHECK-UNRL: %mul11 = fmul <2 x double> %mul8, %add10
; CHECK-UNRL: %add12 = fadd <2 x double> %add7, %mul11
; CHECK-UNRL: %4 = bitcast double* %arrayidx14 to <2 x double>*
; CHECK-UNRL: store <2 x double> %add12, <2 x double>* %4, align 8
; CHECK-UNRL: %indvars.iv.next.1 = add nuw nsw i64 %indvars.iv, 2
; CHECK-UNRL: %lftr.wideiv.1 = trunc i64 %indvars.iv.next.1 to i32
; CHECK-UNRL: %exitcond.1 = icmp eq i32 %lftr.wideiv.1, 10
; CHECK-UNRL: br i1 %exitcond.1, label %for.end, label %for.body
for.end: ; preds = %for.body
ret void
}

View File

@ -1,22 +0,0 @@
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
target triple = "x86_64-unknown-linux-gnu"
; RUN: opt < %s -bb-vectorize -bb-vectorize-req-chain-depth=6 -instcombine -gvn -S | FileCheck %s
@A = common global [1024 x float] zeroinitializer, align 16
@B = common global [1024 x float] zeroinitializer, align 16
define i32 @test1() nounwind {
; CHECK-LABEL: @test1(
%V1 = load float, float* getelementptr inbounds ([1024 x float], [1024 x float]* @A, i64 0, i64 0), align 16
%V2 = load float, float* getelementptr inbounds ([1024 x float], [1024 x float]* @A, i64 0, i64 1), align 4
%V3= load float, float* getelementptr inbounds ([1024 x float], [1024 x float]* @A, i64 0, i64 2), align 8
%V4 = load float, float* getelementptr inbounds ([1024 x float], [1024 x float]* @A, i64 0, i64 3), align 4
; CHECK: %V1 = load <4 x float>, <4 x float>* bitcast ([1024 x float]* @A to <4 x float>*), align 16
store float %V1, float* getelementptr inbounds ([1024 x float], [1024 x float]* @B, i64 0, i64 0), align 16
store float %V2, float* getelementptr inbounds ([1024 x float], [1024 x float]* @B, i64 0, i64 1), align 4
store float %V3, float* getelementptr inbounds ([1024 x float], [1024 x float]* @B, i64 0, i64 2), align 8
store float %V4, float* getelementptr inbounds ([1024 x float], [1024 x float]* @B, i64 0, i64 3), align 4
; CHECK-NEXT: store <4 x float> %V1, <4 x float>* bitcast ([1024 x float]* @B to <4 x float>*), align 16
ret i32 0
; CHECK-NEXT: ret i32 0
}

View File

@ -1,49 +0,0 @@
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
; RUN: opt < %s -bb-vectorize -bb-vectorize-req-chain-depth=3 -S | FileCheck %s
; Simple 3-pair chain with loads and stores (with fpmath)
define void @test1(double* %a, double* %b, double* %c) nounwind uwtable readonly {
entry:
%i0 = load double, double* %a, align 8
%i1 = load double, double* %b, align 8
%mul = fmul double %i0, %i1, !fpmath !2
%arrayidx3 = getelementptr inbounds double, double* %a, i64 1
%i3 = load double, double* %arrayidx3, align 8
%arrayidx4 = getelementptr inbounds double, double* %b, i64 1
%i4 = load double, double* %arrayidx4, align 8
%mul5 = fmul double %i3, %i4, !fpmath !3
store double %mul, double* %c, align 8
%arrayidx5 = getelementptr inbounds double, double* %c, i64 1
store double %mul5, double* %arrayidx5, align 8
ret void
; CHECK-LABEL: @test1(
; CHECK: !fpmath
; CHECK: ret void
}
; Simple 3-pair chain with loads and stores (ints with range)
define void @test2(i64* %a, i64* %b, i64* %c) nounwind uwtable readonly {
entry:
%i0 = load i64, i64* %a, align 8, !range !0
%i1 = load i64, i64* %b, align 8
%mul = mul i64 %i0, %i1
%arrayidx3 = getelementptr inbounds i64, i64* %a, i64 1
%i3 = load i64, i64* %arrayidx3, align 8, !range !1
%arrayidx4 = getelementptr inbounds i64, i64* %b, i64 1
%i4 = load i64, i64* %arrayidx4, align 8
%mul5 = mul i64 %i3, %i4
store i64 %mul, i64* %c, align 8
%arrayidx5 = getelementptr inbounds i64, i64* %c, i64 1
store i64 %mul5, i64* %arrayidx5, align 8
ret void
; CHECK-LABEL: @test2(
; CHECK-NOT: !range
; CHECK: ret void
}
!0 = !{i64 0, i64 2}
!1 = !{i64 3, i64 5}
!2 = !{ float 5.0 }
!3 = !{ float 2.5 }

View File

@ -1,23 +0,0 @@
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
; RUN: opt < %s -bb-vectorize -bb-vectorize-req-chain-depth=2 -instcombine -gvn -S | FileCheck %s
; Make sure that things (specifically getelementptr) are not connected to loads
; and stores via the address operand (which would be bad because the address
; is really a scalar even after vectorization)
define i64 @test2(i64 %a) nounwind uwtable readonly {
entry:
%a1 = inttoptr i64 %a to i64*
%a2 = getelementptr i64, i64* %a1, i64 1
%a3 = getelementptr i64, i64* %a1, i64 2
%v2 = load i64, i64* %a2, align 8
%v3 = load i64, i64* %a3, align 8
%v2a = add i64 %v2, 5
%v3a = add i64 %v3, 7
store i64 %v2a, i64* %a2, align 8
store i64 %v3a, i64* %a3, align 8
%r = add i64 %v2, %v3
ret i64 %r
; CHECK-LABEL: @test2(
; CHECK-NOT: getelementptr i64, <2 x i64*>
}

View File

@ -1,17 +0,0 @@
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
; RUN: opt < %s -bb-vectorize -bb-vectorize-req-chain-depth 3 -bb-vectorize-ignore-target-info -S | FileCheck %s -check-prefix=CHECK-RD3
; RUN: opt < %s -bb-vectorize -bb-vectorize-req-chain-depth 2 -bb-vectorize-ignore-target-info -S | FileCheck %s -check-prefix=CHECK-RD2
define double @test1(double %A1, double %A2, double %B1, double %B2) {
%X1 = fsub double %A1, %B1
%X2 = fsub double %A2, %B2
%Y1 = fmul double %X1, %A1
%Y2 = fmul double %X2, %A2
%R = fmul double %Y1, %Y2
ret double %R
; CHECK-RD3-LABEL: @test1(
; CHECK-RD2-LABEL: @test1(
; CHECK-RD3-NOT: <2 x double>
; CHECK-RD2: <2 x double>
}

View File

@ -1,46 +0,0 @@
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
; RUN: opt < %s -bb-vectorize -bb-vectorize-req-chain-depth=3 -bb-vectorize-ignore-target-info -instcombine -gvn -S | FileCheck %s
; RUN: opt < %s -bb-vectorize -bb-vectorize-req-chain-depth=3 -bb-vectorize-search-limit=4 -bb-vectorize-ignore-target-info -instcombine -gvn -S | FileCheck %s -check-prefix=CHECK-SL4
define double @test1(double %A1, double %A2, double %B1, double %B2) {
; CHECK-LABEL: @test1(
; CHECK-SL4-LABEL: @test1(
; CHECK-SL4-NOT: <2 x double>
; CHECK: %X1.v.i1.1 = insertelement <2 x double> undef, double %B1, i32 0
; CHECK: %X1.v.i1.2 = insertelement <2 x double> %X1.v.i1.1, double %B2, i32 1
; CHECK: %X1.v.i0.1 = insertelement <2 x double> undef, double %A1, i32 0
; CHECK: %X1.v.i0.2 = insertelement <2 x double> %X1.v.i0.1, double %A2, i32 1
%X1 = fsub double %A1, %B1
%X2 = fsub double %A2, %B2
; CHECK: %X1 = fsub <2 x double> %X1.v.i0.2, %X1.v.i1.2
%Y1 = fmul double %X1, %A1
%Y2 = fmul double %X2, %A2
; CHECK: %Y1 = fmul <2 x double> %X1, %X1.v.i0.2
%Z1 = fadd double %Y1, %B1
; Here we have a dependency chain: the short search limit will not
; see past this chain and so will not see the second part of the
; pair to vectorize.
%mul41 = fmul double %Z1, %Y2
%sub48 = fsub double %Z1, %mul41
%mul62 = fmul double %Z1, %sub48
%sub69 = fsub double %Z1, %mul62
%mul83 = fmul double %Z1, %sub69
%sub90 = fsub double %Z1, %mul83
%mul104 = fmul double %Z1, %sub90
%sub111 = fsub double %Z1, %mul104
%mul125 = fmul double %Z1, %sub111
%sub132 = fsub double %Z1, %mul125
%mul146 = fmul double %Z1, %sub132
%sub153 = fsub double %Z1, %mul146
; end of chain.
%Z2 = fadd double %Y2, %B2
; CHECK: %Z1 = fadd <2 x double> %Y1, %X1.v.i1.2
%R1 = fdiv double %Z1, %Z2
%R = fmul double %R1, %sub153
; CHECK: %Z1.v.r1 = extractelement <2 x double> %Z1, i32 0
; CHECK: %Z1.v.r2 = extractelement <2 x double> %Z1, i32 1
; CHECK: %R1 = fdiv double %Z1.v.r1, %Z1.v.r2
ret double %R
; CHECK: ret double %R
}

View File

@ -1,514 +0,0 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -bb-vectorize -bb-vectorize-req-chain-depth=3 -bb-vectorize-ignore-target-info -instcombine -gvn -S | FileCheck %s
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
declare double @llvm.fma.f64(double, double, double)
declare double @llvm.fmuladd.f64(double, double, double)
declare double @llvm.cos.f64(double)
declare double @llvm.powi.f64(double, i32)
declare double @llvm.round.f64(double)
declare double @llvm.copysign.f64(double, double)
declare double @llvm.ceil.f64(double)
declare double @llvm.nearbyint.f64(double)
declare double @llvm.rint.f64(double)
declare double @llvm.trunc.f64(double)
declare double @llvm.floor.f64(double)
declare double @llvm.fabs.f64(double)
declare i64 @llvm.bswap.i64(i64)
declare i64 @llvm.ctpop.i64(i64)
declare i64 @llvm.ctlz.i64(i64, i1)
declare i64 @llvm.cttz.i64(i64, i1)
; Basic depth-3 chain with fma
define double @test1(double %A1, double %A2, double %B1, double %B2, double %C1, double %C2) {
; CHECK-LABEL: @test1(
; CHECK-NEXT: [[X1_V_I1_1:%.*]] = insertelement <2 x double> undef, double [[B1:%.*]], i32 0
; CHECK-NEXT: [[X1_V_I1_2:%.*]] = insertelement <2 x double> [[X1_V_I1_1]], double [[B2:%.*]], i32 1
; CHECK-NEXT: [[X1_V_I0_1:%.*]] = insertelement <2 x double> undef, double [[A1:%.*]], i32 0
; CHECK-NEXT: [[X1_V_I0_2:%.*]] = insertelement <2 x double> [[X1_V_I0_1]], double [[A2:%.*]], i32 1
; CHECK-NEXT: [[X1:%.*]] = fsub <2 x double> [[X1_V_I0_2]], [[X1_V_I1_2]]
; CHECK-NEXT: [[Y1_V_I2_1:%.*]] = insertelement <2 x double> undef, double [[C1:%.*]], i32 0
; CHECK-NEXT: [[Y1_V_I2_2:%.*]] = insertelement <2 x double> [[Y1_V_I2_1]], double [[C2:%.*]], i32 1
; CHECK-NEXT: [[Y1:%.*]] = call <2 x double> @llvm.fma.v2f64(<2 x double> [[X1]], <2 x double> [[X1_V_I0_2]], <2 x double> [[Y1_V_I2_2]])
; CHECK-NEXT: [[Z1:%.*]] = fadd <2 x double> [[Y1]], [[X1_V_I1_2]]
; CHECK-NEXT: [[Z1_V_R1:%.*]] = extractelement <2 x double> [[Z1]], i32 0
; CHECK-NEXT: [[Z1_V_R2:%.*]] = extractelement <2 x double> [[Z1]], i32 1
; CHECK-NEXT: [[R:%.*]] = fmul double [[Z1_V_R1]], [[Z1_V_R2]]
; CHECK-NEXT: ret double [[R]]
;
; Expected: the two parallel fsub/fma/fadd chains fuse into one <2 x double>
; chain calling @llvm.fma.v2f64; only the final cross-lane fmul stays scalar.
%X1 = fsub double %A1, %B1
%X2 = fsub double %A2, %B2
%Y1 = call double @llvm.fma.f64(double %X1, double %A1, double %C1)
%Y2 = call double @llvm.fma.f64(double %X2, double %A2, double %C2)
%Z1 = fadd double %Y1, %B1
%Z2 = fadd double %Y2, %B2
%R = fmul double %Z1, %Z2
ret double %R
}
; Basic depth-3 chain with fmuladd
define double @test1a(double %A1, double %A2, double %B1, double %B2, double %C1, double %C2) {
; CHECK-LABEL: @test1a(
; CHECK-NEXT: [[X1_V_I1_1:%.*]] = insertelement <2 x double> undef, double [[B1:%.*]], i32 0
; CHECK-NEXT: [[X1_V_I1_2:%.*]] = insertelement <2 x double> [[X1_V_I1_1]], double [[B2:%.*]], i32 1
; CHECK-NEXT: [[X1_V_I0_1:%.*]] = insertelement <2 x double> undef, double [[A1:%.*]], i32 0
; CHECK-NEXT: [[X1_V_I0_2:%.*]] = insertelement <2 x double> [[X1_V_I0_1]], double [[A2:%.*]], i32 1
; CHECK-NEXT: [[X1:%.*]] = fsub <2 x double> [[X1_V_I0_2]], [[X1_V_I1_2]]
; CHECK-NEXT: [[Y1_V_I2_1:%.*]] = insertelement <2 x double> undef, double [[C1:%.*]], i32 0
; CHECK-NEXT: [[Y1_V_I2_2:%.*]] = insertelement <2 x double> [[Y1_V_I2_1]], double [[C2:%.*]], i32 1
; CHECK-NEXT: [[Y1:%.*]] = call <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[X1]], <2 x double> [[X1_V_I0_2]], <2 x double> [[Y1_V_I2_2]])
; CHECK-NEXT: [[Z1:%.*]] = fadd <2 x double> [[Y1]], [[X1_V_I1_2]]
; CHECK-NEXT: [[Z1_V_R1:%.*]] = extractelement <2 x double> [[Z1]], i32 0
; CHECK-NEXT: [[Z1_V_R2:%.*]] = extractelement <2 x double> [[Z1]], i32 1
; CHECK-NEXT: [[R:%.*]] = fmul double [[Z1_V_R1]], [[Z1_V_R2]]
; CHECK-NEXT: ret double [[R]]
;
; Expected: same shape as @test1 but the paired calls become
; @llvm.fmuladd.v2f64 instead of @llvm.fma.v2f64.
%X1 = fsub double %A1, %B1
%X2 = fsub double %A2, %B2
%Y1 = call double @llvm.fmuladd.f64(double %X1, double %A1, double %C1)
%Y2 = call double @llvm.fmuladd.f64(double %X2, double %A2, double %C2)
%Z1 = fadd double %Y1, %B1
%Z2 = fadd double %Y2, %B2
%R = fmul double %Z1, %Z2
ret double %R
}
; Basic depth-3 chain with cos
define double @test2(double %A1, double %A2, double %B1, double %B2) {
; CHECK-LABEL: @test2(
; CHECK-NEXT: [[X1_V_I1_1:%.*]] = insertelement <2 x double> undef, double [[B1:%.*]], i32 0
; CHECK-NEXT: [[X1_V_I1_2:%.*]] = insertelement <2 x double> [[X1_V_I1_1]], double [[B2:%.*]], i32 1
; CHECK-NEXT: [[X1_V_I0_1:%.*]] = insertelement <2 x double> undef, double [[A1:%.*]], i32 0
; CHECK-NEXT: [[X1_V_I0_2:%.*]] = insertelement <2 x double> [[X1_V_I0_1]], double [[A2:%.*]], i32 1
; CHECK-NEXT: [[X1:%.*]] = fsub <2 x double> [[X1_V_I0_2]], [[X1_V_I1_2]]
; CHECK-NEXT: [[Y1:%.*]] = call <2 x double> @llvm.cos.v2f64(<2 x double> [[X1]])
; CHECK-NEXT: [[Z1:%.*]] = fadd <2 x double> [[Y1]], [[X1_V_I1_2]]
; CHECK-NEXT: [[Z1_V_R1:%.*]] = extractelement <2 x double> [[Z1]], i32 0
; CHECK-NEXT: [[Z1_V_R2:%.*]] = extractelement <2 x double> [[Z1]], i32 1
; CHECK-NEXT: [[R:%.*]] = fmul double [[Z1_V_R1]], [[Z1_V_R2]]
; CHECK-NEXT: ret double [[R]]
;
; Expected: the paired @llvm.cos.f64 calls fuse into one @llvm.cos.v2f64.
%X1 = fsub double %A1, %B1
%X2 = fsub double %A2, %B2
%Y1 = call double @llvm.cos.f64(double %X1)
%Y2 = call double @llvm.cos.f64(double %X2)
%Z1 = fadd double %Y1, %B1
%Z2 = fadd double %Y2, %B2
%R = fmul double %Z1, %Z2
ret double %R
}
; Basic depth-3 chain with powi
define double @test3(double %A1, double %A2, double %B1, double %B2, i32 %P) {
; CHECK-LABEL: @test3(
; CHECK-NEXT: [[X1_V_I1_1:%.*]] = insertelement <2 x double> undef, double [[B1:%.*]], i32 0
; CHECK-NEXT: [[X1_V_I1_2:%.*]] = insertelement <2 x double> [[X1_V_I1_1]], double [[B2:%.*]], i32 1
; CHECK-NEXT: [[X1_V_I0_1:%.*]] = insertelement <2 x double> undef, double [[A1:%.*]], i32 0
; CHECK-NEXT: [[X1_V_I0_2:%.*]] = insertelement <2 x double> [[X1_V_I0_1]], double [[A2:%.*]], i32 1
; CHECK-NEXT: [[X1:%.*]] = fsub <2 x double> [[X1_V_I0_2]], [[X1_V_I1_2]]
; CHECK-NEXT: [[Y1:%.*]] = call <2 x double> @llvm.powi.v2f64(<2 x double> [[X1]], i32 [[P:%.*]])
; CHECK-NEXT: [[Z1:%.*]] = fadd <2 x double> [[Y1]], [[X1_V_I1_2]]
; CHECK-NEXT: [[Z1_V_R1:%.*]] = extractelement <2 x double> [[Z1]], i32 0
; CHECK-NEXT: [[Z1_V_R2:%.*]] = extractelement <2 x double> [[Z1]], i32 1
; CHECK-NEXT: [[R:%.*]] = fmul double [[Z1_V_R1]], [[Z1_V_R2]]
; CHECK-NEXT: ret double [[R]]
;
; Expected: both powi calls share the same scalar exponent %P, so they fuse
; into one @llvm.powi.v2f64 call (contrast with @test4 below).
%X1 = fsub double %A1, %B1
%X2 = fsub double %A2, %B2
%Y1 = call double @llvm.powi.f64(double %X1, i32 %P)
%Y2 = call double @llvm.powi.f64(double %X2, i32 %P)
%Z1 = fadd double %Y1, %B1
%Z2 = fadd double %Y2, %B2
%R = fmul double %Z1, %Z2
ret double %R
}
; Basic depth-3 chain with powi (different powers: should not vectorize)
define double @test4(double %A1, double %A2, double %B1, double %B2, i32 %P) {
; CHECK-LABEL: @test4(
; CHECK-NEXT: [[X1:%.*]] = fsub double [[A1:%.*]], [[B1:%.*]]
; CHECK-NEXT: [[X2:%.*]] = fsub double [[A2:%.*]], [[B2:%.*]]
; CHECK-NEXT: [[P2:%.*]] = add i32 [[P:%.*]], 1
; CHECK-NEXT: [[Y1:%.*]] = call double @llvm.powi.f64(double [[X1]], i32 [[P]])
; CHECK-NEXT: [[Y2:%.*]] = call double @llvm.powi.f64(double [[X2]], i32 [[P2]])
; CHECK-NEXT: [[Z1:%.*]] = fadd double [[Y1]], [[B1]]
; CHECK-NEXT: [[Z2:%.*]] = fadd double [[Y2]], [[B2]]
; CHECK-NEXT: [[R:%.*]] = fmul double [[Z1]], [[Z2]]
; CHECK-NEXT: ret double [[R]]
;
; Negative test: the powi exponents differ (%P vs. %P + 1), so the calls
; cannot be paired and the whole function must remain scalar.
%X1 = fsub double %A1, %B1
%X2 = fsub double %A2, %B2
%P2 = add i32 %P, 1
%Y1 = call double @llvm.powi.f64(double %X1, i32 %P)
%Y2 = call double @llvm.powi.f64(double %X2, i32 %P2)
%Z1 = fadd double %Y1, %B1
%Z2 = fadd double %Y2, %B2
%R = fmul double %Z1, %Z2
ret double %R
}
; Basic depth-3 chain with round
define double @testround(double %A1, double %A2, double %B1, double %B2) {
; CHECK-LABEL: @testround(
; CHECK-NEXT: [[X1_V_I1_1:%.*]] = insertelement <2 x double> undef, double [[B1:%.*]], i32 0
; CHECK-NEXT: [[X1_V_I1_2:%.*]] = insertelement <2 x double> [[X1_V_I1_1]], double [[B2:%.*]], i32 1
; CHECK-NEXT: [[X1_V_I0_1:%.*]] = insertelement <2 x double> undef, double [[A1:%.*]], i32 0
; CHECK-NEXT: [[X1_V_I0_2:%.*]] = insertelement <2 x double> [[X1_V_I0_1]], double [[A2:%.*]], i32 1
; CHECK-NEXT: [[X1:%.*]] = fsub <2 x double> [[X1_V_I0_2]], [[X1_V_I1_2]]
; CHECK-NEXT: [[Y1:%.*]] = call <2 x double> @llvm.round.v2f64(<2 x double> [[X1]])
; CHECK-NEXT: [[Z1:%.*]] = fadd <2 x double> [[Y1]], [[X1_V_I1_2]]
; CHECK-NEXT: [[Z1_V_R1:%.*]] = extractelement <2 x double> [[Z1]], i32 0
; CHECK-NEXT: [[Z1_V_R2:%.*]] = extractelement <2 x double> [[Z1]], i32 1
; CHECK-NEXT: [[R:%.*]] = fmul double [[Z1_V_R1]], [[Z1_V_R2]]
; CHECK-NEXT: ret double [[R]]
;
; Expected: the paired @llvm.round.f64 calls fuse into one @llvm.round.v2f64.
%X1 = fsub double %A1, %B1
%X2 = fsub double %A2, %B2
%Y1 = call double @llvm.round.f64(double %X1)
%Y2 = call double @llvm.round.f64(double %X2)
%Z1 = fadd double %Y1, %B1
%Z2 = fadd double %Y2, %B2
%R = fmul double %Z1, %Z2
ret double %R
}
; Basic depth-3 chain with copysign
define double @testcopysign(double %A1, double %A2, double %B1, double %B2) {
; CHECK-LABEL: @testcopysign(
; CHECK-NEXT: [[X1_V_I1_1:%.*]] = insertelement <2 x double> undef, double [[B1:%.*]], i32 0
; CHECK-NEXT: [[X1_V_I1_2:%.*]] = insertelement <2 x double> [[X1_V_I1_1]], double [[B2:%.*]], i32 1
; CHECK-NEXT: [[X1_V_I0_1:%.*]] = insertelement <2 x double> undef, double [[A1:%.*]], i32 0
; CHECK-NEXT: [[X1_V_I0_2:%.*]] = insertelement <2 x double> [[X1_V_I0_1]], double [[A2:%.*]], i32 1
; CHECK-NEXT: [[X1:%.*]] = fsub <2 x double> [[X1_V_I0_2]], [[X1_V_I1_2]]
; CHECK-NEXT: [[Y1_V_I1_2:%.*]] = shufflevector <2 x double> [[X1_V_I0_1]], <2 x double> undef, <2 x i32> zeroinitializer
; CHECK-NEXT: [[Y1:%.*]] = call <2 x double> @llvm.copysign.v2f64(<2 x double> [[X1]], <2 x double> [[Y1_V_I1_2]])
; CHECK-NEXT: [[Z1:%.*]] = fadd <2 x double> [[Y1]], [[X1_V_I1_2]]
; CHECK-NEXT: [[Z1_V_R1:%.*]] = extractelement <2 x double> [[Z1]], i32 0
; CHECK-NEXT: [[Z1_V_R2:%.*]] = extractelement <2 x double> [[Z1]], i32 1
; CHECK-NEXT: [[R:%.*]] = fmul double [[Z1_V_R1]], [[Z1_V_R2]]
; CHECK-NEXT: ret double [[R]]
;
; Expected: both copysign calls use %A1 as the sign operand, so the fused
; @llvm.copysign.v2f64 takes a splat (shufflevector) of %A1 in both lanes.
%X1 = fsub double %A1, %B1
%X2 = fsub double %A2, %B2
%Y1 = call double @llvm.copysign.f64(double %X1, double %A1)
%Y2 = call double @llvm.copysign.f64(double %X2, double %A1)
%Z1 = fadd double %Y1, %B1
%Z2 = fadd double %Y2, %B2
%R = fmul double %Z1, %Z2
ret double %R
}
; Basic depth-3 chain with ceil
define double @testceil(double %A1, double %A2, double %B1, double %B2) {
; CHECK-LABEL: @testceil(
; CHECK-NEXT: [[X1_V_I1_1:%.*]] = insertelement <2 x double> undef, double [[B1:%.*]], i32 0
; CHECK-NEXT: [[X1_V_I1_2:%.*]] = insertelement <2 x double> [[X1_V_I1_1]], double [[B2:%.*]], i32 1
; CHECK-NEXT: [[X1_V_I0_1:%.*]] = insertelement <2 x double> undef, double [[A1:%.*]], i32 0
; CHECK-NEXT: [[X1_V_I0_2:%.*]] = insertelement <2 x double> [[X1_V_I0_1]], double [[A2:%.*]], i32 1
; CHECK-NEXT: [[X1:%.*]] = fsub <2 x double> [[X1_V_I0_2]], [[X1_V_I1_2]]
; CHECK-NEXT: [[Y1:%.*]] = call <2 x double> @llvm.ceil.v2f64(<2 x double> [[X1]])
; CHECK-NEXT: [[Z1:%.*]] = fadd <2 x double> [[Y1]], [[X1_V_I1_2]]
; CHECK-NEXT: [[Z1_V_R1:%.*]] = extractelement <2 x double> [[Z1]], i32 0
; CHECK-NEXT: [[Z1_V_R2:%.*]] = extractelement <2 x double> [[Z1]], i32 1
; CHECK-NEXT: [[R:%.*]] = fmul double [[Z1_V_R1]], [[Z1_V_R2]]
; CHECK-NEXT: ret double [[R]]
;
; Expected: the paired @llvm.ceil.f64 calls fuse into one @llvm.ceil.v2f64.
%X1 = fsub double %A1, %B1
%X2 = fsub double %A2, %B2
%Y1 = call double @llvm.ceil.f64(double %X1)
%Y2 = call double @llvm.ceil.f64(double %X2)
%Z1 = fadd double %Y1, %B1
%Z2 = fadd double %Y2, %B2
%R = fmul double %Z1, %Z2
ret double %R
}
; Basic depth-3 chain with nearbyint
define double @testnearbyint(double %A1, double %A2, double %B1, double %B2) {
; CHECK-LABEL: @testnearbyint(
; CHECK-NEXT: [[X1_V_I1_1:%.*]] = insertelement <2 x double> undef, double [[B1:%.*]], i32 0
; CHECK-NEXT: [[X1_V_I1_2:%.*]] = insertelement <2 x double> [[X1_V_I1_1]], double [[B2:%.*]], i32 1
; CHECK-NEXT: [[X1_V_I0_1:%.*]] = insertelement <2 x double> undef, double [[A1:%.*]], i32 0
; CHECK-NEXT: [[X1_V_I0_2:%.*]] = insertelement <2 x double> [[X1_V_I0_1]], double [[A2:%.*]], i32 1
; CHECK-NEXT: [[X1:%.*]] = fsub <2 x double> [[X1_V_I0_2]], [[X1_V_I1_2]]
; CHECK-NEXT: [[Y1:%.*]] = call <2 x double> @llvm.nearbyint.v2f64(<2 x double> [[X1]])
; CHECK-NEXT: [[Z1:%.*]] = fadd <2 x double> [[Y1]], [[X1_V_I1_2]]
; CHECK-NEXT: [[Z1_V_R1:%.*]] = extractelement <2 x double> [[Z1]], i32 0
; CHECK-NEXT: [[Z1_V_R2:%.*]] = extractelement <2 x double> [[Z1]], i32 1
; CHECK-NEXT: [[R:%.*]] = fmul double [[Z1_V_R1]], [[Z1_V_R2]]
; CHECK-NEXT: ret double [[R]]
;
; Expected: the paired @llvm.nearbyint.f64 calls fuse into one
; @llvm.nearbyint.v2f64.
%X1 = fsub double %A1, %B1
%X2 = fsub double %A2, %B2
%Y1 = call double @llvm.nearbyint.f64(double %X1)
%Y2 = call double @llvm.nearbyint.f64(double %X2)
%Z1 = fadd double %Y1, %B1
%Z2 = fadd double %Y2, %B2
%R = fmul double %Z1, %Z2
ret double %R
}
; Basic depth-3 chain with rint
define double @testrint(double %A1, double %A2, double %B1, double %B2) {
; CHECK-LABEL: @testrint(
; CHECK-NEXT: [[X1_V_I1_1:%.*]] = insertelement <2 x double> undef, double [[B1:%.*]], i32 0
; CHECK-NEXT: [[X1_V_I1_2:%.*]] = insertelement <2 x double> [[X1_V_I1_1]], double [[B2:%.*]], i32 1
; CHECK-NEXT: [[X1_V_I0_1:%.*]] = insertelement <2 x double> undef, double [[A1:%.*]], i32 0
; CHECK-NEXT: [[X1_V_I0_2:%.*]] = insertelement <2 x double> [[X1_V_I0_1]], double [[A2:%.*]], i32 1
; CHECK-NEXT: [[X1:%.*]] = fsub <2 x double> [[X1_V_I0_2]], [[X1_V_I1_2]]
; CHECK-NEXT: [[Y1:%.*]] = call <2 x double> @llvm.rint.v2f64(<2 x double> [[X1]])
; CHECK-NEXT: [[Z1:%.*]] = fadd <2 x double> [[Y1]], [[X1_V_I1_2]]
; CHECK-NEXT: [[Z1_V_R1:%.*]] = extractelement <2 x double> [[Z1]], i32 0
; CHECK-NEXT: [[Z1_V_R2:%.*]] = extractelement <2 x double> [[Z1]], i32 1
; CHECK-NEXT: [[R:%.*]] = fmul double [[Z1_V_R1]], [[Z1_V_R2]]
; CHECK-NEXT: ret double [[R]]
;
; Expected: the paired @llvm.rint.f64 calls fuse into one @llvm.rint.v2f64.
%X1 = fsub double %A1, %B1
%X2 = fsub double %A2, %B2
%Y1 = call double @llvm.rint.f64(double %X1)
%Y2 = call double @llvm.rint.f64(double %X2)
%Z1 = fadd double %Y1, %B1
%Z2 = fadd double %Y2, %B2
%R = fmul double %Z1, %Z2
ret double %R
}
; Basic depth-3 chain with trunc
define double @testtrunc(double %A1, double %A2, double %B1, double %B2) {
; CHECK-LABEL: @testtrunc(
; CHECK-NEXT: [[X1_V_I1_1:%.*]] = insertelement <2 x double> undef, double [[B1:%.*]], i32 0
; CHECK-NEXT: [[X1_V_I1_2:%.*]] = insertelement <2 x double> [[X1_V_I1_1]], double [[B2:%.*]], i32 1
; CHECK-NEXT: [[X1_V_I0_1:%.*]] = insertelement <2 x double> undef, double [[A1:%.*]], i32 0
; CHECK-NEXT: [[X1_V_I0_2:%.*]] = insertelement <2 x double> [[X1_V_I0_1]], double [[A2:%.*]], i32 1
; CHECK-NEXT: [[X1:%.*]] = fsub <2 x double> [[X1_V_I0_2]], [[X1_V_I1_2]]
; CHECK-NEXT: [[Y1:%.*]] = call <2 x double> @llvm.trunc.v2f64(<2 x double> [[X1]])
; CHECK-NEXT: [[Z1:%.*]] = fadd <2 x double> [[Y1]], [[X1_V_I1_2]]
; CHECK-NEXT: [[Z1_V_R1:%.*]] = extractelement <2 x double> [[Z1]], i32 0
; CHECK-NEXT: [[Z1_V_R2:%.*]] = extractelement <2 x double> [[Z1]], i32 1
; CHECK-NEXT: [[R:%.*]] = fmul double [[Z1_V_R1]], [[Z1_V_R2]]
; CHECK-NEXT: ret double [[R]]
;
; Expected: the paired @llvm.trunc.f64 calls fuse into one @llvm.trunc.v2f64.
%X1 = fsub double %A1, %B1
%X2 = fsub double %A2, %B2
%Y1 = call double @llvm.trunc.f64(double %X1)
%Y2 = call double @llvm.trunc.f64(double %X2)
%Z1 = fadd double %Y1, %B1
%Z2 = fadd double %Y2, %B2
%R = fmul double %Z1, %Z2
ret double %R
}
; Basic depth-3 chain with floor
define double @testfloor(double %A1, double %A2, double %B1, double %B2) {
; CHECK-LABEL: @testfloor(
; CHECK-NEXT: [[X1_V_I1_1:%.*]] = insertelement <2 x double> undef, double [[B1:%.*]], i32 0
; CHECK-NEXT: [[X1_V_I1_2:%.*]] = insertelement <2 x double> [[X1_V_I1_1]], double [[B2:%.*]], i32 1
; CHECK-NEXT: [[X1_V_I0_1:%.*]] = insertelement <2 x double> undef, double [[A1:%.*]], i32 0
; CHECK-NEXT: [[X1_V_I0_2:%.*]] = insertelement <2 x double> [[X1_V_I0_1]], double [[A2:%.*]], i32 1
; CHECK-NEXT: [[X1:%.*]] = fsub <2 x double> [[X1_V_I0_2]], [[X1_V_I1_2]]
; CHECK-NEXT: [[Y1:%.*]] = call <2 x double> @llvm.floor.v2f64(<2 x double> [[X1]])
; CHECK-NEXT: [[Z1:%.*]] = fadd <2 x double> [[Y1]], [[X1_V_I1_2]]
; CHECK-NEXT: [[Z1_V_R1:%.*]] = extractelement <2 x double> [[Z1]], i32 0
; CHECK-NEXT: [[Z1_V_R2:%.*]] = extractelement <2 x double> [[Z1]], i32 1
; CHECK-NEXT: [[R:%.*]] = fmul double [[Z1_V_R1]], [[Z1_V_R2]]
; CHECK-NEXT: ret double [[R]]
;
; Expected: the paired @llvm.floor.f64 calls fuse into one @llvm.floor.v2f64.
%X1 = fsub double %A1, %B1
%X2 = fsub double %A2, %B2
%Y1 = call double @llvm.floor.f64(double %X1)
%Y2 = call double @llvm.floor.f64(double %X2)
%Z1 = fadd double %Y1, %B1
%Z2 = fadd double %Y2, %B2
%R = fmul double %Z1, %Z2
ret double %R
}
; Basic depth-3 chain with fabs
define double @testfabs(double %A1, double %A2, double %B1, double %B2) {
; CHECK-LABEL: @testfabs(
; CHECK-NEXT: [[X1_V_I1_1:%.*]] = insertelement <2 x double> undef, double [[B1:%.*]], i32 0
; CHECK-NEXT: [[X1_V_I1_2:%.*]] = insertelement <2 x double> [[X1_V_I1_1]], double [[B2:%.*]], i32 1
; CHECK-NEXT: [[X1_V_I0_1:%.*]] = insertelement <2 x double> undef, double [[A1:%.*]], i32 0
; CHECK-NEXT: [[X1_V_I0_2:%.*]] = insertelement <2 x double> [[X1_V_I0_1]], double [[A2:%.*]], i32 1
; CHECK-NEXT: [[X1:%.*]] = fsub <2 x double> [[X1_V_I0_2]], [[X1_V_I1_2]]
; CHECK-NEXT: [[Y1:%.*]] = call <2 x double> @llvm.fabs.v2f64(<2 x double> [[X1]])
; CHECK-NEXT: [[Z1:%.*]] = fadd <2 x double> [[Y1]], [[X1_V_I1_2]]
; CHECK-NEXT: [[Z1_V_R1:%.*]] = extractelement <2 x double> [[Z1]], i32 0
; CHECK-NEXT: [[Z1_V_R2:%.*]] = extractelement <2 x double> [[Z1]], i32 1
; CHECK-NEXT: [[R:%.*]] = fmul double [[Z1_V_R1]], [[Z1_V_R2]]
; CHECK-NEXT: ret double [[R]]
;
; Expected: the paired @llvm.fabs.f64 calls fuse into one @llvm.fabs.v2f64.
%X1 = fsub double %A1, %B1
%X2 = fsub double %A2, %B2
%Y1 = call double @llvm.fabs.f64(double %X1)
%Y2 = call double @llvm.fabs.f64(double %X2)
%Z1 = fadd double %Y1, %B1
%Z2 = fadd double %Y2, %B2
%R = fmul double %Z1, %Z2
ret double %R
}
; Basic depth-3 chain with bswap
define i64 @testbswap(i64 %A1, i64 %A2, i64 %B1, i64 %B2) {
; CHECK-LABEL: @testbswap(
; CHECK-NEXT: [[X1_V_I1_1:%.*]] = insertelement <2 x i64> undef, i64 [[B1:%.*]], i32 0
; CHECK-NEXT: [[X1_V_I1_2:%.*]] = insertelement <2 x i64> [[X1_V_I1_1]], i64 [[B2:%.*]], i32 1
; CHECK-NEXT: [[X1_V_I0_1:%.*]] = insertelement <2 x i64> undef, i64 [[A1:%.*]], i32 0
; CHECK-NEXT: [[X1_V_I0_2:%.*]] = insertelement <2 x i64> [[X1_V_I0_1]], i64 [[A2:%.*]], i32 1
; CHECK-NEXT: [[X1:%.*]] = sub <2 x i64> [[X1_V_I0_2]], [[X1_V_I1_2]]
; CHECK-NEXT: [[Y1:%.*]] = call <2 x i64> @llvm.bswap.v2i64(<2 x i64> [[X1]])
; CHECK-NEXT: [[Z1:%.*]] = add <2 x i64> [[Y1]], [[X1_V_I1_2]]
; CHECK-NEXT: [[Z1_V_R1:%.*]] = extractelement <2 x i64> [[Z1]], i32 0
; CHECK-NEXT: [[Z1_V_R2:%.*]] = extractelement <2 x i64> [[Z1]], i32 1
; CHECK-NEXT: [[R:%.*]] = mul i64 [[Z1_V_R1]], [[Z1_V_R2]]
; CHECK-NEXT: ret i64 [[R]]
;
; Expected: integer variant — the paired @llvm.bswap.i64 calls fuse into one
; @llvm.bswap.v2i64 on <2 x i64>.
%X1 = sub i64 %A1, %B1
%X2 = sub i64 %A2, %B2
%Y1 = call i64 @llvm.bswap.i64(i64 %X1)
%Y2 = call i64 @llvm.bswap.i64(i64 %X2)
%Z1 = add i64 %Y1, %B1
%Z2 = add i64 %Y2, %B2
%R = mul i64 %Z1, %Z2
ret i64 %R
}
; Basic depth-3 chain with ctpop
define i64 @testctpop(i64 %A1, i64 %A2, i64 %B1, i64 %B2) {
; CHECK-LABEL: @testctpop(
; CHECK-NEXT: [[X1_V_I1_1:%.*]] = insertelement <2 x i64> undef, i64 [[B1:%.*]], i32 0
; CHECK-NEXT: [[X1_V_I1_2:%.*]] = insertelement <2 x i64> [[X1_V_I1_1]], i64 [[B2:%.*]], i32 1
; CHECK-NEXT: [[X1_V_I0_1:%.*]] = insertelement <2 x i64> undef, i64 [[A1:%.*]], i32 0
; CHECK-NEXT: [[X1_V_I0_2:%.*]] = insertelement <2 x i64> [[X1_V_I0_1]], i64 [[A2:%.*]], i32 1
; CHECK-NEXT: [[X1:%.*]] = sub <2 x i64> [[X1_V_I0_2]], [[X1_V_I1_2]]
; CHECK-NEXT: [[Y1:%.*]] = call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> [[X1]])
; CHECK-NEXT: [[Z1:%.*]] = add <2 x i64> [[Y1]], [[X1_V_I1_2]]
; CHECK-NEXT: [[Z1_V_R1:%.*]] = extractelement <2 x i64> [[Z1]], i32 0
; CHECK-NEXT: [[Z1_V_R2:%.*]] = extractelement <2 x i64> [[Z1]], i32 1
; CHECK-NEXT: [[R:%.*]] = mul i64 [[Z1_V_R1]], [[Z1_V_R2]]
; CHECK-NEXT: ret i64 [[R]]
;
; Expected: the paired @llvm.ctpop.i64 calls fuse into one @llvm.ctpop.v2i64.
%X1 = sub i64 %A1, %B1
%X2 = sub i64 %A2, %B2
%Y1 = call i64 @llvm.ctpop.i64(i64 %X1)
%Y2 = call i64 @llvm.ctpop.i64(i64 %X2)
%Z1 = add i64 %Y1, %B1
%Z2 = add i64 %Y2, %B2
%R = mul i64 %Z1, %Z2
ret i64 %R
}
; Basic depth-3 chain with ctlz
define i64 @testctlz(i64 %A1, i64 %A2, i64 %B1, i64 %B2) {
; CHECK-LABEL: @testctlz(
; CHECK-NEXT: [[X1_V_I1_1:%.*]] = insertelement <2 x i64> undef, i64 [[B1:%.*]], i32 0
; CHECK-NEXT: [[X1_V_I1_2:%.*]] = insertelement <2 x i64> [[X1_V_I1_1]], i64 [[B2:%.*]], i32 1
; CHECK-NEXT: [[X1_V_I0_1:%.*]] = insertelement <2 x i64> undef, i64 [[A1:%.*]], i32 0
; CHECK-NEXT: [[X1_V_I0_2:%.*]] = insertelement <2 x i64> [[X1_V_I0_1]], i64 [[A2:%.*]], i32 1
; CHECK-NEXT: [[X1:%.*]] = sub <2 x i64> [[X1_V_I0_2]], [[X1_V_I1_2]]
; CHECK-NEXT: [[Y1:%.*]] = call <2 x i64> @llvm.ctlz.v2i64(<2 x i64> [[X1]], i1 true)
; CHECK-NEXT: [[Z1:%.*]] = add <2 x i64> [[Y1]], [[X1_V_I1_2]]
; CHECK-NEXT: [[Z1_V_R1:%.*]] = extractelement <2 x i64> [[Z1]], i32 0
; CHECK-NEXT: [[Z1_V_R2:%.*]] = extractelement <2 x i64> [[Z1]], i32 1
; CHECK-NEXT: [[R:%.*]] = mul i64 [[Z1_V_R1]], [[Z1_V_R2]]
; CHECK-NEXT: ret i64 [[R]]
;
; Expected: both ctlz calls share the same is_zero_undef flag (i1 true), so
; they fuse into one @llvm.ctlz.v2i64 (contrast with @testctlzneg below).
%X1 = sub i64 %A1, %B1
%X2 = sub i64 %A2, %B2
%Y1 = call i64 @llvm.ctlz.i64(i64 %X1, i1 true)
%Y2 = call i64 @llvm.ctlz.i64(i64 %X2, i1 true)
%Z1 = add i64 %Y1, %B1
%Z2 = add i64 %Y2, %B2
%R = mul i64 %Z1, %Z2
ret i64 %R
}
; Basic depth-3 chain with ctlz
define i64 @testctlzneg(i64 %A1, i64 %A2, i64 %B1, i64 %B2) {
; CHECK-LABEL: @testctlzneg(
; CHECK-NEXT: [[X1:%.*]] = sub i64 [[A1:%.*]], [[B1:%.*]]
; CHECK-NEXT: [[X2:%.*]] = sub i64 [[A2:%.*]], [[B2:%.*]]
; CHECK-NEXT: [[Y1:%.*]] = call i64 @llvm.ctlz.i64(i64 [[X1]], i1 true), !range !0
; CHECK-NEXT: [[Y2:%.*]] = call i64 @llvm.ctlz.i64(i64 [[X2]], i1 false), !range !0
; CHECK-NEXT: [[Z1:%.*]] = add i64 [[Y1]], [[B1]]
; CHECK-NEXT: [[Z2:%.*]] = add i64 [[Y2]], [[B2]]
; CHECK-NEXT: [[R:%.*]] = mul i64 [[Z1]], [[Z2]]
; CHECK-NEXT: ret i64 [[R]]
;
; Negative test: the i1 flags differ (true vs. false), so the ctlz calls
; cannot be paired and the expected output stays scalar.
%X1 = sub i64 %A1, %B1
%X2 = sub i64 %A2, %B2
%Y1 = call i64 @llvm.ctlz.i64(i64 %X1, i1 true)
%Y2 = call i64 @llvm.ctlz.i64(i64 %X2, i1 false)
%Z1 = add i64 %Y1, %B1
%Z2 = add i64 %Y2, %B2
%R = mul i64 %Z1, %Z2
ret i64 %R
}
; Basic depth-3 chain with cttz
define i64 @testcttz(i64 %A1, i64 %A2, i64 %B1, i64 %B2) {
; CHECK-LABEL: @testcttz(
; CHECK-NEXT: [[X1_V_I1_1:%.*]] = insertelement <2 x i64> undef, i64 [[B1:%.*]], i32 0
; CHECK-NEXT: [[X1_V_I1_2:%.*]] = insertelement <2 x i64> [[X1_V_I1_1]], i64 [[B2:%.*]], i32 1
; CHECK-NEXT: [[X1_V_I0_1:%.*]] = insertelement <2 x i64> undef, i64 [[A1:%.*]], i32 0
; CHECK-NEXT: [[X1_V_I0_2:%.*]] = insertelement <2 x i64> [[X1_V_I0_1]], i64 [[A2:%.*]], i32 1
; CHECK-NEXT: [[X1:%.*]] = sub <2 x i64> [[X1_V_I0_2]], [[X1_V_I1_2]]
; CHECK-NEXT: [[Y1:%.*]] = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> [[X1]], i1 true)
; CHECK-NEXT: [[Z1:%.*]] = add <2 x i64> [[Y1]], [[X1_V_I1_2]]
; CHECK-NEXT: [[Z1_V_R1:%.*]] = extractelement <2 x i64> [[Z1]], i32 0
; CHECK-NEXT: [[Z1_V_R2:%.*]] = extractelement <2 x i64> [[Z1]], i32 1
; CHECK-NEXT: [[R:%.*]] = mul i64 [[Z1_V_R1]], [[Z1_V_R2]]
; CHECK-NEXT: ret i64 [[R]]
;
; Expected: both cttz calls share the same i1 flag (true), so they fuse into
; one @llvm.cttz.v2i64 (contrast with @testcttzneg below).
%X1 = sub i64 %A1, %B1
%X2 = sub i64 %A2, %B2
%Y1 = call i64 @llvm.cttz.i64(i64 %X1, i1 true)
%Y2 = call i64 @llvm.cttz.i64(i64 %X2, i1 true)
%Z1 = add i64 %Y1, %B1
%Z2 = add i64 %Y2, %B2
%R = mul i64 %Z1, %Z2
ret i64 %R
}
; Basic depth-3 chain with cttz
define i64 @testcttzneg(i64 %A1, i64 %A2, i64 %B1, i64 %B2) {
; CHECK-LABEL: @testcttzneg(
; CHECK-NEXT: [[X1:%.*]] = sub i64 [[A1:%.*]], [[B1:%.*]]
; CHECK-NEXT: [[X2:%.*]] = sub i64 [[A2:%.*]], [[B2:%.*]]
; CHECK-NEXT: [[Y1:%.*]] = call i64 @llvm.cttz.i64(i64 [[X1]], i1 true), !range !0
; CHECK-NEXT: [[Y2:%.*]] = call i64 @llvm.cttz.i64(i64 [[X2]], i1 false), !range !0
; CHECK-NEXT: [[Z1:%.*]] = add i64 [[Y1]], [[B1]]
; CHECK-NEXT: [[Z2:%.*]] = add i64 [[Y2]], [[B2]]
; CHECK-NEXT: [[R:%.*]] = mul i64 [[Z1]], [[Z2]]
; CHECK-NEXT: ret i64 [[R]]
;
; Negative test: the i1 flags differ (true vs. false), so the cttz calls
; cannot be paired and the expected output stays scalar.
%X1 = sub i64 %A1, %B1
%X2 = sub i64 %A2, %B2
%Y1 = call i64 @llvm.cttz.i64(i64 %X1, i1 true)
%Y2 = call i64 @llvm.cttz.i64(i64 %X2, i1 false)
%Z1 = add i64 %Y1, %B1
%Z2 = add i64 %Y2, %B2
%R = mul i64 %Z1, %Z2
ret i64 %R
}
; CHECK: declare <2 x double> @llvm.fma.v2f64(<2 x double>, <2 x double>, <2 x double>) #0
; CHECK: declare <2 x double> @llvm.fmuladd.v2f64(<2 x double>, <2 x double>, <2 x double>) #0
; CHECK: declare <2 x double> @llvm.cos.v2f64(<2 x double>) #0
; CHECK: declare <2 x double> @llvm.powi.v2f64(<2 x double>, i32) #0
; CHECK: declare <2 x double> @llvm.round.v2f64(<2 x double>) #0
; CHECK: declare <2 x double> @llvm.copysign.v2f64(<2 x double>, <2 x double>) #0
; CHECK: declare <2 x double> @llvm.ceil.v2f64(<2 x double>) #0
; CHECK: declare <2 x double> @llvm.nearbyint.v2f64(<2 x double>) #0
; CHECK: declare <2 x double> @llvm.rint.v2f64(<2 x double>) #0
; CHECK: declare <2 x double> @llvm.trunc.v2f64(<2 x double>) #0
; CHECK: declare <2 x double> @llvm.floor.v2f64(<2 x double>) #0
; CHECK: declare <2 x double> @llvm.fabs.v2f64(<2 x double>) #0
; CHECK: declare <2 x i64> @llvm.bswap.v2i64(<2 x i64>) #0
; CHECK: declare <2 x i64> @llvm.ctpop.v2i64(<2 x i64>) #0
; CHECK: declare <2 x i64> @llvm.ctlz.v2i64(<2 x i64>, i1) #0
; CHECK: declare <2 x i64> @llvm.cttz.v2i64(<2 x i64>, i1) #0
; CHECK: attributes #0 = { nounwind readnone speculatable }

View File

@ -1,134 +0,0 @@
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
; RUN: opt < %s -bb-vectorize -bb-vectorize-req-chain-depth=3 -instcombine -gvn -S | FileCheck %s
; RUN: opt < %s -bb-vectorize -bb-vectorize-req-chain-depth=3 -bb-vectorize-aligned-only -instcombine -gvn -S | FileCheck %s -check-prefix=CHECK-AO
; FIXME: re-enable this once pointer vectors work properly
; XFAIL: *
; Simple 3-pair chain also with loads and stores (using ptrs and gep)
define double @test1(i64* %a, i64* %b, i64* %c) nounwind uwtable readonly {
entry:
; Loads/muls/stores over adjacent i64 slots should fuse into <2 x i64> ops;
; the pointers produced via inttoptr feed a <2 x double*> gep whose lanes are
; extracted for the two scalar loads. Aligned-only (AO) run must not fuse the
; 8-byte-aligned loads.
%i0 = load i64, i64* %a, align 8
%i1 = load i64, i64* %b, align 8
%mul = mul i64 %i0, %i1
%arrayidx3 = getelementptr inbounds i64, i64* %a, i64 1
%i3 = load i64, i64* %arrayidx3, align 8
%arrayidx4 = getelementptr inbounds i64, i64* %b, i64 1
%i4 = load i64, i64* %arrayidx4, align 8
%mul5 = mul i64 %i3, %i4
%ptr = inttoptr i64 %mul to double*
%ptr5 = inttoptr i64 %mul5 to double*
%aptr = getelementptr inbounds double, double* %ptr, i64 2
%aptr5 = getelementptr inbounds double, double* %ptr5, i64 3
%av = load double, double* %aptr, align 16
%av5 = load double, double* %aptr5, align 16
%r = fmul double %av, %av5
store i64 %mul, i64* %c, align 8
%arrayidx5 = getelementptr inbounds i64, i64* %c, i64 1
store i64 %mul5, i64* %arrayidx5, align 8
ret double %r
; CHECK-LABEL: @test1(
; CHECK: %i0.v.i0 = bitcast i64* %a to <2 x i64>*
; CHECK: %i1.v.i0 = bitcast i64* %b to <2 x i64>*
; CHECK: %i0 = load <2 x i64>, <2 x i64>* %i0.v.i0, align 8
; CHECK: %i1 = load <2 x i64>, <2 x i64>* %i1.v.i0, align 8
; CHECK: %mul = mul <2 x i64> %i0, %i1
; CHECK: %ptr = inttoptr <2 x i64> %mul to <2 x double*>
; CHECK: %aptr = getelementptr inbounds double, <2 x double*> %ptr, <2 x i64> <i64 2, i64 3>
; CHECK: %aptr.v.r1 = extractelement <2 x double*> %aptr, i32 0
; CHECK: %aptr.v.r2 = extractelement <2 x double*> %aptr, i32 1
; CHECK: %av = load double, double* %aptr.v.r1, align 16
; CHECK: %av5 = load double, double* %aptr.v.r2, align 16
; CHECK: %r = fmul double %av, %av5
; CHECK: %0 = bitcast i64* %c to <2 x i64>*
; CHECK: store <2 x i64> %mul, <2 x i64>* %0, align 8
; CHECK: ret double %r
; CHECK-AO-LABEL: @test1(
; CHECK-AO-NOT: load <2 x
}
; Simple 3-pair chain with loads and stores (using ptrs and gep)
define void @test2(i64** %a, i64** %b, i64** %c) nounwind uwtable readonly {
entry:
; The adjacent i64* loads from %a and the geps/stores through %c should fuse
; into <2 x i64*> operations; the loads of %o1/%o4 stay scalar and are
; inserted into a <2 x i64> index vector for the fused gep. Aligned-only (AO)
; run must produce no vector types.
%i0 = load i64*, i64** %a, align 8
%i1 = load i64*, i64** %b, align 8
%arrayidx3 = getelementptr inbounds i64*, i64** %a, i64 1
%i3 = load i64*, i64** %arrayidx3, align 8
%arrayidx4 = getelementptr inbounds i64*, i64** %b, i64 1
%i4 = load i64*, i64** %arrayidx4, align 8
%o1 = load i64, i64* %i1, align 8
%o4 = load i64, i64* %i4, align 8
%ptr0 = getelementptr inbounds i64, i64* %i0, i64 %o1
%ptr3 = getelementptr inbounds i64, i64* %i3, i64 %o4
store i64* %ptr0, i64** %c, align 8
%arrayidx5 = getelementptr inbounds i64*, i64** %c, i64 1
store i64* %ptr3, i64** %arrayidx5, align 8
ret void
; CHECK-LABEL: @test2(
; CHECK: %i0.v.i0 = bitcast i64** %a to <2 x i64*>*
; CHECK: %i1 = load i64*, i64** %b, align 8
; CHECK: %i0 = load <2 x i64*>, <2 x i64*>* %i0.v.i0, align 8
; CHECK: %arrayidx4 = getelementptr inbounds i64*, i64** %b, i64 1
; CHECK: %i4 = load i64*, i64** %arrayidx4, align 8
; CHECK: %o1 = load i64, i64* %i1, align 8
; CHECK: %o4 = load i64, i64* %i4, align 8
; CHECK: %ptr0.v.i1.1 = insertelement <2 x i64> undef, i64 %o1, i32 0
; CHECK: %ptr0.v.i1.2 = insertelement <2 x i64> %ptr0.v.i1.1, i64 %o4, i32 1
; CHECK: %ptr0 = getelementptr inbounds i64, <2 x i64*> %i0, <2 x i64> %ptr0.v.i1.2
; CHECK: %0 = bitcast i64** %c to <2 x i64*>*
; CHECK: store <2 x i64*> %ptr0, <2 x i64*>* %0, align 8
; CHECK: ret void
; CHECK-AO-LABEL: @test2(
; CHECK-AO-NOT: <2 x
}
; Simple 3-pair chain with loads and stores (using ptrs and gep)
; using pointer vectors.
define void @test3(<2 x i64*>* %a, <2 x i64*>* %b, <2 x i64*>* %c) nounwind uwtable readonly {
entry:
; Inputs are already <2 x i64*> vectors: the adjacent vector loads from %a
; should widen to <4 x i64*> and the insertelement sequences collapse into
; shufflevectors, with a single <4 x i64*> store through %c. Aligned-only
; (AO) run must not produce <4 x ...> types.
%i0 = load <2 x i64*>, <2 x i64*>* %a, align 8
%i1 = load <2 x i64*>, <2 x i64*>* %b, align 8
%arrayidx3 = getelementptr inbounds <2 x i64*>, <2 x i64*>* %a, i64 1
%i3 = load <2 x i64*>, <2 x i64*>* %arrayidx3, align 8
%arrayidx4 = getelementptr inbounds <2 x i64*>, <2 x i64*>* %b, i64 1
%i4 = load <2 x i64*>, <2 x i64*>* %arrayidx4, align 8
%j1 = extractelement <2 x i64*> %i1, i32 0
%j4 = extractelement <2 x i64*> %i4, i32 0
%o1 = load i64, i64* %j1, align 8
%o4 = load i64, i64* %j4, align 8
%j0 = extractelement <2 x i64*> %i0, i32 0
%j3 = extractelement <2 x i64*> %i3, i32 0
%ptr0 = getelementptr inbounds i64, i64* %j0, i64 %o1
%ptr3 = getelementptr inbounds i64, i64* %j3, i64 %o4
%qtr0 = insertelement <2 x i64*> undef, i64* %ptr0, i32 0
%rtr0 = insertelement <2 x i64*> %qtr0, i64* %ptr0, i32 1
%qtr3 = insertelement <2 x i64*> undef, i64* %ptr3, i32 0
%rtr3 = insertelement <2 x i64*> %qtr3, i64* %ptr3, i32 1
store <2 x i64*> %rtr0, <2 x i64*>* %c, align 8
%arrayidx5 = getelementptr inbounds <2 x i64*>, <2 x i64*>* %c, i64 1
store <2 x i64*> %rtr3, <2 x i64*>* %arrayidx5, align 8
ret void
; CHECK-LABEL: @test3(
; CHECK: %i0.v.i0 = bitcast <2 x i64*>* %a to <4 x i64*>*
; CHECK: %i1 = load <2 x i64*>, <2 x i64*>* %b, align 8
; CHECK: %i0 = load <4 x i64*>, <4 x i64*>* %i0.v.i0, align 8
; CHECK: %arrayidx4 = getelementptr inbounds <2 x i64*>, <2 x i64*>* %b, i64 1
; CHECK: %i4 = load <2 x i64*>, <2 x i64*>* %arrayidx4, align 8
; CHECK: %j1 = extractelement <2 x i64*> %i1, i32 0
; CHECK: %j4 = extractelement <2 x i64*> %i4, i32 0
; CHECK: %o1 = load i64, i64* %j1, align 8
; CHECK: %o4 = load i64, i64* %j4, align 8
; CHECK: %ptr0.v.i1.1 = insertelement <2 x i64> undef, i64 %o1, i32 0
; CHECK: %ptr0.v.i1.2 = insertelement <2 x i64> %ptr0.v.i1.1, i64 %o4, i32 1
; CHECK: %ptr0.v.i0 = shufflevector <4 x i64*> %i0, <4 x i64*> undef, <2 x i32> <i32 0, i32 2>
; CHECK: %ptr0 = getelementptr inbounds i64, <2 x i64*> %ptr0.v.i0, <2 x i64> %ptr0.v.i1.2
; CHECK: %rtr0 = shufflevector <2 x i64*> %ptr0, <2 x i64*> undef, <2 x i32> zeroinitializer
; CHECK: %rtr3 = shufflevector <2 x i64*> %ptr0, <2 x i64*> undef, <2 x i32> <i32 1, i32 1>
; CHECK: %0 = bitcast <2 x i64*>* %c to <4 x i64*>*
; CHECK: %1 = shufflevector <2 x i64*> %rtr0, <2 x i64*> %rtr3, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
; CHECK: store <4 x i64*> %1, <4 x i64*>* %0, align 8
; CHECK: ret void
; CHECK-AO-LABEL: @test3(
; CHECK-AO-NOT: <4 x
}

View File

@ -1,170 +0,0 @@
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
; RUN: opt < %s -bb-vectorize -bb-vectorize-req-chain-depth=3 -bb-vectorize-ignore-target-info -instcombine -gvn -S | FileCheck %s
; RUN: opt < %s -bb-vectorize -bb-vectorize-req-chain-depth=3 -bb-vectorize-aligned-only -bb-vectorize-ignore-target-info -instcombine -gvn -S | FileCheck %s -check-prefix=CHECK-AO
; Simple 3-pair chain with loads and stores
; Two parallel scalar double chains (load, load, fmul, store) over adjacent
; elements. The default run is expected to fuse them into a single
; <2 x double> load/fmul/store sequence (see the CHECK lines below); the
; aligned-only run must produce no <2 x double> at all (CHECK-AO lines).
define void @test1(double* %a, double* %b, double* %c) nounwind uwtable readonly {
entry:
%i0 = load double, double* %a, align 8
%i1 = load double, double* %b, align 8
%mul = fmul double %i0, %i1
%arrayidx3 = getelementptr inbounds double, double* %a, i64 1
%i3 = load double, double* %arrayidx3, align 8
%arrayidx4 = getelementptr inbounds double, double* %b, i64 1
%i4 = load double, double* %arrayidx4, align 8
%mul5 = fmul double %i3, %i4
store double %mul, double* %c, align 8
%arrayidx5 = getelementptr inbounds double, double* %c, i64 1
store double %mul5, double* %arrayidx5, align 8
ret void
; CHECK-LABEL: @test1(
; CHECK: %i0.v.i0 = bitcast double* %a to <2 x double>*
; CHECK: %i1.v.i0 = bitcast double* %b to <2 x double>*
; CHECK: %i0 = load <2 x double>, <2 x double>* %i0.v.i0, align 8
; CHECK: %i1 = load <2 x double>, <2 x double>* %i1.v.i0, align 8
; CHECK: %mul = fmul <2 x double> %i0, %i1
; CHECK: %0 = bitcast double* %c to <2 x double>*
; CHECK: store <2 x double> %mul, <2 x double>* %0, align 8
; CHECK: ret void
; CHECK-AO-LABEL: @test1(
; CHECK-AO-NOT: <2 x double>
}
; Simple chain with extending loads and stores
; Same shape as test1, but the loads are float (align 4) widened via fpext
; to double before the fmul. The default run vectorizes the whole chain
; (<2 x float> loads, <2 x double> fpext/fmul/store); the aligned-only run
; must not form any <2 x double> (CHECK-AO lines).
define void @test2(float* %a, float* %b, double* %c) nounwind uwtable readonly {
entry:
%i0f = load float, float* %a, align 4
%i0 = fpext float %i0f to double
%i1f = load float, float* %b, align 4
%i1 = fpext float %i1f to double
%mul = fmul double %i0, %i1
%arrayidx3 = getelementptr inbounds float, float* %a, i64 1
%i3f = load float, float* %arrayidx3, align 4
%i3 = fpext float %i3f to double
%arrayidx4 = getelementptr inbounds float, float* %b, i64 1
%i4f = load float, float* %arrayidx4, align 4
%i4 = fpext float %i4f to double
%mul5 = fmul double %i3, %i4
store double %mul, double* %c, align 8
%arrayidx5 = getelementptr inbounds double, double* %c, i64 1
store double %mul5, double* %arrayidx5, align 8
ret void
; CHECK-LABEL: @test2(
; CHECK: %i0f.v.i0 = bitcast float* %a to <2 x float>*
; CHECK: %i1f.v.i0 = bitcast float* %b to <2 x float>*
; CHECK: %i0f = load <2 x float>, <2 x float>* %i0f.v.i0, align 4
; CHECK: %i0 = fpext <2 x float> %i0f to <2 x double>
; CHECK: %i1f = load <2 x float>, <2 x float>* %i1f.v.i0, align 4
; CHECK: %i1 = fpext <2 x float> %i1f to <2 x double>
; CHECK: %mul = fmul <2 x double> %i0, %i1
; CHECK: %0 = bitcast double* %c to <2 x double>*
; CHECK: store <2 x double> %mul, <2 x double>* %0, align 8
; CHECK: ret void
; CHECK-AO-LABEL: @test2(
; CHECK-AO-NOT: <2 x double>
}
; Simple chain with loads and truncating stores
; Double chains whose results are fptrunc'd to float before the stores.
; The default run vectorizes end to end. In the aligned-only run the double
; loads stay scalar, but the fmul/fptrunc/store part is still vectorized:
; the scalar loads are packed with insertelement into <2 x double> first
; (see the CHECK-AO lines).
define void @test3(double* %a, double* %b, float* %c) nounwind uwtable readonly {
entry:
%i0 = load double, double* %a, align 8
%i1 = load double, double* %b, align 8
%mul = fmul double %i0, %i1
%mulf = fptrunc double %mul to float
%arrayidx3 = getelementptr inbounds double, double* %a, i64 1
%i3 = load double, double* %arrayidx3, align 8
%arrayidx4 = getelementptr inbounds double, double* %b, i64 1
%i4 = load double, double* %arrayidx4, align 8
%mul5 = fmul double %i3, %i4
%mul5f = fptrunc double %mul5 to float
store float %mulf, float* %c, align 8
%arrayidx5 = getelementptr inbounds float, float* %c, i64 1
store float %mul5f, float* %arrayidx5, align 4
ret void
; CHECK-LABEL: @test3(
; CHECK: %i0.v.i0 = bitcast double* %a to <2 x double>*
; CHECK: %i1.v.i0 = bitcast double* %b to <2 x double>*
; CHECK: %i0 = load <2 x double>, <2 x double>* %i0.v.i0, align 8
; CHECK: %i1 = load <2 x double>, <2 x double>* %i1.v.i0, align 8
; CHECK: %mul = fmul <2 x double> %i0, %i1
; CHECK: %mulf = fptrunc <2 x double> %mul to <2 x float>
; CHECK: %0 = bitcast float* %c to <2 x float>*
; CHECK: store <2 x float> %mulf, <2 x float>* %0, align 8
; CHECK: ret void
; CHECK-AO-LABEL: @test3(
; CHECK-AO: %i0 = load double, double* %a, align 8
; CHECK-AO: %i1 = load double, double* %b, align 8
; CHECK-AO: %arrayidx3 = getelementptr inbounds double, double* %a, i64 1
; CHECK-AO: %i3 = load double, double* %arrayidx3, align 8
; CHECK-AO: %arrayidx4 = getelementptr inbounds double, double* %b, i64 1
; CHECK-AO: %i4 = load double, double* %arrayidx4, align 8
; CHECK-AO: %mul.v.i1.1 = insertelement <2 x double> undef, double %i1, i32 0
; CHECK-AO: %mul.v.i1.2 = insertelement <2 x double> %mul.v.i1.1, double %i4, i32 1
; CHECK-AO: %mul.v.i0.1 = insertelement <2 x double> undef, double %i0, i32 0
; CHECK-AO: %mul.v.i0.2 = insertelement <2 x double> %mul.v.i0.1, double %i3, i32 1
; CHECK-AO: %mul = fmul <2 x double> %mul.v.i0.2, %mul.v.i1.2
; CHECK-AO: %mulf = fptrunc <2 x double> %mul to <2 x float>
; CHECK-AO: %0 = bitcast float* %c to <2 x float>*
; CHECK-AO: store <2 x float> %mulf, <2 x float>* %0, align 8
; CHECK-AO: ret void
}
; Simple 3-pair chain with loads and stores (unreachable)
; The vectorizable chain lives in %if.then, which is only entered through a
; branch that follows an 'unreachable' (i.e. dead control flow). Neither run
; may introduce <2 x double> here.
define void @test4(i1 %bool, double* %a, double* %b, double* %c) nounwind uwtable readonly {
entry:
br i1 %bool, label %if.then1, label %if.end
if.then1:
unreachable
br label %if.then
if.then:
%i0 = load double, double* %a, align 8
%i1 = load double, double* %b, align 8
%mul = fmul double %i0, %i1
%arrayidx3 = getelementptr inbounds double, double* %a, i64 1
%i3 = load double, double* %arrayidx3, align 8
%arrayidx4 = getelementptr inbounds double, double* %b, i64 1
%i4 = load double, double* %arrayidx4, align 8
%mul5 = fmul double %i3, %i4
store double %mul, double* %c, align 8
%arrayidx5 = getelementptr inbounds double, double* %c, i64 1
store double %mul5, double* %arrayidx5, align 8
br label %if.end
if.end:
ret void
; CHECK-LABEL: @test4(
; CHECK-NOT: <2 x double>
; CHECK-AO-LABEL: @test4(
; CHECK-AO-NOT: <2 x double>
}
; Simple 3-pair chain with loads and stores
; Like test1, but the store to %c comes after the store to %c+1 and carries
; only 'align 4'. The vectorized store keeps that lower alignment (the CHECK
; line expects 'align 4' on the <2 x double> store); the aligned-only run
; must not vectorize.
define void @test5(double* %a, double* %b, double* %c) nounwind uwtable readonly {
entry:
%i0 = load double, double* %a, align 8
%i1 = load double, double* %b, align 8
%mul = fmul double %i0, %i1
%arrayidx3 = getelementptr inbounds double, double* %a, i64 1
%i3 = load double, double* %arrayidx3, align 8
%arrayidx4 = getelementptr inbounds double, double* %b, i64 1
%i4 = load double, double* %arrayidx4, align 8
%mul5 = fmul double %i3, %i4
%arrayidx5 = getelementptr inbounds double, double* %c, i64 1
store double %mul5, double* %arrayidx5, align 8
store double %mul, double* %c, align 4
ret void
; CHECK-LABEL: @test5(
; CHECK: %i0.v.i0 = bitcast double* %a to <2 x double>*
; CHECK: %i1.v.i0 = bitcast double* %b to <2 x double>*
; CHECK: %i0 = load <2 x double>, <2 x double>* %i0.v.i0, align 8
; CHECK: %i1 = load <2 x double>, <2 x double>* %i1.v.i0, align 8
; CHECK: %mul = fmul <2 x double> %i0, %i1
; CHECK: %0 = bitcast double* %c to <2 x double>*
; CHECK: store <2 x double> %mul, <2 x double>* %0, align 4
; CHECK: ret void
; CHECK-AO-LABEL: @test5(
; CHECK-AO-NOT: <2 x double>
}

View File

@ -1,59 +0,0 @@
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
; RUN: opt < %s -bb-vectorize -bb-vectorize-req-chain-depth=3 -bb-vectorize-ignore-target-info -instcombine -gvn -S | FileCheck %s
; RUN: opt < %s -bb-vectorize -bb-vectorize-req-chain-depth=3 -bb-vectorize-no-bools -bb-vectorize-ignore-target-info -instcombine -gvn -S | FileCheck %s -check-prefix=CHECK-NB
; Basic depth-3 chain with select
; fsub/fmul chains end in selects driven by scalar i1 conditions. The two
; i1 conditions are packed into a <2 x i1> with insertelement so the selects
; vectorize too; the final fmul combines the two extracted lanes.
define double @test1(double %A1, double %A2, double %B1, double %B2, i1 %C1, i1 %C2) {
; CHECK-LABEL: @test1(
; CHECK: %X1.v.i1.1 = insertelement <2 x double> undef, double %B1, i32 0
; CHECK: %X1.v.i1.2 = insertelement <2 x double> %X1.v.i1.1, double %B2, i32 1
; CHECK: %X1.v.i0.1 = insertelement <2 x double> undef, double %A1, i32 0
; CHECK: %X1.v.i0.2 = insertelement <2 x double> %X1.v.i0.1, double %A2, i32 1
%X1 = fsub double %A1, %B1
%X2 = fsub double %A2, %B2
; CHECK: %X1 = fsub <2 x double> %X1.v.i0.2, %X1.v.i1.2
%Y1 = fmul double %X1, %A1
%Y2 = fmul double %X2, %A2
; CHECK: %Y1 = fmul <2 x double> %X1, %X1.v.i0.2
%Z1 = select i1 %C1, double %Y1, double %B1
%Z2 = select i1 %C2, double %Y2, double %B2
; CHECK: %Z1.v.i0.1 = insertelement <2 x i1> undef, i1 %C1, i32 0
; CHECK: %Z1.v.i0.2 = insertelement <2 x i1> %Z1.v.i0.1, i1 %C2, i32 1
; CHECK: %Z1 = select <2 x i1> %Z1.v.i0.2, <2 x double> %Y1, <2 x double> %X1.v.i1.2
%R = fmul double %Z1, %Z2
; CHECK: %Z1.v.r1 = extractelement <2 x double> %Z1, i32 0
; CHECK: %Z1.v.r2 = extractelement <2 x double> %Z1, i32 1
; CHECK: %R = fmul double %Z1.v.r1, %Z1.v.r2
ret double %R
; CHECK: ret double %R
}
; Basic depth-3 chain with select (and vect. compare)
; As test1, but the select conditions come from fcmp on chain values, so the
; compare itself vectorizes to <2 x double>/<2 x i1>. The -bb-vectorize-no-bools
; run must keep the compare scalar (the CHECK-NB line matches a scalar fcmp).
define double @test2(double %A1, double %A2, double %B1, double %B2) {
; CHECK-LABEL: @test2(
; CHECK-NB-LABEL: @test2(
; CHECK: %X1.v.i1.1 = insertelement <2 x double> undef, double %B1, i32 0
; CHECK: %X1.v.i1.2 = insertelement <2 x double> %X1.v.i1.1, double %B2, i32 1
; CHECK: %X1.v.i0.1 = insertelement <2 x double> undef, double %A1, i32 0
; CHECK: %X1.v.i0.2 = insertelement <2 x double> %X1.v.i0.1, double %A2, i32 1
%X1 = fsub double %A1, %B1
%X2 = fsub double %A2, %B2
; CHECK: %X1 = fsub <2 x double> %X1.v.i0.2, %X1.v.i1.2
%Y1 = fmul double %X1, %A1
%Y2 = fmul double %X2, %A2
; CHECK: %Y1 = fmul <2 x double> %X1, %X1.v.i0.2
%C1 = fcmp ogt double %X1, %A1
%C2 = fcmp ogt double %X2, %A2
; CHECK: %C1 = fcmp ogt <2 x double> %X1, %X1.v.i0.2
; CHECK-NB: fcmp ogt double
%Z1 = select i1 %C1, double %Y1, double %B1
%Z2 = select i1 %C2, double %Y2, double %B2
; CHECK: %Z1 = select <2 x i1> %C1, <2 x double> %Y1, <2 x double> %X1.v.i1.2
%R = fmul double %Z1, %Z2
; CHECK: %Z1.v.r1 = extractelement <2 x double> %Z1, i32 0
; CHECK: %Z1.v.r2 = extractelement <2 x double> %Z1, i32 1
; CHECK: %R = fmul double %Z1.v.r1, %Z1.v.r2
ret double %R
; CHECK: ret double %R
}

View File

@ -1,18 +0,0 @@
target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v128:128:128-n32:64"
target triple = "powerpc64-unknown-linux"
; RUN: opt < %s -bb-vectorize -bb-vectorize-req-chain-depth=3 -bb-vectorize-vector-bits=256 -instcombine -gvn -S | FileCheck %s
; Basic depth-3 chain (target-specific type should not vectorize)
; On powerpc64, ppc_fp128 is a target-specific type; even with 256-bit
; vectors requested, the pass must not form <2 x ppc_fp128>.
define ppc_fp128 @test7(ppc_fp128 %A1, ppc_fp128 %A2, ppc_fp128 %B1, ppc_fp128 %B2) {
; CHECK-LABEL: @test7(
; CHECK-NOT: <2 x ppc_fp128>
%X1 = fsub ppc_fp128 %A1, %B1
%X2 = fsub ppc_fp128 %A2, %B2
%Y1 = fmul ppc_fp128 %X1, %A1
%Y2 = fmul ppc_fp128 %X2, %A2
%Z1 = fadd ppc_fp128 %Y1, %B1
%Z2 = fadd ppc_fp128 %Y2, %B2
%R = fmul ppc_fp128 %Z1, %Z2
ret ppc_fp128 %R
}

View File

@ -1,209 +0,0 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -bb-vectorize -bb-vectorize-req-chain-depth=3 -bb-vectorize-ignore-target-info -instcombine -gvn -S | FileCheck %s
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
; Basic depth-3 chain
; Two parallel fsub/fmul/fadd chains of depth 3. Expected output: operands
; packed with insertelement, the chain executed as <2 x double>, and both
; lanes extracted for the final scalar fmul.
define double @test1(double %A1, double %A2, double %B1, double %B2) {
; CHECK-LABEL: @test1(
; CHECK-NEXT:    [[X1_V_I1_1:%.*]] = insertelement <2 x double> undef, double [[B1:%.*]], i32 0
; CHECK-NEXT:    [[X1_V_I1_2:%.*]] = insertelement <2 x double> [[X1_V_I1_1]], double [[B2:%.*]], i32 1
; CHECK-NEXT:    [[X1_V_I0_1:%.*]] = insertelement <2 x double> undef, double [[A1:%.*]], i32 0
; CHECK-NEXT:    [[X1_V_I0_2:%.*]] = insertelement <2 x double> [[X1_V_I0_1]], double [[A2:%.*]], i32 1
; CHECK-NEXT:    [[X1:%.*]] = fsub <2 x double> [[X1_V_I0_2]], [[X1_V_I1_2]]
; CHECK-NEXT:    [[Y1:%.*]] = fmul <2 x double> [[X1]], [[X1_V_I0_2]]
; CHECK-NEXT:    [[Z1:%.*]] = fadd <2 x double> [[Y1]], [[X1_V_I1_2]]
; CHECK-NEXT:    [[Z1_V_R1:%.*]] = extractelement <2 x double> [[Z1]], i32 0
; CHECK-NEXT:    [[Z1_V_R2:%.*]] = extractelement <2 x double> [[Z1]], i32 1
; CHECK-NEXT:    [[R:%.*]] = fmul double [[Z1_V_R1]], [[Z1_V_R2]]
; CHECK-NEXT:    ret double [[R]]
;
%X1 = fsub double %A1, %B1
%X2 = fsub double %A2, %B2
%Y1 = fmul double %X1, %A1
%Y2 = fmul double %X2, %A2
%Z1 = fadd double %Y1, %B1
%Z2 = fadd double %Y2, %B2
%R = fmul double %Z1, %Z2
ret double %R
}
; Basic depth-3 chain (last pair permuted)
; Same as test1, but the last pair crosses the chains (%Z1 uses %Y2, %Z2 uses
; %Y1). The expected output inserts the B operands in swapped order
; ([[Z1_V_I1_*]]) so the fadd still vectorizes.
define double @test2(double %A1, double %A2, double %B1, double %B2) {
; CHECK-LABEL: @test2(
; CHECK-NEXT:    [[X1_V_I1_1:%.*]] = insertelement <2 x double> undef, double [[B1:%.*]], i32 0
; CHECK-NEXT:    [[X1_V_I1_2:%.*]] = insertelement <2 x double> [[X1_V_I1_1]], double [[B2:%.*]], i32 1
; CHECK-NEXT:    [[X1_V_I0_1:%.*]] = insertelement <2 x double> undef, double [[A1:%.*]], i32 0
; CHECK-NEXT:    [[X1_V_I0_2:%.*]] = insertelement <2 x double> [[X1_V_I0_1]], double [[A2:%.*]], i32 1
; CHECK-NEXT:    [[X1:%.*]] = fsub <2 x double> [[X1_V_I0_2]], [[X1_V_I1_2]]
; CHECK-NEXT:    [[Y1:%.*]] = fmul <2 x double> [[X1]], [[X1_V_I0_2]]
; CHECK-NEXT:    [[Z1_V_I1_1:%.*]] = insertelement <2 x double> undef, double [[B2]], i32 0
; CHECK-NEXT:    [[Z1_V_I1_2:%.*]] = insertelement <2 x double> [[Z1_V_I1_1]], double [[B1]], i32 1
; CHECK-NEXT:    [[Z2:%.*]] = fadd <2 x double> [[Y1]], [[Z1_V_I1_2]]
; CHECK-NEXT:    [[Z2_V_R1:%.*]] = extractelement <2 x double> [[Z2]], i32 0
; CHECK-NEXT:    [[Z2_V_R2:%.*]] = extractelement <2 x double> [[Z2]], i32 1
; CHECK-NEXT:    [[R:%.*]] = fmul double [[Z2_V_R2]], [[Z2_V_R1]]
; CHECK-NEXT:    ret double [[R]]
;
%X1 = fsub double %A1, %B1
%X2 = fsub double %A2, %B2
%Y1 = fmul double %X1, %A1
%Y2 = fmul double %X2, %A2
%Z1 = fadd double %Y2, %B1
%Z2 = fadd double %Y1, %B2
%R = fmul double %Z1, %Z2
ret double %R
}
; Basic depth-3 chain (last pair first splat)
; Both final fadds read %Y2, so the expected output splats lane 1 of the
; vectorized fmul with a <i32 1, i32 1> shufflevector before the fadd.
define double @test3(double %A1, double %A2, double %B1, double %B2) {
; CHECK-LABEL: @test3(
; CHECK-NEXT:    [[X1_V_I1_1:%.*]] = insertelement <2 x double> undef, double [[B1:%.*]], i32 0
; CHECK-NEXT:    [[X1_V_I1_2:%.*]] = insertelement <2 x double> [[X1_V_I1_1]], double [[B2:%.*]], i32 1
; CHECK-NEXT:    [[X1_V_I0_1:%.*]] = insertelement <2 x double> undef, double [[A1:%.*]], i32 0
; CHECK-NEXT:    [[X1_V_I0_2:%.*]] = insertelement <2 x double> [[X1_V_I0_1]], double [[A2:%.*]], i32 1
; CHECK-NEXT:    [[X1:%.*]] = fsub <2 x double> [[X1_V_I0_2]], [[X1_V_I1_2]]
; CHECK-NEXT:    [[Y1:%.*]] = fmul <2 x double> [[X1]], [[X1_V_I0_2]]
; CHECK-NEXT:    [[Z1_V_I0:%.*]] = shufflevector <2 x double> [[Y1]], <2 x double> undef, <2 x i32> <i32 1, i32 1>
; CHECK-NEXT:    [[Z1:%.*]] = fadd <2 x double> [[Z1_V_I0]], [[X1_V_I1_2]]
; CHECK-NEXT:    [[Z1_V_R1:%.*]] = extractelement <2 x double> [[Z1]], i32 0
; CHECK-NEXT:    [[Z1_V_R2:%.*]] = extractelement <2 x double> [[Z1]], i32 1
; CHECK-NEXT:    [[R:%.*]] = fmul double [[Z1_V_R1]], [[Z1_V_R2]]
; CHECK-NEXT:    ret double [[R]]
;
%X1 = fsub double %A1, %B1
%X2 = fsub double %A2, %B2
%Y1 = fmul double %X1, %A1
%Y2 = fmul double %X2, %A2
%Z1 = fadd double %Y2, %B1
%Z2 = fadd double %Y2, %B2
%R = fmul double %Z1, %Z2
ret double %R
}
; Basic depth-3 chain (last pair second splat)
; Mirror of test3: both final fadds read %Y1, so lane 0 is splatted via a
; zeroinitializer shuffle mask before the vectorized fadd.
define double @test4(double %A1, double %A2, double %B1, double %B2) {
; CHECK-LABEL: @test4(
; CHECK-NEXT:    [[X1_V_I1_1:%.*]] = insertelement <2 x double> undef, double [[B1:%.*]], i32 0
; CHECK-NEXT:    [[X1_V_I1_2:%.*]] = insertelement <2 x double> [[X1_V_I1_1]], double [[B2:%.*]], i32 1
; CHECK-NEXT:    [[X1_V_I0_1:%.*]] = insertelement <2 x double> undef, double [[A1:%.*]], i32 0
; CHECK-NEXT:    [[X1_V_I0_2:%.*]] = insertelement <2 x double> [[X1_V_I0_1]], double [[A2:%.*]], i32 1
; CHECK-NEXT:    [[X1:%.*]] = fsub <2 x double> [[X1_V_I0_2]], [[X1_V_I1_2]]
; CHECK-NEXT:    [[Y1:%.*]] = fmul <2 x double> [[X1]], [[X1_V_I0_2]]
; CHECK-NEXT:    [[Z1_V_I0:%.*]] = shufflevector <2 x double> [[Y1]], <2 x double> undef, <2 x i32> zeroinitializer
; CHECK-NEXT:    [[Z1:%.*]] = fadd <2 x double> [[Z1_V_I0]], [[X1_V_I1_2]]
; CHECK-NEXT:    [[Z1_V_R1:%.*]] = extractelement <2 x double> [[Z1]], i32 0
; CHECK-NEXT:    [[Z1_V_R2:%.*]] = extractelement <2 x double> [[Z1]], i32 1
; CHECK-NEXT:    [[R:%.*]] = fmul double [[Z1_V_R1]], [[Z1_V_R2]]
; CHECK-NEXT:    ret double [[R]]
;
%X1 = fsub double %A1, %B1
%X2 = fsub double %A2, %B2
%Y1 = fmul double %X1, %A1
%Y2 = fmul double %X2, %A2
%Z1 = fadd double %Y1, %B1
%Z2 = fadd double %Y1, %B2
%R = fmul double %Z1, %Z2
ret double %R
}
; Basic depth-3 chain
; Inputs are already <2 x float>; the pairs are widened to <4 x float> by
; concatenating with shufflevector instead of insertelement, and the result
; is split back into two <2 x float> halves for the final fmul.
define <2 x float> @test5(<2 x float> %A1, <2 x float> %A2, <2 x float> %B1, <2 x float> %B2) {
; CHECK-LABEL: @test5(
; CHECK-NEXT:    [[X1_V_I1:%.*]] = shufflevector <2 x float> [[B1:%.*]], <2 x float> [[B2:%.*]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
; CHECK-NEXT:    [[X1_V_I0:%.*]] = shufflevector <2 x float> [[A1:%.*]], <2 x float> [[A2:%.*]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
; CHECK-NEXT:    [[X1:%.*]] = fsub <4 x float> [[X1_V_I0]], [[X1_V_I1]]
; CHECK-NEXT:    [[Y1:%.*]] = fmul <4 x float> [[X1]], [[X1_V_I0]]
; CHECK-NEXT:    [[Z1:%.*]] = fadd <4 x float> [[Y1]], [[X1_V_I1]]
; CHECK-NEXT:    [[Z1_V_R1:%.*]] = shufflevector <4 x float> [[Z1]], <4 x float> undef, <2 x i32> <i32 0, i32 1>
; CHECK-NEXT:    [[Z1_V_R2:%.*]] = shufflevector <4 x float> [[Z1]], <4 x float> undef, <2 x i32> <i32 2, i32 3>
; CHECK-NEXT:    [[R:%.*]] = fmul <2 x float> [[Z1_V_R1]], [[Z1_V_R2]]
; CHECK-NEXT:    ret <2 x float> [[R]]
;
%X1 = fsub <2 x float> %A1, %B1
%X2 = fsub <2 x float> %A2, %B2
%Y1 = fmul <2 x float> %X1, %A1
%Y2 = fmul <2 x float> %X2, %A2
%Z1 = fadd <2 x float> %Y1, %B1
%Z2 = fadd <2 x float> %Y2, %B2
%R = fmul <2 x float> %Z1, %Z2
ret <2 x float> %R
}
; Basic chain with shuffles
; Integer <8 x i8> chain that ends in two shufflevectors. The expected
; output runs the chain as <16 x i8> and fuses the two 8-wide shuffle masks
; into 16-wide masks ([[Q1_V_I1]] / [[Q1]]).
define <8 x i8> @test6(<8 x i8> %A1, <8 x i8> %A2, <8 x i8> %B1, <8 x i8> %B2) {
; CHECK-LABEL: @test6(
; CHECK-NEXT:    [[X1_V_I1:%.*]] = shufflevector <8 x i8> [[B1:%.*]], <8 x i8> [[B2:%.*]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
; CHECK-NEXT:    [[X1_V_I0:%.*]] = shufflevector <8 x i8> [[A1:%.*]], <8 x i8> [[A2:%.*]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
; CHECK-NEXT:    [[X1:%.*]] = sub <16 x i8> [[X1_V_I0]], [[X1_V_I1]]
; CHECK-NEXT:    [[Y1:%.*]] = mul <16 x i8> [[X1]], [[X1_V_I0]]
; CHECK-NEXT:    [[Z1:%.*]] = add <16 x i8> [[Y1]], [[X1_V_I1]]
; CHECK-NEXT:    [[Q1_V_I1:%.*]] = shufflevector <16 x i8> [[Z1]], <16 x i8> undef, <16 x i32> <i32 8, i32 undef, i32 10, i32 undef, i32 undef, i32 13, i32 undef, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
; CHECK-NEXT:    [[Q1:%.*]] = shufflevector <16 x i8> [[Z1]], <16 x i8> [[Q1_V_I1]], <16 x i32> <i32 23, i32 16, i32 6, i32 1, i32 21, i32 18, i32 4, i32 3, i32 14, i32 15, i32 8, i32 9, i32 10, i32 12, i32 12, i32 9>
; CHECK-NEXT:    [[Q1_V_R1:%.*]] = shufflevector <16 x i8> [[Q1]], <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
; CHECK-NEXT:    [[Q1_V_R2:%.*]] = shufflevector <16 x i8> [[Q1]], <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
; CHECK-NEXT:    [[R:%.*]] = mul <8 x i8> [[Q1_V_R1]], [[Q1_V_R2]]
; CHECK-NEXT:    ret <8 x i8> [[R]]
;
%X1 = sub <8 x i8> %A1, %B1
%X2 = sub <8 x i8> %A2, %B2
%Y1 = mul <8 x i8> %X1, %A1
%Y2 = mul <8 x i8> %X2, %A2
%Z1 = add <8 x i8> %Y1, %B1
%Z2 = add <8 x i8> %Y2, %B2
%Q1 = shufflevector <8 x i8> %Z1, <8 x i8> %Z2, <8 x i32> <i32 15, i32 8, i32 6, i32 1, i32 13, i32 10, i32 4, i32 3>
%Q2 = shufflevector <8 x i8> %Z2, <8 x i8> %Z2, <8 x i32> <i32 6, i32 7, i32 0, i32 1, i32 2, i32 4, i32 4, i32 1>
%R = mul <8 x i8> %Q1, %Q2
ret <8 x i8> %R
}
; Basic depth-3 chain (flipped order)
; Identical to test1 except %Z2 is defined before %Z1; pairing must not
; depend on the textual order of the pair members, so the expected output
; matches test1's.
define double @test7(double %A1, double %A2, double %B1, double %B2) {
; CHECK-LABEL: @test7(
; CHECK-NEXT:    [[X1_V_I1_1:%.*]] = insertelement <2 x double> undef, double [[B1:%.*]], i32 0
; CHECK-NEXT:    [[X1_V_I1_2:%.*]] = insertelement <2 x double> [[X1_V_I1_1]], double [[B2:%.*]], i32 1
; CHECK-NEXT:    [[X1_V_I0_1:%.*]] = insertelement <2 x double> undef, double [[A1:%.*]], i32 0
; CHECK-NEXT:    [[X1_V_I0_2:%.*]] = insertelement <2 x double> [[X1_V_I0_1]], double [[A2:%.*]], i32 1
; CHECK-NEXT:    [[X1:%.*]] = fsub <2 x double> [[X1_V_I0_2]], [[X1_V_I1_2]]
; CHECK-NEXT:    [[Y1:%.*]] = fmul <2 x double> [[X1]], [[X1_V_I0_2]]
; CHECK-NEXT:    [[Z1:%.*]] = fadd <2 x double> [[Y1]], [[X1_V_I1_2]]
; CHECK-NEXT:    [[Z1_V_R1:%.*]] = extractelement <2 x double> [[Z1]], i32 0
; CHECK-NEXT:    [[Z1_V_R2:%.*]] = extractelement <2 x double> [[Z1]], i32 1
; CHECK-NEXT:    [[R:%.*]] = fmul double [[Z1_V_R1]], [[Z1_V_R2]]
; CHECK-NEXT:    ret double [[R]]
;
%X1 = fsub double %A1, %B1
%X2 = fsub double %A2, %B2
%Y1 = fmul double %X1, %A1
%Y2 = fmul double %X2, %A2
%Z2 = fadd double %Y2, %B2
%Z1 = fadd double %Y1, %B1
%R = fmul double %Z1, %Z2
ret double %R
}
; Basic depth-3 chain (subclass data)
; Integer version where only %X1 carries 'nsw' (subclass data). The pair can
; still be fused, but the vectorized sub must drop the nsw flag (note the
; plain 'sub <2 x i64>' in the expected output).
define i64 @test8(i64 %A1, i64 %A2, i64 %B1, i64 %B2) {
; CHECK-LABEL: @test8(
; CHECK-NEXT:    [[X1_V_I1_1:%.*]] = insertelement <2 x i64> undef, i64 [[B1:%.*]], i32 0
; CHECK-NEXT:    [[X1_V_I1_2:%.*]] = insertelement <2 x i64> [[X1_V_I1_1]], i64 [[B2:%.*]], i32 1
; CHECK-NEXT:    [[X1_V_I0_1:%.*]] = insertelement <2 x i64> undef, i64 [[A1:%.*]], i32 0
; CHECK-NEXT:    [[X1_V_I0_2:%.*]] = insertelement <2 x i64> [[X1_V_I0_1]], i64 [[A2:%.*]], i32 1
; CHECK-NEXT:    [[X1:%.*]] = sub <2 x i64> [[X1_V_I0_2]], [[X1_V_I1_2]]
; CHECK-NEXT:    [[Y1:%.*]] = mul <2 x i64> [[X1]], [[X1_V_I0_2]]
; CHECK-NEXT:    [[Z1:%.*]] = add <2 x i64> [[Y1]], [[X1_V_I1_2]]
; CHECK-NEXT:    [[Z1_V_R1:%.*]] = extractelement <2 x i64> [[Z1]], i32 0
; CHECK-NEXT:    [[Z1_V_R2:%.*]] = extractelement <2 x i64> [[Z1]], i32 1
; CHECK-NEXT:    [[R:%.*]] = mul i64 [[Z1_V_R1]], [[Z1_V_R2]]
; CHECK-NEXT:    ret i64 [[R]]
;
%X1 = sub nsw i64 %A1, %B1
%X2 = sub i64 %A2, %B2
%Y1 = mul i64 %X1, %A1
%Y2 = mul i64 %X2, %A2
%Z1 = add i64 %Y1, %B1
%Z2 = add i64 %Y2, %B2
%R = mul i64 %Z1, %Z2
ret i64 %R
}

View File

@ -1,38 +0,0 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -bb-vectorize -bb-vectorize-req-chain-depth=3 -bb-vectorize-vector-bits=192 -bb-vectorize-ignore-target-info -instcombine -gvn -S | FileCheck %s
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
; Basic depth-3 chain
; With -bb-vectorize-vector-bits=192, three parallel double chains fit in a
; <3 x double>; the expected output packs A1..A3 and B1..B3, runs the chain
; 3-wide, and extracts all three lanes for the final scalar fmuls.
define double @test1(double %A1, double %A2, double %A3, double %B1, double %B2, double %B3) {
; CHECK-LABEL: @test1(
; CHECK-NEXT:    [[X1_V_I1_11:%.*]] = insertelement <3 x double> undef, double [[B1:%.*]], i32 0
; CHECK-NEXT:    [[X1_V_I1_22:%.*]] = insertelement <3 x double> [[X1_V_I1_11]], double [[B2:%.*]], i32 1
; CHECK-NEXT:    [[X1_V_I1:%.*]] = insertelement <3 x double> [[X1_V_I1_22]], double [[B3:%.*]], i32 2
; CHECK-NEXT:    [[X1_V_I0_13:%.*]] = insertelement <3 x double> undef, double [[A1:%.*]], i32 0
; CHECK-NEXT:    [[X1_V_I0_24:%.*]] = insertelement <3 x double> [[X1_V_I0_13]], double [[A2:%.*]], i32 1
; CHECK-NEXT:    [[X1_V_I0:%.*]] = insertelement <3 x double> [[X1_V_I0_24]], double [[A3:%.*]], i32 2
; CHECK-NEXT:    [[X1:%.*]] = fsub <3 x double> [[X1_V_I0]], [[X1_V_I1]]
; CHECK-NEXT:    [[Y1:%.*]] = fmul <3 x double> [[X1]], [[X1_V_I0]]
; CHECK-NEXT:    [[Z1:%.*]] = fadd <3 x double> [[Y1]], [[X1_V_I1]]
; CHECK-NEXT:    [[Z1_V_R210:%.*]] = extractelement <3 x double> [[Z1]], i32 2
; CHECK-NEXT:    [[Z1_V_R1:%.*]] = extractelement <3 x double> [[Z1]], i32 0
; CHECK-NEXT:    [[Z1_V_R2:%.*]] = extractelement <3 x double> [[Z1]], i32 1
; CHECK-NEXT:    [[R1:%.*]] = fmul double [[Z1_V_R1]], [[Z1_V_R2]]
; CHECK-NEXT:    [[R:%.*]] = fmul double [[R1]], [[Z1_V_R210]]
; CHECK-NEXT:    ret double [[R]]
;
%X1 = fsub double %A1, %B1
%X2 = fsub double %A2, %B2
%X3 = fsub double %A3, %B3
%Y1 = fmul double %X1, %A1
%Y2 = fmul double %X2, %A2
%Y3 = fmul double %X3, %A3
%Z1 = fadd double %Y1, %B1
%Z2 = fadd double %Y2, %B2
%Z3 = fadd double %Y3, %B3
%R1 = fmul double %Z1, %Z2
%R = fmul double %R1, %Z3
ret double %R
}

View File

@ -1,43 +0,0 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -bb-vectorize -S | FileCheck %s
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"
@d = external global [1 x [10 x [1 x i16]]], align 16
; Pairs of <4 x i16> selects (one on the scalar i1 %bool, one on the
; <4 x i1> %boolvec) feeding adjacent <4 x i16> stores. The expected output
; fuses each pair into <8 x i16> ops, widening the vector i1 condition with
; a shufflevector and merging the two stores into one <8 x i16> store.
define void @test() {
; CHECK-LABEL: @test(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[BOOL:%.*]] = icmp ne i32 undef, 0
; CHECK-NEXT:    [[BOOLVEC:%.*]] = icmp ne <4 x i32> undef, zeroinitializer
; CHECK-NEXT:    br label [[BODY:%.*]]
; CHECK:       body:
; CHECK-NEXT:    [[TMP0:%.*]] = select i1 [[BOOL]], <4 x i16> <i16 -2, i16 -2, i16 -2, i16 -2>, <4 x i16> <i16 -3, i16 -3, i16 -3, i16 -3>
; CHECK-NEXT:    [[TMP1:%.*]] = select i1 [[BOOL]], <4 x i16> <i16 -2, i16 -2, i16 -2, i16 -2>, <4 x i16> <i16 -3, i16 -3, i16 -3, i16 -3>
; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <4 x i16> [[TMP0]], <4 x i16> [[TMP1]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
; CHECK-NEXT:    [[TMP3:%.*]] = shufflevector <4 x i1> [[BOOLVEC]], <4 x i1> [[BOOLVEC]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
; CHECK-NEXT:    [[TMP4:%.*]] = select <8 x i1> [[TMP3]], <8 x i16> <i16 -3, i16 -3, i16 -3, i16 -3, i16 -3, i16 -3, i16 -3, i16 -3>, <8 x i16> [[TMP2]]
; CHECK-NEXT:    store <8 x i16> [[TMP4]], <8 x i16>* bitcast (i16* getelementptr ([1 x [10 x [1 x i16]]], [1 x [10 x [1 x i16]]]* @d, i64 0, i64 0, i64 undef, i64 0) to <8 x i16>*), align 2
; CHECK-NEXT:    ret void
;
entry:
%bool = icmp ne i32 undef, 0
%boolvec = icmp ne <4 x i32> undef, zeroinitializer
br label %body
body:
%0 = select i1 %bool, <4 x i16> <i16 -2, i16 -2, i16 -2, i16 -2>, <4 x i16> <i16 -3, i16 -3, i16 -3, i16 -3>
%1 = select i1 %bool, <4 x i16> <i16 -2, i16 -2, i16 -2, i16 -2>, <4 x i16> <i16 -3, i16 -3, i16 -3, i16 -3>
%2 = select <4 x i1> %boolvec, <4 x i16> <i16 -3, i16 -3, i16 -3, i16 -3>, <4 x i16> %0
%3 = select <4 x i1> %boolvec, <4 x i16> <i16 -3, i16 -3, i16 -3, i16 -3>, <4 x i16> %1
%4 = add nsw <4 x i16> %2, zeroinitializer
%5 = add nsw <4 x i16> %3, zeroinitializer
%6 = getelementptr inbounds [1 x [10 x [1 x i16]]], [1 x [10 x [1 x i16]]]* @d, i64 0, i64 0, i64 undef, i64 0
%7 = bitcast i16* %6 to <4 x i16>*
store <4 x i16> %4, <4 x i16>* %7, align 2
%8 = getelementptr [1 x [10 x [1 x i16]]], [1 x [10 x [1 x i16]]]* @d, i64 0, i64 0, i64 undef, i64 4
%9 = bitcast i16* %8 to <4 x i16>*
store <4 x i16> %5, <4 x i16>* %9, align 2
ret void
}

View File

@ -1,18 +0,0 @@
; RUN: opt < %s -bb-vectorize -bb-vectorize-req-chain-depth=3 -instcombine -gvn -S -mtriple=xcore | FileCheck %s
target datalayout = "e-p:32:32:32-a0:0:32-n32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-f16:16:32-f32:32:32-f64:32:32"
target triple = "xcore"
; Basic depth-3 chain
; Same depth-3 chain as the generic tests, but targeting xcore; the chain is
; expected to stay scalar (no <2 x double> in the output).
define double @test1(double %A1, double %A2, double %B1, double %B2) {
; CHECK-LABEL: @test1(
; CHECK-NOT: <2 x double>
%X1 = fsub double %A1, %B1
%X2 = fsub double %A2, %B2
%Y1 = fmul double %X1, %A1
%Y2 = fmul double %X2, %A2
%Z1 = fadd double %Y1, %B1
%Z2 = fadd double %Y2, %B2
%R = fmul double %Z1, %Z2
ret double %R
}