Remove the BBVectorize pass.

It served us well, helped kick-start much of the vectorization efforts
in LLVM, etc. Its time has come and passed. Back in 2014:
http://lists.llvm.org/pipermail/llvm-dev/2014-November/079091.html

Time to actually let go and move forward. =]

I've updated the release notes to cover both the removal of the pass and the
deprecation of the corresponding C API.

llvm-svn: 306797
This commit is contained in:
Chandler Carruth 2017-06-30 07:09:08 +00:00
parent e24f434eb2
commit 3545a9e1f9
43 changed files with 18 additions and 6180 deletions

View File

@ -70,7 +70,7 @@ D: Branch weights and BlockFrequencyInfo
N: Hal Finkel
E: hfinkel@anl.gov
D: BBVectorize, the loop reroller, alias analysis and the PowerPC target
D: The loop reroller, alias analysis and the PowerPC target
N: Dan Gohman
E: sunfish@mozilla.com

View File

@ -54,8 +54,9 @@ Non-comprehensive list of changes in this release
its nature as a general purpose PDB manipulation / diagnostics tool that does
more than just dumping contents.
* ... next change ...
* The ``BBVectorize`` pass has been removed. It was fully replaced and no
longer used back in 2014, but we didn't get around to removing it. Now it is
gone. The SLP vectorizer is the suggested non-loop vectorization pass.
.. NOTE
If you would like to document a larger change, then you can add a
@ -111,7 +112,11 @@ Changes to the OCaml bindings
Changes to the C API
--------------------
During this release ...
* Deprecated the ``LLVMAddBBVectorizePass`` interface since the ``BBVectorize``
pass has been removed. It is now a no-op and will be removed in the next
release. Use ``LLVMAddSLPVectorizePass`` instead to get the supported SLP
vectorizer.
External Open Source Projects Using LLVM 5
==========================================

View File

@ -33,7 +33,7 @@ extern "C" {
* @{
*/
/** See llvm::createBBVectorizePass function. */
/** DEPRECATED - Use LLVMAddSLPVectorizePass */
void LLVMAddBBVectorizePass(LLVMPassManagerRef PM);
/** See llvm::createLoopVectorizePass function. */

View File

@ -70,7 +70,6 @@ void initializeAlwaysInlinerLegacyPassPass(PassRegistry&);
void initializeArgPromotionPass(PassRegistry&);
void initializeAssumptionCacheTrackerPass(PassRegistry&);
void initializeAtomicExpandPass(PassRegistry&);
void initializeBBVectorizePass(PassRegistry&);
void initializeBDCELegacyPassPass(PassRegistry&);
void initializeBarrierNoopPass(PassRegistry&);
void initializeBasicAAWrapperPassPass(PassRegistry&);

View File

@ -195,7 +195,6 @@ namespace {
(void) llvm::createLoopVectorizePass();
(void) llvm::createSLPVectorizerPass();
(void) llvm::createLoadStoreVectorizerPass();
(void) llvm::createBBVectorizePass();
(void) llvm::createPartiallyInlineLibCallsPass();
(void) llvm::createScalarizerPass();
(void) llvm::createSeparateConstOffsetFromGEPPass();

View File

@ -145,7 +145,6 @@ public:
bool DisableTailCalls;
bool DisableUnitAtATime;
bool DisableUnrollLoops;
bool BBVectorize;
bool SLPVectorize;
bool LoopVectorize;
bool RerollLoops;

View File

@ -106,13 +106,6 @@ struct VectorizeConfig {
VectorizeConfig();
};
//===----------------------------------------------------------------------===//
//
// BBVectorize - A basic-block vectorization pass.
//
BasicBlockPass *
createBBVectorizePass(const VectorizeConfig &C = VectorizeConfig());
//===----------------------------------------------------------------------===//
//
// LoopVectorize - Create a loop vectorization pass.

View File

@ -55,10 +55,6 @@ static cl::opt<bool>
RunSLPVectorization("vectorize-slp", cl::Hidden,
cl::desc("Run the SLP vectorization passes"));
static cl::opt<bool>
RunBBVectorization("vectorize-slp-aggressive", cl::Hidden,
cl::desc("Run the BB vectorization passes"));
static cl::opt<bool>
UseGVNAfterVectorization("use-gvn-after-vectorization",
cl::init(false), cl::Hidden,
@ -166,7 +162,6 @@ PassManagerBuilder::PassManagerBuilder() {
Inliner = nullptr;
DisableUnitAtATime = false;
DisableUnrollLoops = false;
BBVectorize = RunBBVectorization;
SLPVectorize = RunSLPVectorization;
LoopVectorize = RunLoopVectorization;
RerollLoops = RunLoopRerolling;
@ -384,26 +379,8 @@ void PassManagerBuilder::addFunctionSimplificationPasses(
if (RerollLoops)
MPM.add(createLoopRerollPass());
if (!RunSLPAfterLoopVectorization) {
if (SLPVectorize)
MPM.add(createSLPVectorizerPass()); // Vectorize parallel scalar chains.
if (BBVectorize) {
MPM.add(createBBVectorizePass());
addInstructionCombiningPass(MPM);
addExtensionsToPM(EP_Peephole, MPM);
if (OptLevel > 1 && UseGVNAfterVectorization)
MPM.add(NewGVN
? createNewGVNPass()
: createGVNPass(DisableGVNLoadPRE)); // Remove redundancies
else
MPM.add(createEarlyCSEPass()); // Catch trivial redundancies
// BBVectorize may have significantly shortened a loop body; unroll again.
if (!DisableUnrollLoops)
MPM.add(createLoopUnrollPass(OptLevel));
}
}
if (!RunSLPAfterLoopVectorization && SLPVectorize)
MPM.add(createSLPVectorizerPass()); // Vectorize parallel scalar chains.
MPM.add(createAggressiveDCEPass()); // Delete dead instructions
MPM.add(createCFGSimplificationPass()); // Merge & remove BBs
@ -635,28 +612,10 @@ void PassManagerBuilder::populateModulePassManager(
addInstructionCombiningPass(MPM);
}
if (RunSLPAfterLoopVectorization) {
if (SLPVectorize) {
MPM.add(createSLPVectorizerPass()); // Vectorize parallel scalar chains.
if (OptLevel > 1 && ExtraVectorizerPasses) {
MPM.add(createEarlyCSEPass());
}
}
if (BBVectorize) {
MPM.add(createBBVectorizePass());
addInstructionCombiningPass(MPM);
addExtensionsToPM(EP_Peephole, MPM);
if (OptLevel > 1 && UseGVNAfterVectorization)
MPM.add(NewGVN
? createNewGVNPass()
: createGVNPass(DisableGVNLoadPRE)); // Remove redundancies
else
MPM.add(createEarlyCSEPass()); // Catch trivial redundancies
// BBVectorize may have significantly shortened a loop body; unroll again.
if (!DisableUnrollLoops)
MPM.add(createLoopUnrollPass(OptLevel));
if (RunSLPAfterLoopVectorization && SLPVectorize) {
MPM.add(createSLPVectorizerPass()); // Vectorize parallel scalar chains.
if (OptLevel > 1 && ExtraVectorizerPasses) {
MPM.add(createEarlyCSEPass());
}
}

File diff suppressed because it is too large Load Diff

View File

@ -1,5 +1,4 @@
add_llvm_library(LLVMVectorize
BBVectorize.cpp
LoadStoreVectorizer.cpp
LoopVectorize.cpp
SLPVectorizer.cpp

View File

@ -26,7 +26,6 @@ using namespace llvm;
/// initializeVectorizationPasses - Initialize all passes linked into the
/// Vectorization library.
void llvm::initializeVectorization(PassRegistry &Registry) {
initializeBBVectorizePass(Registry);
initializeLoopVectorizePass(Registry);
initializeSLPVectorizerPass(Registry);
initializeLoadStoreVectorizerPass(Registry);
@ -36,8 +35,8 @@ void LLVMInitializeVectorization(LLVMPassRegistryRef R) {
initializeVectorization(*unwrap(R));
}
// DEPRECATED: Remove after the LLVM 5 release.
void LLVMAddBBVectorizePass(LLVMPassManagerRef PM) {
unwrap(PM)->add(createBBVectorizePass());
}
void LLVMAddLoopVectorizePass(LLVMPassManagerRef PM) {

View File

@ -2,7 +2,7 @@
; RUN: opt -O1 -S -debug %s 2>&1 | FileCheck %s --check-prefix=OPT-O1
; RUN: opt -O2 -S -debug %s 2>&1 | FileCheck %s --check-prefix=OPT-O1 --check-prefix=OPT-O2O3
; RUN: opt -O3 -S -debug %s 2>&1 | FileCheck %s --check-prefix=OPT-O1 --check-prefix=OPT-O2O3
; RUN: opt -bb-vectorize -dce -die -gvn-hoist -loweratomic -S -debug %s 2>&1 | FileCheck %s --check-prefix=OPT-MORE
; RUN: opt -dce -die -gvn-hoist -loweratomic -S -debug %s 2>&1 | FileCheck %s --check-prefix=OPT-MORE
; RUN: opt -indvars -licm -loop-deletion -loop-extract -loop-idiom -loop-instsimplify -loop-reduce -loop-reroll -loop-rotate -loop-unroll -loop-unswitch -S -debug %s 2>&1 | FileCheck %s --check-prefix=OPT-LOOP
; REQUIRES: asserts
@ -55,7 +55,6 @@ attributes #0 = { optnone noinline }
; OPT-O2O3-DAG: Skipping pass 'SLP Vectorizer'
; Additional IR passes that opt doesn't turn on by default.
; OPT-MORE-DAG: Skipping pass 'Basic-Block Vectorization'
; OPT-MORE-DAG: Skipping pass 'Dead Code Elimination'
; OPT-MORE-DAG: Skipping pass 'Dead Instruction Elimination'
; OPT-MORE-DAG: Skipping pass 'Lower atomic intrinsics

View File

@ -1,16 +0,0 @@
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"
; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7 -bb-vectorize -S | FileCheck %s
%"struct.btSoftBody" = type { float, float, float*, i8 }
define void @test1(%"struct.btSoftBody"* %n1, %"struct.btSoftBody"* %n2) uwtable align 2 {
entry:
%tobool15 = icmp ne %"struct.btSoftBody"* %n1, null
%cond16 = zext i1 %tobool15 to i32
%tobool21 = icmp ne %"struct.btSoftBody"* %n2, null
%cond22 = zext i1 %tobool21 to i32
ret void
; CHECK-LABEL: @test1(
}

View File

@ -1,61 +0,0 @@
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"
; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7 -bb-vectorize -bb-vectorize-req-chain-depth=3 -instcombine -gvn -S | FileCheck %s
; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7 -basicaa -loop-unroll -unroll-partial-threshold=45 -unroll-allow-partial -bb-vectorize -bb-vectorize-req-chain-depth=3 -instcombine -gvn -S | FileCheck %s -check-prefix=CHECK-UNRL
; The second check covers the use of alias analysis (with loop unrolling).
define void @test1(double* noalias %out, double* noalias %in1, double* noalias %in2) nounwind uwtable {
entry:
br label %for.body
; CHECK-LABEL: @test1(
; CHECK-UNRL-LABEL: @test1(
for.body: ; preds = %for.body, %entry
%indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
%arrayidx = getelementptr inbounds double, double* %in1, i64 %indvars.iv
%0 = load double, double* %arrayidx, align 8
%arrayidx2 = getelementptr inbounds double, double* %in2, i64 %indvars.iv
%1 = load double, double* %arrayidx2, align 8
%mul = fmul double %0, %0
%mul3 = fmul double %0, %1
%add = fadd double %mul, %mul3
%add4 = fadd double %1, %1
%add5 = fadd double %add4, %0
%mul6 = fmul double %0, %add5
%add7 = fadd double %add, %mul6
%mul8 = fmul double %1, %1
%add9 = fadd double %0, %0
%add10 = fadd double %add9, %0
%mul11 = fmul double %mul8, %add10
%add12 = fadd double %add7, %mul11
%arrayidx14 = getelementptr inbounds double, double* %out, i64 %indvars.iv
store double %add12, double* %arrayidx14, align 8
%indvars.iv.next = add i64 %indvars.iv, 1
%lftr.wideiv = trunc i64 %indvars.iv.next to i32
%exitcond = icmp eq i32 %lftr.wideiv, 10
br i1 %exitcond, label %for.end, label %for.body
; CHECK: insertelement
; CHECK-NEXT: insertelement
; CHECK-NEXT: fadd <2 x double>
; CHECK-NEXT: insertelement
; CHECK-NEXT: shufflevector
; CHECK-NEXT: fadd <2 x double>
; CHECK-NEXT: insertelement
; CHECK-NEXT: fmul <2 x double>
; CHECK-UNRL: %mul = fmul <2 x double> %2, %2
; CHECK-UNRL: %mul3 = fmul <2 x double> %2, %3
; CHECK-UNRL: %add = fadd <2 x double> %mul, %mul3
; CHECK-UNRL: %add4 = fadd <2 x double> %3, %3
; CHECK-UNRL: %add5 = fadd <2 x double> %add4, %2
; CHECK-UNRL: %mul6 = fmul <2 x double> %2, %add5
; CHECK-UNRL: %add7 = fadd <2 x double> %add, %mul6
; CHECK-UNRL: %mul8 = fmul <2 x double> %3, %3
; CHECK-UNRL: %add9 = fadd <2 x double> %2, %2
; CHECK-UNRL: %add10 = fadd <2 x double> %add9, %2
; CHECK-UNRL: %mul11 = fmul <2 x double> %mul8, %add10
; CHECK-UNRL: %add12 = fadd <2 x double> %add7, %mul11
for.end: ; preds = %for.body
ret void
}

View File

@ -1,95 +0,0 @@
; RUN: opt < %s -basicaa -bb-vectorize -disable-output
; This is a bugpoint-reduced test case. It did not always assert, but does reproduce the bug
; and running under valgrind (or some similar tool) will catch the error.
target datalayout = "e-p:64:64:64-S128-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f16:16:16-f32:32:32-f64:64:64-f128:128:128-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
target triple = "x86_64-apple-darwin12.2.0"
%0 = type { [10 x { float, float }], [10 x { float, float }], [10 x { float, float }], [10 x { float, float }], [10 x { float, float }] }
%1 = type { [10 x [8 x i8]] }
%2 = type { i64, i64 }
%3 = type { [10 x i64], i64, i64, i64, i64, i64 }
%4 = type { i64, i64, i64, i64, i64, i64 }
%5 = type { [10 x i64] }
%6 = type { [10 x float], [10 x float], [10 x float], [10 x float] }
%struct.__st_parameter_dt.1.3.5.7 = type { %struct.__st_parameter_common.0.2.4.6, i64, i64*, i64*, i8*, i8*, i32, i32, i8*, i8*, i32, i32, i8*, [256 x i8], i32*, i64, i8*, i32, i32, i8*, i8*, i32, i32, i8*, i8*, i32, i32, i8*, i8*, i32, [4 x i8] }
%struct.__st_parameter_common.0.2.4.6 = type { i32, i32, i8*, i32, i32, i8*, i32* }
@cctenso_ = external unnamed_addr global %0, align 32
@ctenso_ = external unnamed_addr global %1, align 32
@i_dim_ = external unnamed_addr global %2, align 16
@itenso1_ = external unnamed_addr global %3, align 32
@itenso2_ = external unnamed_addr global %4, align 32
@ltenso_ = external unnamed_addr global %5, align 32
@rtenso_ = external unnamed_addr global %6, align 32
@.cst = external unnamed_addr constant [8 x i8], align 8
@.cst1 = external unnamed_addr constant [3 x i8], align 8
@.cst2 = external unnamed_addr constant [29 x i8], align 8
@.cst3 = external unnamed_addr constant [32 x i8], align 64
define void @cart_to_dc2y_(double* noalias nocapture %xx, double* noalias nocapture %yy, double* noalias nocapture %zz, [5 x { double, double }]* noalias nocapture %c2ten) nounwind uwtable {
entry:
%0 = fmul double undef, undef
%1 = fmul double undef, undef
%2 = fadd double undef, undef
%3 = fmul double undef, 0x3FE8B8B76E3E9919
%4 = fsub double %0, %1
%5 = fsub double -0.000000e+00, undef
%6 = fmul double undef, undef
%7 = fmul double %4, %6
%8 = fmul double undef, 2.000000e+00
%9 = fmul double %8, undef
%10 = fmul double undef, %9
%11 = fmul double %10, undef
%12 = fsub double undef, %7
%13 = fmul double %3, %12
%14 = fmul double %3, undef
%15 = getelementptr inbounds [5 x { double, double }], [5 x { double, double }]* %c2ten, i64 0, i64 0, i32 0
store double %13, double* %15, align 8
%16 = getelementptr inbounds [5 x { double, double }], [5 x { double, double }]* %c2ten, i64 0, i64 0, i32 1
%17 = fmul double undef, %8
%18 = fmul double %17, undef
%19 = fmul double undef, %18
%20 = fadd double undef, undef
%21 = fmul double %3, %19
%22 = fsub double -0.000000e+00, %21
%23 = getelementptr inbounds [5 x { double, double }], [5 x { double, double }]* %c2ten, i64 0, i64 1, i32 0
store double %22, double* %23, align 8
%24 = getelementptr inbounds [5 x { double, double }], [5 x { double, double }]* %c2ten, i64 0, i64 1, i32 1
%25 = fmul double undef, 0x3FE42F601A8C6794
%26 = fmul double undef, 2.000000e+00
%27 = fsub double %26, %0
%28 = fmul double %6, undef
%29 = fsub double undef, %28
%30 = getelementptr inbounds [5 x { double, double }], [5 x { double, double }]* %c2ten, i64 0, i64 2, i32 0
store double undef, double* %30, align 8
%31 = getelementptr inbounds [5 x { double, double }], [5 x { double, double }]* %c2ten, i64 0, i64 2, i32 1
%32 = fmul double undef, %17
%33 = fmul double undef, %17
%34 = fmul double undef, %32
%35 = fmul double undef, %33
%36 = fsub double undef, %35
%37 = fmul double %3, %34
%38 = getelementptr inbounds [5 x { double, double }], [5 x { double, double }]* %c2ten, i64 0, i64 3, i32 0
store double %37, double* %38, align 8
%39 = getelementptr inbounds [5 x { double, double }], [5 x { double, double }]* %c2ten, i64 0, i64 3, i32 1
%40 = fmul double undef, %8
%41 = fmul double undef, %40
%42 = fmul double undef, %41
%43 = fsub double undef, %42
%44 = fmul double %3, %43
%45 = getelementptr inbounds [5 x { double, double }], [5 x { double, double }]* %c2ten, i64 0, i64 4, i32 0
store double %13, double* %45, align 8
%46 = getelementptr inbounds [5 x { double, double }], [5 x { double, double }]* %c2ten, i64 0, i64 4, i32 1
%47 = fsub double -0.000000e+00, %14
store double %47, double* %16, align 8
store double undef, double* %24, align 8
store double -0.000000e+00, double* %31, align 8
store double undef, double* %39, align 8
store double undef, double* %46, align 8
ret void
}
attributes #0 = { nounwind uwtable }
attributes #1 = { nounwind readnone }
attributes #2 = { nounwind }

View File

@ -1,54 +0,0 @@
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"
; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7 -bb-vectorize -S | FileCheck %s
define void @ptoa() nounwind uwtable {
entry:
%call = call i8* @malloc() nounwind
br i1 undef, label %return, label %if.end10
if.end10: ; preds = %entry
%incdec.ptr = getelementptr inbounds i8, i8* %call, i64 undef
%call17 = call i32 @ptou() nounwind
%incdec.ptr26.1 = getelementptr inbounds i8, i8* %incdec.ptr, i64 -2
store i8 undef, i8* %incdec.ptr26.1, align 1
%div27.1 = udiv i32 %call17, 100
%rem.2 = urem i32 %div27.1, 10
%add2230.2 = or i32 %rem.2, 48
%conv25.2 = trunc i32 %add2230.2 to i8
%incdec.ptr26.2 = getelementptr inbounds i8, i8* %incdec.ptr, i64 -3
store i8 %conv25.2, i8* %incdec.ptr26.2, align 1
%incdec.ptr26.3 = getelementptr inbounds i8, i8* %incdec.ptr, i64 -4
store i8 undef, i8* %incdec.ptr26.3, align 1
%div27.3 = udiv i32 %call17, 10000
%rem.4 = urem i32 %div27.3, 10
%add2230.4 = or i32 %rem.4, 48
%conv25.4 = trunc i32 %add2230.4 to i8
%incdec.ptr26.4 = getelementptr inbounds i8, i8* %incdec.ptr, i64 -5
store i8 %conv25.4, i8* %incdec.ptr26.4, align 1
%div27.4 = udiv i32 %call17, 100000
%rem.5 = urem i32 %div27.4, 10
%add2230.5 = or i32 %rem.5, 48
%conv25.5 = trunc i32 %add2230.5 to i8
%incdec.ptr26.5 = getelementptr inbounds i8, i8* %incdec.ptr, i64 -6
store i8 %conv25.5, i8* %incdec.ptr26.5, align 1
%incdec.ptr26.6 = getelementptr inbounds i8, i8* %incdec.ptr, i64 -7
store i8 0, i8* %incdec.ptr26.6, align 1
%incdec.ptr26.7 = getelementptr inbounds i8, i8* %incdec.ptr, i64 -8
store i8 undef, i8* %incdec.ptr26.7, align 1
%div27.7 = udiv i32 %call17, 100000000
%rem.8 = urem i32 %div27.7, 10
%add2230.8 = or i32 %rem.8, 48
%conv25.8 = trunc i32 %add2230.8 to i8
%incdec.ptr26.8 = getelementptr inbounds i8, i8* %incdec.ptr, i64 -9
store i8 %conv25.8, i8* %incdec.ptr26.8, align 1
unreachable
return: ; preds = %entry
ret void
; CHECK-LABEL: @ptoa(
}
declare noalias i8* @malloc() nounwind
declare i32 @ptou()

View File

@ -1,85 +0,0 @@
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"
; RUN: opt < %s -basicaa -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7 -bb-vectorize -S | FileCheck %s
%struct.gsm_state.2.8.14.15.16.17.19.22.23.25.26.28.29.31.32.33.35.36.37.38.40.41.42.44.45.47.48.50.52.53.54.56.57.58.59.60.61.62.63.66.73.83.84.89.90.91.92.93.94.95.96.99.100.101.102.103.104.106.107.114.116.121.122.129.130.135.136.137.138.139.140.141.142.143.144.147.148.149.158.159.160.161.164.165.166.167.168.169.172.179.181.182.183.188.195.200.201.202.203.204.205.208.209.210.212.213.214.215.222.223.225.226.230.231.232.233.234.235.236.237.238.239.240.241.242.243.244.352 = type { [280 x i16], i16, i64, i32, [8 x i16], [2 x [8 x i16]], i16, i16, [9 x i16], i16, i8, i8 }
define void @gsm_encode(%struct.gsm_state.2.8.14.15.16.17.19.22.23.25.26.28.29.31.32.33.35.36.37.38.40.41.42.44.45.47.48.50.52.53.54.56.57.58.59.60.61.62.63.66.73.83.84.89.90.91.92.93.94.95.96.99.100.101.102.103.104.106.107.114.116.121.122.129.130.135.136.137.138.139.140.141.142.143.144.147.148.149.158.159.160.161.164.165.166.167.168.169.172.179.181.182.183.188.195.200.201.202.203.204.205.208.209.210.212.213.214.215.222.223.225.226.230.231.232.233.234.235.236.237.238.239.240.241.242.243.244.352* %s, i16* %source, i8* %c) nounwind uwtable {
entry:
%xmc = alloca [52 x i16], align 16
%arraydecay5 = getelementptr inbounds [52 x i16], [52 x i16]* %xmc, i64 0, i64 0
call void @Gsm_Coder(%struct.gsm_state.2.8.14.15.16.17.19.22.23.25.26.28.29.31.32.33.35.36.37.38.40.41.42.44.45.47.48.50.52.53.54.56.57.58.59.60.61.62.63.66.73.83.84.89.90.91.92.93.94.95.96.99.100.101.102.103.104.106.107.114.116.121.122.129.130.135.136.137.138.139.140.141.142.143.144.147.148.149.158.159.160.161.164.165.166.167.168.169.172.179.181.182.183.188.195.200.201.202.203.204.205.208.209.210.212.213.214.215.222.223.225.226.230.231.232.233.234.235.236.237.238.239.240.241.242.243.244.352* %s, i16* %source, i16* undef, i16* null, i16* undef, i16* undef, i16* undef, i16* %arraydecay5) nounwind
%incdec.ptr136 = getelementptr inbounds i8, i8* %c, i64 10
%incdec.ptr157 = getelementptr inbounds i8, i8* %c, i64 11
store i8 0, i8* %incdec.ptr136, align 1
%arrayidx162 = getelementptr inbounds [52 x i16], [52 x i16]* %xmc, i64 0, i64 11
%0 = load i16, i16* %arrayidx162, align 2
%conv1631 = trunc i16 %0 to i8
%and164 = shl i8 %conv1631, 3
%shl165 = and i8 %and164, 56
%incdec.ptr172 = getelementptr inbounds i8, i8* %c, i64 12
store i8 %shl165, i8* %incdec.ptr157, align 1
%1 = load i16, i16* inttoptr (i64 2 to i16*), align 2
%conv1742 = trunc i16 %1 to i8
%and175 = shl i8 %conv1742, 1
%incdec.ptr183 = getelementptr inbounds i8, i8* %c, i64 13
store i8 %and175, i8* %incdec.ptr172, align 1
%incdec.ptr199 = getelementptr inbounds i8, i8* %c, i64 14
store i8 0, i8* %incdec.ptr183, align 1
%arrayidx214 = getelementptr inbounds [52 x i16], [52 x i16]* %xmc, i64 0, i64 15
%incdec.ptr220 = getelementptr inbounds i8, i8* %c, i64 15
store i8 0, i8* %incdec.ptr199, align 1
%2 = load i16, i16* %arrayidx214, align 2
%conv2223 = trunc i16 %2 to i8
%and223 = shl i8 %conv2223, 6
%incdec.ptr235 = getelementptr inbounds i8, i8* %c, i64 16
store i8 %and223, i8* %incdec.ptr220, align 1
%arrayidx240 = getelementptr inbounds [52 x i16], [52 x i16]* %xmc, i64 0, i64 19
%3 = load i16, i16* %arrayidx240, align 2
%conv2414 = trunc i16 %3 to i8
%and242 = shl i8 %conv2414, 2
%shl243 = and i8 %and242, 28
%incdec.ptr251 = getelementptr inbounds i8, i8* %c, i64 17
store i8 %shl243, i8* %incdec.ptr235, align 1
%incdec.ptr272 = getelementptr inbounds i8, i8* %c, i64 18
store i8 0, i8* %incdec.ptr251, align 1
%arrayidx282 = getelementptr inbounds [52 x i16], [52 x i16]* %xmc, i64 0, i64 25
%4 = load i16, i16* %arrayidx282, align 2
%conv2835 = trunc i16 %4 to i8
%and284 = and i8 %conv2835, 7
%incdec.ptr287 = getelementptr inbounds i8, i8* %c, i64 19
store i8 %and284, i8* %incdec.ptr272, align 1
%incdec.ptr298 = getelementptr inbounds i8, i8* %c, i64 20
store i8 0, i8* %incdec.ptr287, align 1
%incdec.ptr314 = getelementptr inbounds i8, i8* %c, i64 21
store i8 0, i8* %incdec.ptr298, align 1
%arrayidx319 = getelementptr inbounds [52 x i16], [52 x i16]* %xmc, i64 0, i64 26
%5 = load i16, i16* %arrayidx319, align 4
%conv3206 = trunc i16 %5 to i8
%and321 = shl i8 %conv3206, 4
%shl322 = and i8 %and321, 112
%incdec.ptr335 = getelementptr inbounds i8, i8* %c, i64 22
store i8 %shl322, i8* %incdec.ptr314, align 1
%arrayidx340 = getelementptr inbounds [52 x i16], [52 x i16]* %xmc, i64 0, i64 29
%6 = load i16, i16* %arrayidx340, align 2
%conv3417 = trunc i16 %6 to i8
%and342 = shl i8 %conv3417, 3
%shl343 = and i8 %and342, 56
%incdec.ptr350 = getelementptr inbounds i8, i8* %c, i64 23
store i8 %shl343, i8* %incdec.ptr335, align 1
%incdec.ptr366 = getelementptr inbounds i8, i8* %c, i64 24
store i8 0, i8* %incdec.ptr350, align 1
%arrayidx381 = getelementptr inbounds [52 x i16], [52 x i16]* %xmc, i64 0, i64 36
%incdec.ptr387 = getelementptr inbounds i8, i8* %c, i64 25
store i8 0, i8* %incdec.ptr366, align 1
%7 = load i16, i16* %arrayidx381, align 8
%conv3898 = trunc i16 %7 to i8
%and390 = shl i8 %conv3898, 6
store i8 %and390, i8* %incdec.ptr387, align 1
unreachable
; CHECK-LABEL: @gsm_encode(
}
declare void @Gsm_Coder(%struct.gsm_state.2.8.14.15.16.17.19.22.23.25.26.28.29.31.32.33.35.36.37.38.40.41.42.44.45.47.48.50.52.53.54.56.57.58.59.60.61.62.63.66.73.83.84.89.90.91.92.93.94.95.96.99.100.101.102.103.104.106.107.114.116.121.122.129.130.135.136.137.138.139.140.141.142.143.144.147.148.149.158.159.160.161.164.165.166.167.168.169.172.179.181.182.183.188.195.200.201.202.203.204.205.208.209.210.212.213.214.215.222.223.225.226.230.231.232.233.234.235.236.237.238.239.240.241.242.243.244.352*, i16*, i16*, i16*, i16*, i16*, i16*, i16*)
declare void @llvm.trap() noreturn nounwind

View File

@ -1,170 +0,0 @@
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"
; RUN: opt < %s -basicaa -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7 -bb-vectorize -S | FileCheck %s
%struct.gsm_state.2.8.39.44.45.55.56.57.58.59.62.63.64.65.74.75.76.77.80.87.92.93.94.95.96.97.110.111.112.113.114.128.130.135.136.137.138.139.140.141.142.143.144.145.148.149.150.151.152.169.170.177.178.179.184.185.186.187.188.201.208.209.219.220.221.223.224.225.230.231.232.233.235.236.237.238.245.246.248.249.272.274.279.280.281.282.283.286.293.298.299.314.315.316.317.318.319.320.321.322.323.324.325.326.327.328.329.330.331.332.333.334.335.336.337.338.339.340.341.342.343.344.345.346.347.348.349.350.351.352.353.565 = type { [280 x i16], i16, i64, i32, [8 x i16], [2 x [8 x i16]], i16, i16, [9 x i16], i16, i8, i8 }
define void @gsm_encode(%struct.gsm_state.2.8.39.44.45.55.56.57.58.59.62.63.64.65.74.75.76.77.80.87.92.93.94.95.96.97.110.111.112.113.114.128.130.135.136.137.138.139.140.141.142.143.144.145.148.149.150.151.152.169.170.177.178.179.184.185.186.187.188.201.208.209.219.220.221.223.224.225.230.231.232.233.235.236.237.238.245.246.248.249.272.274.279.280.281.282.283.286.293.298.299.314.315.316.317.318.319.320.321.322.323.324.325.326.327.328.329.330.331.332.333.334.335.336.337.338.339.340.341.342.343.344.345.346.347.348.349.350.351.352.353.565* %s, i16* %source, i8* %c) nounwind uwtable {
entry:
%LARc28 = alloca [2 x i64], align 16
%LARc28.sub = getelementptr inbounds [2 x i64], [2 x i64]* %LARc28, i64 0, i64 0
%tmpcast = bitcast [2 x i64]* %LARc28 to [8 x i16]*
%Nc = alloca [4 x i16], align 2
%Mc = alloca [4 x i16], align 2
%bc = alloca [4 x i16], align 2
%xmc = alloca [52 x i16], align 16
%arraydecay = bitcast [2 x i64]* %LARc28 to i16*
%arraydecay1 = getelementptr inbounds [4 x i16], [4 x i16]* %Nc, i64 0, i64 0
%arraydecay2 = getelementptr inbounds [4 x i16], [4 x i16]* %bc, i64 0, i64 0
%arraydecay3 = getelementptr inbounds [4 x i16], [4 x i16]* %Mc, i64 0, i64 0
%arraydecay5 = getelementptr inbounds [52 x i16], [52 x i16]* %xmc, i64 0, i64 0
call void @Gsm_Coder(%struct.gsm_state.2.8.39.44.45.55.56.57.58.59.62.63.64.65.74.75.76.77.80.87.92.93.94.95.96.97.110.111.112.113.114.128.130.135.136.137.138.139.140.141.142.143.144.145.148.149.150.151.152.169.170.177.178.179.184.185.186.187.188.201.208.209.219.220.221.223.224.225.230.231.232.233.235.236.237.238.245.246.248.249.272.274.279.280.281.282.283.286.293.298.299.314.315.316.317.318.319.320.321.322.323.324.325.326.327.328.329.330.331.332.333.334.335.336.337.338.339.340.341.342.343.344.345.346.347.348.349.350.351.352.353.565* %s, i16* %source, i16* %arraydecay, i16* %arraydecay1, i16* %arraydecay2, i16* %arraydecay3, i16* undef, i16* %arraydecay5) nounwind
%0 = load i64, i64* %LARc28.sub, align 16
%1 = trunc i64 %0 to i32
%conv1 = lshr i32 %1, 2
%and = and i32 %conv1, 15
%or = or i32 %and, 208
%conv6 = trunc i32 %or to i8
%incdec.ptr = getelementptr inbounds i8, i8* %c, i64 1
store i8 %conv6, i8* %c, align 1
%conv84 = trunc i64 %0 to i8
%and9 = shl i8 %conv84, 6
%incdec.ptr15 = getelementptr inbounds i8, i8* %c, i64 2
store i8 %and9, i8* %incdec.ptr, align 1
%2 = lshr i64 %0, 50
%shr226.tr = trunc i64 %2 to i8
%conv25 = and i8 %shr226.tr, 7
%incdec.ptr26 = getelementptr inbounds i8, i8* %c, i64 3
store i8 %conv25, i8* %incdec.ptr15, align 1
%incdec.ptr42 = getelementptr inbounds i8, i8* %c, i64 4
store i8 0, i8* %incdec.ptr26, align 1
%arrayidx52 = getelementptr inbounds [8 x i16], [8 x i16]* %tmpcast, i64 0, i64 7
%3 = load i16, i16* %arrayidx52, align 2
%conv537 = trunc i16 %3 to i8
%and54 = and i8 %conv537, 7
%incdec.ptr57 = getelementptr inbounds i8, i8* %c, i64 5
store i8 %and54, i8* %incdec.ptr42, align 1
%incdec.ptr68 = getelementptr inbounds i8, i8* %c, i64 6
store i8 0, i8* %incdec.ptr57, align 1
%4 = load i16, i16* %arraydecay3, align 2
%conv748 = trunc i16 %4 to i8
%and75 = shl i8 %conv748, 5
%shl76 = and i8 %and75, 96
%incdec.ptr84 = getelementptr inbounds i8, i8* %c, i64 7
store i8 %shl76, i8* %incdec.ptr68, align 1
%arrayidx94 = getelementptr inbounds [52 x i16], [52 x i16]* %xmc, i64 0, i64 1
%5 = load i16, i16* %arrayidx94, align 2
%conv959 = trunc i16 %5 to i8
%and96 = shl i8 %conv959, 1
%shl97 = and i8 %and96, 14
%or103 = or i8 %shl97, 1
%incdec.ptr105 = getelementptr inbounds i8, i8* %c, i64 8
store i8 %or103, i8* %incdec.ptr84, align 1
%arrayidx115 = getelementptr inbounds [52 x i16], [52 x i16]* %xmc, i64 0, i64 4
%6 = bitcast i16* %arrayidx115 to i32*
%7 = load i32, i32* %6, align 8
%conv11610 = trunc i32 %7 to i8
%and117 = and i8 %conv11610, 7
%incdec.ptr120 = getelementptr inbounds i8, i8* %c, i64 9
store i8 %and117, i8* %incdec.ptr105, align 1
%8 = lshr i32 %7, 16
%and12330 = shl nuw nsw i32 %8, 5
%and123 = trunc i32 %and12330 to i8
%incdec.ptr136 = getelementptr inbounds i8, i8* %c, i64 10
store i8 %and123, i8* %incdec.ptr120, align 1
%incdec.ptr157 = getelementptr inbounds i8, i8* %c, i64 11
store i8 0, i8* %incdec.ptr136, align 1
%incdec.ptr172 = getelementptr inbounds i8, i8* %c, i64 12
store i8 0, i8* %incdec.ptr157, align 1
%arrayidx173 = getelementptr inbounds [4 x i16], [4 x i16]* %Nc, i64 0, i64 1
%9 = load i16, i16* %arrayidx173, align 2
%conv17412 = zext i16 %9 to i32
%and175 = shl nuw nsw i32 %conv17412, 1
%arrayidx177 = getelementptr inbounds [4 x i16], [4 x i16]* %bc, i64 0, i64 1
%10 = load i16, i16* %arrayidx177, align 2
%conv17826 = zext i16 %10 to i32
%shr17913 = lshr i32 %conv17826, 1
%and180 = and i32 %shr17913, 1
%or181 = or i32 %and175, %and180
%conv182 = trunc i32 %or181 to i8
%incdec.ptr183 = getelementptr inbounds i8, i8* %c, i64 13
store i8 %conv182, i8* %incdec.ptr172, align 1
%arrayidx188 = getelementptr inbounds [4 x i16], [4 x i16]* %Mc, i64 0, i64 1
%11 = load i16, i16* %arrayidx188, align 2
%conv18914 = trunc i16 %11 to i8
%and190 = shl i8 %conv18914, 5
%shl191 = and i8 %and190, 96
%incdec.ptr199 = getelementptr inbounds i8, i8* %c, i64 14
store i8 %shl191, i8* %incdec.ptr183, align 1
%arrayidx209 = getelementptr inbounds [52 x i16], [52 x i16]* %xmc, i64 0, i64 14
%12 = load i16, i16* %arrayidx209, align 4
%conv21015 = trunc i16 %12 to i8
%and211 = shl i8 %conv21015, 1
%shl212 = and i8 %and211, 14
%or218 = or i8 %shl212, 1
%incdec.ptr220 = getelementptr inbounds i8, i8* %c, i64 15
store i8 %or218, i8* %incdec.ptr199, align 1
%arrayidx225 = getelementptr inbounds [52 x i16], [52 x i16]* %xmc, i64 0, i64 16
%13 = bitcast i16* %arrayidx225 to i64*
%14 = load i64, i64* %13, align 16
%conv22616 = trunc i64 %14 to i8
%and227 = shl i8 %conv22616, 3
%shl228 = and i8 %and227, 56
%incdec.ptr235 = getelementptr inbounds i8, i8* %c, i64 16
store i8 %shl228, i8* %incdec.ptr220, align 1
%15 = lshr i64 %14, 32
%and23832 = shl nuw nsw i64 %15, 5
%and238 = trunc i64 %and23832 to i8
%incdec.ptr251 = getelementptr inbounds i8, i8* %c, i64 17
store i8 %and238, i8* %incdec.ptr235, align 1
%arrayidx266 = getelementptr inbounds [52 x i16], [52 x i16]* %xmc, i64 0, i64 23
%incdec.ptr272 = getelementptr inbounds i8, i8* %c, i64 18
store i8 0, i8* %incdec.ptr251, align 1
%16 = load i16, i16* %arrayidx266, align 2
%conv27418 = trunc i16 %16 to i8
%and275 = shl i8 %conv27418, 6
%incdec.ptr287 = getelementptr inbounds i8, i8* %c, i64 19
store i8 %and275, i8* %incdec.ptr272, align 1
%arrayidx288 = getelementptr inbounds [4 x i16], [4 x i16]* %Nc, i64 0, i64 2
%17 = load i16, i16* %arrayidx288, align 2
%conv28919 = zext i16 %17 to i32
%and290 = shl nuw nsw i32 %conv28919, 1
%arrayidx292 = getelementptr inbounds [4 x i16], [4 x i16]* %bc, i64 0, i64 2
%18 = load i16, i16* %arrayidx292, align 2
%conv29327 = zext i16 %18 to i32
%shr29420 = lshr i32 %conv29327, 1
%and295 = and i32 %shr29420, 1
%or296 = or i32 %and290, %and295
%conv297 = trunc i32 %or296 to i8
%incdec.ptr298 = getelementptr inbounds i8, i8* %c, i64 20
store i8 %conv297, i8* %incdec.ptr287, align 1
%conv30021 = trunc i16 %18 to i8
%and301 = shl i8 %conv30021, 7
%incdec.ptr314 = getelementptr inbounds i8, i8* %c, i64 21
store i8 %and301, i8* %incdec.ptr298, align 1
%incdec.ptr335 = getelementptr inbounds i8, i8* %c, i64 22
store i8 0, i8* %incdec.ptr314, align 1
%arrayidx340 = getelementptr inbounds [52 x i16], [52 x i16]* %xmc, i64 0, i64 29
%19 = load i16, i16* %arrayidx340, align 2
%conv34122 = trunc i16 %19 to i8
%and342 = shl i8 %conv34122, 3
%shl343 = and i8 %and342, 56
%incdec.ptr350 = getelementptr inbounds i8, i8* %c, i64 23
store i8 %shl343, i8* %incdec.ptr335, align 1
%arrayidx355 = getelementptr inbounds [52 x i16], [52 x i16]* %xmc, i64 0, i64 32
%20 = bitcast i16* %arrayidx355 to i32*
%21 = load i32, i32* %20, align 16
%conv35623 = shl i32 %21, 2
%shl358 = and i32 %conv35623, 28
%22 = lshr i32 %21, 17
%and363 = and i32 %22, 3
%or364 = or i32 %shl358, %and363
%conv365 = trunc i32 %or364 to i8
store i8 %conv365, i8* %incdec.ptr350, align 1
unreachable
; CHECK-LABEL: @gsm_encode(
}
declare void @Gsm_Coder(%struct.gsm_state.2.8.39.44.45.55.56.57.58.59.62.63.64.65.74.75.76.77.80.87.92.93.94.95.96.97.110.111.112.113.114.128.130.135.136.137.138.139.140.141.142.143.144.145.148.149.150.151.152.169.170.177.178.179.184.185.186.187.188.201.208.209.219.220.221.223.224.225.230.231.232.233.235.236.237.238.245.246.248.249.272.274.279.280.281.282.283.286.293.298.299.314.315.316.317.318.319.320.321.322.323.324.325.326.327.328.329.330.331.332.333.334.335.336.337.338.339.340.341.342.343.344.345.346.347.348.349.350.351.352.353.565*, i16*, i16*, i16*, i16*, i16*, i16*, i16*)
declare void @llvm.trap() noreturn nounwind

View File

@ -1,25 +0,0 @@
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7 -bb-vectorize -S | FileCheck %s
define <4 x float> @test7(<4 x float> %A1, <4 x float> %B1, double %C1, double %C2, double %D1, double %D2) {
; Two independent chains: a <4 x float> shuffle chain feeding %R, and a
; scalar double chain (%M*/%N*/%Z*) whose results are never used.  The
; CHECK-NOT below verifies the shuffles are not widened into <8 x float>.
%A2 = shufflevector <4 x float> %A1, <4 x float> undef, <4 x i32> <i32 2, i32 1, i32 0, i32 3>
%B2 = shufflevector <4 x float> %B1, <4 x float> undef, <4 x i32> <i32 2, i32 1, i32 0, i32 3>
%X1 = shufflevector <4 x float> %A2, <4 x float> undef, <2 x i32> <i32 0, i32 1>
%X2 = shufflevector <4 x float> %B2, <4 x float> undef, <2 x i32> <i32 2, i32 3>
%Y1 = shufflevector <2 x float> %X1, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
%Y2 = shufflevector <2 x float> %X2, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
; Dead scalar chain follows (only %R is returned).
%M1 = fsub double %C1, %D1
%M2 = fsub double %C2, %D2
%N1 = fmul double %M1, %C1
%N2 = fmul double %M2, %C2
%Z1 = fadd double %N1, %D1
%Z2 = fadd double %N2, %D2
%R = fmul <4 x float> %Y1, %Y2
ret <4 x float> %R
; CHECK-LABEL: @test7(
; CHECK-NOT: <8 x float>
; CHECK: ret <4 x float>
}

View File

@ -1,127 +0,0 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7 -bb-vectorize -bb-vectorize-req-chain-depth=3 -instcombine -gvn -S | FileCheck %s
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
declare double @llvm.fma.f64(double, double, double)
declare double @llvm.fmuladd.f64(double, double, double)
declare double @llvm.cos.f64(double)
declare double @llvm.powi.f64(double, i32)
; Basic depth-3 chain with fma
define double @test1(double %A1, double %A2, double %B1, double %B2, double %C1, double %C2) {
; Two parallel fsub/llvm.fma/fadd chains joined by a final fmul.  The
; autogenerated checks record that this fma chain is left in scalar form.
; CHECK-LABEL: @test1(
; CHECK-NEXT: [[X1:%.*]] = fsub double [[A1:%.*]], [[B1:%.*]]
; CHECK-NEXT: [[X2:%.*]] = fsub double [[A2:%.*]], [[B2:%.*]]
; CHECK-NEXT: [[Y1:%.*]] = call double @llvm.fma.f64(double [[X1]], double [[A1]], double [[C1:%.*]])
; CHECK-NEXT: [[Y2:%.*]] = call double @llvm.fma.f64(double [[X2]], double [[A2]], double [[C2:%.*]])
; CHECK-NEXT: [[Z1:%.*]] = fadd double [[Y1]], [[B1]]
; CHECK-NEXT: [[Z2:%.*]] = fadd double [[Y2]], [[B2]]
; CHECK-NEXT: [[R:%.*]] = fmul double [[Z1]], [[Z2]]
; CHECK-NEXT: ret double [[R]]
;
%X1 = fsub double %A1, %B1
%X2 = fsub double %A2, %B2
%Y1 = call double @llvm.fma.f64(double %X1, double %A1, double %C1)
%Y2 = call double @llvm.fma.f64(double %X2, double %A2, double %C2)
%Z1 = fadd double %Y1, %B1
%Z2 = fadd double %Y2, %B2
%R = fmul double %Z1, %Z2
ret double %R
}
; Basic depth-3 chain with fmuladd
define double @test1a(double %A1, double %A2, double %B1, double %B2, double %C1, double %C2) {
; Same chain shape as @test1 but using llvm.fmuladd; here the checks show
; the pair of chains fused into <2 x double> ops (insertelement the scalar
; inputs, one vector fsub/fmuladd/fadd, then extractelement the two lanes).
; CHECK-LABEL: @test1a(
; CHECK-NEXT: [[X1_V_I1_1:%.*]] = insertelement <2 x double> undef, double [[B1:%.*]], i32 0
; CHECK-NEXT: [[X1_V_I1_2:%.*]] = insertelement <2 x double> [[X1_V_I1_1]], double [[B2:%.*]], i32 1
; CHECK-NEXT: [[X1_V_I0_1:%.*]] = insertelement <2 x double> undef, double [[A1:%.*]], i32 0
; CHECK-NEXT: [[X1_V_I0_2:%.*]] = insertelement <2 x double> [[X1_V_I0_1]], double [[A2:%.*]], i32 1
; CHECK-NEXT: [[X1:%.*]] = fsub <2 x double> [[X1_V_I0_2]], [[X1_V_I1_2]]
; CHECK-NEXT: [[Y1_V_I2_1:%.*]] = insertelement <2 x double> undef, double [[C1:%.*]], i32 0
; CHECK-NEXT: [[Y1_V_I2_2:%.*]] = insertelement <2 x double> [[Y1_V_I2_1]], double [[C2:%.*]], i32 1
; CHECK-NEXT: [[Y1:%.*]] = call <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[X1]], <2 x double> [[X1_V_I0_2]], <2 x double> [[Y1_V_I2_2]])
; CHECK-NEXT: [[Z1:%.*]] = fadd <2 x double> [[Y1]], [[X1_V_I1_2]]
; CHECK-NEXT: [[Z1_V_R1:%.*]] = extractelement <2 x double> [[Z1]], i32 0
; CHECK-NEXT: [[Z1_V_R2:%.*]] = extractelement <2 x double> [[Z1]], i32 1
; CHECK-NEXT: [[R:%.*]] = fmul double [[Z1_V_R1]], [[Z1_V_R2]]
; CHECK-NEXT: ret double [[R]]
;
%X1 = fsub double %A1, %B1
%X2 = fsub double %A2, %B2
%Y1 = call double @llvm.fmuladd.f64(double %X1, double %A1, double %C1)
%Y2 = call double @llvm.fmuladd.f64(double %X2, double %A2, double %C2)
%Z1 = fadd double %Y1, %B1
%Z2 = fadd double %Y2, %B2
%R = fmul double %Z1, %Z2
ret double %R
}
; Basic depth-3 chain with cos
define double @test2(double %A1, double %A2, double %B1, double %B2) {
; Depth-3 chain through llvm.cos; the checks record it staying scalar.
; CHECK-LABEL: @test2(
; CHECK-NEXT: [[X1:%.*]] = fsub double [[A1:%.*]], [[B1:%.*]]
; CHECK-NEXT: [[X2:%.*]] = fsub double [[A2:%.*]], [[B2:%.*]]
; CHECK-NEXT: [[Y1:%.*]] = call double @llvm.cos.f64(double [[X1]])
; CHECK-NEXT: [[Y2:%.*]] = call double @llvm.cos.f64(double [[X2]])
; CHECK-NEXT: [[Z1:%.*]] = fadd double [[Y1]], [[B1]]
; CHECK-NEXT: [[Z2:%.*]] = fadd double [[Y2]], [[B2]]
; CHECK-NEXT: [[R:%.*]] = fmul double [[Z1]], [[Z2]]
; CHECK-NEXT: ret double [[R]]
;
%X1 = fsub double %A1, %B1
%X2 = fsub double %A2, %B2
%Y1 = call double @llvm.cos.f64(double %X1)
%Y2 = call double @llvm.cos.f64(double %X2)
%Z1 = fadd double %Y1, %B1
%Z2 = fadd double %Y2, %B2
%R = fmul double %Z1, %Z2
ret double %R
}
; Basic depth-3 chain with powi
define double @test3(double %A1, double %A2, double %B1, double %B2, i32 %P) {
; Depth-3 chain through llvm.powi where both calls share the same power %P;
; the autogenerated checks record the current (scalar) output.
; CHECK-LABEL: @test3(
; CHECK-NEXT: [[X1:%.*]] = fsub double [[A1:%.*]], [[B1:%.*]]
; CHECK-NEXT: [[X2:%.*]] = fsub double [[A2:%.*]], [[B2:%.*]]
; CHECK-NEXT: [[Y1:%.*]] = call double @llvm.powi.f64(double [[X1]], i32 [[P:%.*]])
; CHECK-NEXT: [[Y2:%.*]] = call double @llvm.powi.f64(double [[X2]], i32 [[P]])
; CHECK-NEXT: [[Z1:%.*]] = fadd double [[Y1]], [[B1]]
; CHECK-NEXT: [[Z2:%.*]] = fadd double [[Y2]], [[B2]]
; CHECK-NEXT: [[R:%.*]] = fmul double [[Z1]], [[Z2]]
; CHECK-NEXT: ret double [[R]]
;
%X1 = fsub double %A1, %B1
%X2 = fsub double %A2, %B2
%Y1 = call double @llvm.powi.f64(double %X1, i32 %P)
%Y2 = call double @llvm.powi.f64(double %X2, i32 %P)
%Z1 = fadd double %Y1, %B1
%Z2 = fadd double %Y2, %B2
%R = fmul double %Z1, %Z2
ret double %R
}
; Basic depth-3 chain with powi (different powers: should not vectorize)
define double @test4(double %A1, double %A2, double %B1, double %B2, i32 %P) {
; Like @test3, but %Y1 and %Y2 use different powers (%P vs %P2 = %P + 1), so
; the two powi calls must not be paired; the checks confirm all-scalar output.
; CHECK-LABEL: @test4(
; CHECK-NEXT: [[X1:%.*]] = fsub double [[A1:%.*]], [[B1:%.*]]
; CHECK-NEXT: [[X2:%.*]] = fsub double [[A2:%.*]], [[B2:%.*]]
; CHECK-NEXT: [[P2:%.*]] = add i32 [[P:%.*]], 1
; CHECK-NEXT: [[Y1:%.*]] = call double @llvm.powi.f64(double [[X1]], i32 [[P]])
; CHECK-NEXT: [[Y2:%.*]] = call double @llvm.powi.f64(double [[X2]], i32 [[P2]])
; CHECK-NEXT: [[Z1:%.*]] = fadd double [[Y1]], [[B1]]
; CHECK-NEXT: [[Z2:%.*]] = fadd double [[Y2]], [[B2]]
; CHECK-NEXT: [[R:%.*]] = fmul double [[Z1]], [[Z2]]
; CHECK-NEXT: ret double [[R]]
;
%X1 = fsub double %A1, %B1
%X2 = fsub double %A2, %B2
%P2 = add i32 %P, 1
%Y1 = call double @llvm.powi.f64(double %X1, i32 %P)
%Y2 = call double @llvm.powi.f64(double %X2, i32 %P2)
%Z1 = fadd double %Y1, %B1
%Z2 = fadd double %Y2, %B2
%R = fmul double %Z1, %Z2
ret double %R
}

View File

@ -1,33 +0,0 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7 -bb-vectorize -bb-vectorize-req-chain-depth=3 -instcombine -gvn -S | FileCheck %s
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
; Simple 3-pair chain with loads and stores
define void @test1(double* %a, double* %b, double* %c) nounwind uwtable readonly {
; Simple 3-pair chain: two adjacent scalar loads from %a and %b, two fmuls,
; and two adjacent stores to %c.  The checks show each pair fused into a
; single <2 x double> load/fmul/store.
; CHECK-LABEL: @test1(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[I0_V_I0:%.*]] = bitcast double* [[A:%.*]] to <2 x double>*
; CHECK-NEXT: [[I1_V_I0:%.*]] = bitcast double* [[B:%.*]] to <2 x double>*
; CHECK-NEXT: [[I0:%.*]] = load <2 x double>, <2 x double>* [[I0_V_I0]], align 8
; CHECK-NEXT: [[I1:%.*]] = load <2 x double>, <2 x double>* [[I1_V_I0]], align 8
; CHECK-NEXT: [[MUL:%.*]] = fmul <2 x double> [[I0]], [[I1]]
; CHECK-NEXT: [[TMP0:%.*]] = bitcast double* [[C:%.*]] to <2 x double>*
; CHECK-NEXT: store <2 x double> [[MUL]], <2 x double>* [[TMP0]], align 8
; CHECK-NEXT: ret void
;
entry:
%i0 = load double, double* %a, align 8
%i1 = load double, double* %b, align 8
%mul = fmul double %i0, %i1
%arrayidx3 = getelementptr inbounds double, double* %a, i64 1
%i3 = load double, double* %arrayidx3, align 8
%arrayidx4 = getelementptr inbounds double, double* %b, i64 1
%i4 = load double, double* %arrayidx4, align 8
%mul5 = fmul double %i3, %i4
store double %mul, double* %c, align 8
%arrayidx5 = getelementptr inbounds double, double* %c, i64 1
store double %mul5, double* %arrayidx5, align 8
ret void
}

View File

@ -1,149 +0,0 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7 -bb-vectorize -bb-vectorize-req-chain-depth=3 -instcombine -gvn -S | FileCheck %s
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
; Basic depth-3 chain
define double @test1(double %A1, double %A2, double %B1, double %B2) {
; Basic depth-3 fsub/fmul/fadd chain; the checks show it fused into one
; <2 x double> chain (scalar inputs inserted, lanes extracted at the end).
; CHECK-LABEL: @test1(
; CHECK-NEXT: [[X1_V_I1_1:%.*]] = insertelement <2 x double> undef, double [[B1:%.*]], i32 0
; CHECK-NEXT: [[X1_V_I1_2:%.*]] = insertelement <2 x double> [[X1_V_I1_1]], double [[B2:%.*]], i32 1
; CHECK-NEXT: [[X1_V_I0_1:%.*]] = insertelement <2 x double> undef, double [[A1:%.*]], i32 0
; CHECK-NEXT: [[X1_V_I0_2:%.*]] = insertelement <2 x double> [[X1_V_I0_1]], double [[A2:%.*]], i32 1
; CHECK-NEXT: [[X1:%.*]] = fsub <2 x double> [[X1_V_I0_2]], [[X1_V_I1_2]]
; CHECK-NEXT: [[Y1:%.*]] = fmul <2 x double> [[X1]], [[X1_V_I0_2]]
; CHECK-NEXT: [[Z1:%.*]] = fadd <2 x double> [[Y1]], [[X1_V_I1_2]]
; CHECK-NEXT: [[Z1_V_R1:%.*]] = extractelement <2 x double> [[Z1]], i32 0
; CHECK-NEXT: [[Z1_V_R2:%.*]] = extractelement <2 x double> [[Z1]], i32 1
; CHECK-NEXT: [[R:%.*]] = fmul double [[Z1_V_R1]], [[Z1_V_R2]]
; CHECK-NEXT: ret double [[R]]
;
%X1 = fsub double %A1, %B1
%X2 = fsub double %A2, %B2
%Y1 = fmul double %X1, %A1
%Y2 = fmul double %X2, %A2
%Z1 = fadd double %Y1, %B1
%Z2 = fadd double %Y2, %B2
%R = fmul double %Z1, %Z2
ret double %R
}
; Basic chain
define double @test1a(double %A1, double %A2, double %B1, double %B2) {
; Longer chain (X/Y/Z/W/V/Q/S); the checks show the whole thing vectorized
; to <2 x double>, with only the final reduction fmul left scalar.
; CHECK-LABEL: @test1a(
; CHECK-NEXT: [[X1_V_I1_1:%.*]] = insertelement <2 x double> undef, double [[B1:%.*]], i32 0
; CHECK-NEXT: [[X1_V_I1_2:%.*]] = insertelement <2 x double> [[X1_V_I1_1]], double [[B2:%.*]], i32 1
; CHECK-NEXT: [[X1_V_I0_1:%.*]] = insertelement <2 x double> undef, double [[A1:%.*]], i32 0
; CHECK-NEXT: [[X1_V_I0_2:%.*]] = insertelement <2 x double> [[X1_V_I0_1]], double [[A2:%.*]], i32 1
; CHECK-NEXT: [[X1:%.*]] = fsub <2 x double> [[X1_V_I0_2]], [[X1_V_I1_2]]
; CHECK-NEXT: [[Y1:%.*]] = fmul <2 x double> [[X1]], [[X1_V_I0_2]]
; CHECK-NEXT: [[Z1:%.*]] = fadd <2 x double> [[Y1]], [[X1_V_I1_2]]
; CHECK-NEXT: [[W1:%.*]] = fadd <2 x double> [[Y1]], [[Z1]]
; CHECK-NEXT: [[V1:%.*]] = fadd <2 x double> [[W1]], [[Z1]]
; CHECK-NEXT: [[Q1:%.*]] = fadd <2 x double> [[W1]], [[V1]]
; CHECK-NEXT: [[S1:%.*]] = fadd <2 x double> [[W1]], [[Q1]]
; CHECK-NEXT: [[S1_V_R1:%.*]] = extractelement <2 x double> [[S1]], i32 0
; CHECK-NEXT: [[S1_V_R2:%.*]] = extractelement <2 x double> [[S1]], i32 1
; CHECK-NEXT: [[R:%.*]] = fmul double [[S1_V_R1]], [[S1_V_R2]]
; CHECK-NEXT: ret double [[R]]
;
%X1 = fsub double %A1, %B1
%X2 = fsub double %A2, %B2
%Y1 = fmul double %X1, %A1
%Y2 = fmul double %X2, %A2
%Z1 = fadd double %Y1, %B1
%Z2 = fadd double %Y2, %B2
%W1 = fadd double %Y1, %Z1
%W2 = fadd double %Y2, %Z2
%V1 = fadd double %W1, %Z1
%V2 = fadd double %W2, %Z2
%Q1 = fadd double %W1, %V1
%Q2 = fadd double %W2, %V2
%S1 = fadd double %W1, %Q1
%S2 = fadd double %W2, %Q2
%R = fmul double %S1, %S2
ret double %R
}
; Basic depth-3 chain (last pair permuted)
define double @test2(double %A1, double %A2, double %B1, double %B2) {
; Same depth-3 chain but with the final fadd pair permuted (%Z1 uses %Y2 and
; %Z2 uses %Y1); the checks show the B operands inserted in swapped order.
; CHECK-LABEL: @test2(
; CHECK-NEXT: [[X1_V_I1_1:%.*]] = insertelement <2 x double> undef, double [[B1:%.*]], i32 0
; CHECK-NEXT: [[X1_V_I1_2:%.*]] = insertelement <2 x double> [[X1_V_I1_1]], double [[B2:%.*]], i32 1
; CHECK-NEXT: [[X1_V_I0_1:%.*]] = insertelement <2 x double> undef, double [[A1:%.*]], i32 0
; CHECK-NEXT: [[X1_V_I0_2:%.*]] = insertelement <2 x double> [[X1_V_I0_1]], double [[A2:%.*]], i32 1
; CHECK-NEXT: [[X1:%.*]] = fsub <2 x double> [[X1_V_I0_2]], [[X1_V_I1_2]]
; CHECK-NEXT: [[Y1:%.*]] = fmul <2 x double> [[X1]], [[X1_V_I0_2]]
; CHECK-NEXT: [[Z1_V_I1_1:%.*]] = insertelement <2 x double> undef, double [[B2]], i32 0
; CHECK-NEXT: [[Z1_V_I1_2:%.*]] = insertelement <2 x double> [[Z1_V_I1_1]], double [[B1]], i32 1
; CHECK-NEXT: [[Z2:%.*]] = fadd <2 x double> [[Y1]], [[Z1_V_I1_2]]
; CHECK-NEXT: [[Z2_V_R1:%.*]] = extractelement <2 x double> [[Z2]], i32 0
; CHECK-NEXT: [[Z2_V_R2:%.*]] = extractelement <2 x double> [[Z2]], i32 1
; CHECK-NEXT: [[R:%.*]] = fmul double [[Z2_V_R2]], [[Z2_V_R1]]
; CHECK-NEXT: ret double [[R]]
;
%X1 = fsub double %A1, %B1
%X2 = fsub double %A2, %B2
%Y1 = fmul double %X1, %A1
%Y2 = fmul double %X2, %A2
%Z1 = fadd double %Y2, %B1
%Z2 = fadd double %Y1, %B2
%R = fmul double %Z1, %Z2
ret double %R
}
; Basic depth-4 chain (internal permutation)
define double @test4(double %A1, double %A2, double %B1, double %B2) {
; Depth-4 variant with an internal permutation.  Note %W1/%W2 are dead (%R
; uses only %Z1/%Z2), so the checks match @test2's output with no W ops.
; CHECK-LABEL: @test4(
; CHECK-NEXT: [[X1_V_I1_1:%.*]] = insertelement <2 x double> undef, double [[B1:%.*]], i32 0
; CHECK-NEXT: [[X1_V_I1_2:%.*]] = insertelement <2 x double> [[X1_V_I1_1]], double [[B2:%.*]], i32 1
; CHECK-NEXT: [[X1_V_I0_1:%.*]] = insertelement <2 x double> undef, double [[A1:%.*]], i32 0
; CHECK-NEXT: [[X1_V_I0_2:%.*]] = insertelement <2 x double> [[X1_V_I0_1]], double [[A2:%.*]], i32 1
; CHECK-NEXT: [[X1:%.*]] = fsub <2 x double> [[X1_V_I0_2]], [[X1_V_I1_2]]
; CHECK-NEXT: [[Y1:%.*]] = fmul <2 x double> [[X1]], [[X1_V_I0_2]]
; CHECK-NEXT: [[Z1_V_I1_1:%.*]] = insertelement <2 x double> undef, double [[B2]], i32 0
; CHECK-NEXT: [[Z1_V_I1_2:%.*]] = insertelement <2 x double> [[Z1_V_I1_1]], double [[B1]], i32 1
; CHECK-NEXT: [[Z2:%.*]] = fadd <2 x double> [[Y1]], [[Z1_V_I1_2]]
; CHECK-NEXT: [[Z2_V_R1:%.*]] = extractelement <2 x double> [[Z2]], i32 0
; CHECK-NEXT: [[Z2_V_R2:%.*]] = extractelement <2 x double> [[Z2]], i32 1
; CHECK-NEXT: [[R:%.*]] = fmul double [[Z2_V_R2]], [[Z2_V_R1]]
; CHECK-NEXT: ret double [[R]]
;
%X1 = fsub double %A1, %B1
%X2 = fsub double %A2, %B2
%Y1 = fmul double %X1, %A1
%Y2 = fmul double %X2, %A2
%Z1 = fadd double %Y2, %B1
%Z2 = fadd double %Y1, %B2
%W1 = fadd double %Y2, %Z1
%W2 = fadd double %Y1, %Z2
%R = fmul double %Z1, %Z2
ret double %R
}
; Basic chain with shuffles
define <8 x i8> @test6(<8 x i8> %A1, <8 x i8> %A2, <8 x i8> %B1, <8 x i8> %B2) {
; Inputs are already <8 x i8> vectors; the checks show the chain unchanged.
; In %Q2 the mask only reads the first operand, so the checks show its second
; operand as undef (presumably instcombine canonicalization — see RUN line).
; CHECK-LABEL: @test6(
; CHECK-NEXT: [[X1:%.*]] = sub <8 x i8> [[A1:%.*]], [[B1:%.*]]
; CHECK-NEXT: [[X2:%.*]] = sub <8 x i8> [[A2:%.*]], [[B2:%.*]]
; CHECK-NEXT: [[Y1:%.*]] = mul <8 x i8> [[X1]], [[A1]]
; CHECK-NEXT: [[Y2:%.*]] = mul <8 x i8> [[X2]], [[A2]]
; CHECK-NEXT: [[Z1:%.*]] = add <8 x i8> [[Y1]], [[B1]]
; CHECK-NEXT: [[Z2:%.*]] = add <8 x i8> [[Y2]], [[B2]]
; CHECK-NEXT: [[Q1:%.*]] = shufflevector <8 x i8> [[Z1]], <8 x i8> [[Z2]], <8 x i32> <i32 15, i32 8, i32 6, i32 1, i32 13, i32 10, i32 4, i32 3>
; CHECK-NEXT: [[Q2:%.*]] = shufflevector <8 x i8> [[Z2]], <8 x i8> undef, <8 x i32> <i32 6, i32 7, i32 0, i32 1, i32 2, i32 4, i32 4, i32 1>
; CHECK-NEXT: [[R:%.*]] = mul <8 x i8> [[Q1]], [[Q2]]
; CHECK-NEXT: ret <8 x i8> [[R]]
;
%X1 = sub <8 x i8> %A1, %B1
%X2 = sub <8 x i8> %A2, %B2
%Y1 = mul <8 x i8> %X1, %A1
%Y2 = mul <8 x i8> %X2, %A2
%Z1 = add <8 x i8> %Y1, %B1
%Z2 = add <8 x i8> %Y2, %B2
%Q1 = shufflevector <8 x i8> %Z1, <8 x i8> %Z2, <8 x i32> <i32 15, i32 8, i32 6, i32 1, i32 13, i32 10, i32 4, i32 3>
%Q2 = shufflevector <8 x i8> %Z2, <8 x i8> %Z2, <8 x i32> <i32 6, i32 7, i32 0, i32 1, i32 2, i32 4, i32 4, i32 1>
%R = mul <8 x i8> %Q1, %Q2
ret <8 x i8> %R
}

View File

@ -1,19 +0,0 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7 -bb-vectorize -S | FileCheck %s
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"
define void @main() nounwind uwtable {
; Two dead <2 x i64> -> i128 bitcasts; the checks verify they are preserved
; as-is (not paired/merged) and the function still just returns.
; CHECK-LABEL: @main(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> undef to i128
; CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> undef to i128
; CHECK-NEXT: ret void
;
entry:
%0 = bitcast <2 x i64> undef to i128
%1 = bitcast <2 x i64> undef to i128
ret void
}

View File

@ -1,144 +0,0 @@
; RUN: opt -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7-avx -disable-basicaa -bb-vectorize -S < %s | FileCheck %s
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"
%class.QBezier.15 = type { double, double, double, double, double, double, double, double }
; Function Attrs: nounwind
declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture readonly, i64, i32, i1) #0
; Function Attrs: uwtable
declare fastcc void @_ZL12printQBezier7QBezier(%class.QBezier.15* byval nocapture readonly align 8) #1
; Function Attrs: nounwind
declare void @llvm.lifetime.start(i64, i8* nocapture) #0
; Function Attrs: nounwind
declare void @llvm.lifetime.end.p0i8(i64, i8* nocapture) #0
define void @main_arrayctor.cont([10 x %class.QBezier.15]* %beziers, %class.QBezier.15* %agg.tmp.i, %class.QBezier.15* %agg.tmp55.i, %class.QBezier.15* %agg.tmp56.i) {
; Extracted from a QBezier-splitting routine (note the %class.QBezier.15
; fields and the @_ZL12printQBezier7QBezier calls).  The CHECK lines verify
; that <2 x double> ops appear, the printQBezier calls survive, and the
; scalar store/load of %x3.i keep their form and 16-byte alignment.
newFuncRoot:
br label %arrayctor.cont
arrayctor.cont.ret.exitStub: ; preds = %arrayctor.cont
ret void
; CHECK-LABEL: @main_arrayctor.cont
; CHECK: <2 x double>
; CHECK: @_ZL12printQBezier7QBezier
; CHECK: store double %mul8.i, double* %x3.i, align 16
; CHECK: load double, double* %x3.i, align 16
; CHECK: ret
arrayctor.cont: ; preds = %newFuncRoot
; Initialize the first QBezier's eight double fields with 10..80.
%ref.tmp.sroa.0.0.idx = getelementptr inbounds [10 x %class.QBezier.15], [10 x %class.QBezier.15]* %beziers, i64 0, i64 0, i32 0
store double 1.000000e+01, double* %ref.tmp.sroa.0.0.idx, align 16
%ref.tmp.sroa.2.0.idx1 = getelementptr inbounds [10 x %class.QBezier.15], [10 x %class.QBezier.15]* %beziers, i64 0, i64 0, i32 1
store double 2.000000e+01, double* %ref.tmp.sroa.2.0.idx1, align 8
%ref.tmp.sroa.3.0.idx2 = getelementptr inbounds [10 x %class.QBezier.15], [10 x %class.QBezier.15]* %beziers, i64 0, i64 0, i32 2
store double 3.000000e+01, double* %ref.tmp.sroa.3.0.idx2, align 16
%ref.tmp.sroa.4.0.idx3 = getelementptr inbounds [10 x %class.QBezier.15], [10 x %class.QBezier.15]* %beziers, i64 0, i64 0, i32 3
store double 4.000000e+01, double* %ref.tmp.sroa.4.0.idx3, align 8
%ref.tmp.sroa.5.0.idx4 = getelementptr inbounds [10 x %class.QBezier.15], [10 x %class.QBezier.15]* %beziers, i64 0, i64 0, i32 4
store double 5.000000e+01, double* %ref.tmp.sroa.5.0.idx4, align 16
%ref.tmp.sroa.6.0.idx5 = getelementptr inbounds [10 x %class.QBezier.15], [10 x %class.QBezier.15]* %beziers, i64 0, i64 0, i32 5
store double 6.000000e+01, double* %ref.tmp.sroa.6.0.idx5, align 8
%ref.tmp.sroa.7.0.idx6 = getelementptr inbounds [10 x %class.QBezier.15], [10 x %class.QBezier.15]* %beziers, i64 0, i64 0, i32 6
store double 7.000000e+01, double* %ref.tmp.sroa.7.0.idx6, align 16
%ref.tmp.sroa.8.0.idx7 = getelementptr inbounds [10 x %class.QBezier.15], [10 x %class.QBezier.15]* %beziers, i64 0, i64 0, i32 7
store double 8.000000e+01, double* %ref.tmp.sroa.8.0.idx7, align 8
%add.ptr = getelementptr inbounds [10 x %class.QBezier.15], [10 x %class.QBezier.15]* %beziers, i64 0, i64 1
%v0 = bitcast %class.QBezier.15* %agg.tmp.i to i8*
call void @llvm.lifetime.start(i64 64, i8* %v0)
%v1 = bitcast %class.QBezier.15* %agg.tmp55.i to i8*
call void @llvm.lifetime.start(i64 64, i8* %v1)
%v2 = bitcast %class.QBezier.15* %agg.tmp56.i to i8*
call void @llvm.lifetime.start(i64 64, i8* %v2)
%v3 = bitcast [10 x %class.QBezier.15]* %beziers to i8*
call void @llvm.memcpy.p0i8.p0i8.i64(i8* %v0, i8* %v3, i64 64, i32 8, i1 false)
call fastcc void @_ZL12printQBezier7QBezier(%class.QBezier.15* byval align 8 %agg.tmp.i)
; Midpoint-averaging updates on the x fields (0, 2, 4, 6).
%x2.i = getelementptr inbounds [10 x %class.QBezier.15], [10 x %class.QBezier.15]* %beziers, i64 0, i64 0, i32 2
%v4 = load double, double* %x2.i, align 16
%x3.i = getelementptr inbounds [10 x %class.QBezier.15], [10 x %class.QBezier.15]* %beziers, i64 0, i64 0, i32 4
%v5 = load double, double* %x3.i, align 16
%add.i = fadd double %v4, %v5
%mul.i = fmul double 5.000000e-01, %add.i
%x1.i = getelementptr inbounds [10 x %class.QBezier.15], [10 x %class.QBezier.15]* %beziers, i64 0, i64 0, i32 0
%v6 = load double, double* %x1.i, align 16
%add3.i = fadd double %v4, %v6
%mul4.i = fmul double 5.000000e-01, %add3.i
%x25.i = getelementptr inbounds [10 x %class.QBezier.15], [10 x %class.QBezier.15]* %beziers, i64 0, i64 1, i32 2
store double %mul4.i, double* %x25.i, align 16
%v7 = load double, double* %x3.i, align 16
%x4.i = getelementptr inbounds [10 x %class.QBezier.15], [10 x %class.QBezier.15]* %beziers, i64 0, i64 0, i32 6
%v8 = load double, double* %x4.i, align 16
%add7.i = fadd double %v7, %v8
%mul8.i = fmul double 5.000000e-01, %add7.i
store double %mul8.i, double* %x3.i, align 16
%v9 = load double, double* %x1.i, align 16
%x111.i = getelementptr inbounds %class.QBezier.15, %class.QBezier.15* %add.ptr, i64 0, i32 0
store double %v9, double* %x111.i, align 16
%v10 = load double, double* %x25.i, align 16
%add15.i = fadd double %mul.i, %v10
%mul16.i = fmul double 5.000000e-01, %add15.i
%x317.i = getelementptr inbounds [10 x %class.QBezier.15], [10 x %class.QBezier.15]* %beziers, i64 0, i64 1, i32 4
store double %mul16.i, double* %x317.i, align 16
%v11 = load double, double* %x3.i, align 16
%add19.i = fadd double %mul.i, %v11
%mul20.i = fmul double 5.000000e-01, %add19.i
store double %mul20.i, double* %x2.i, align 16
%v12 = load double, double* %x317.i, align 16
%add24.i = fadd double %v12, %mul20.i
%mul25.i = fmul double 5.000000e-01, %add24.i
store double %mul25.i, double* %x1.i, align 16
%x427.i = getelementptr inbounds [10 x %class.QBezier.15], [10 x %class.QBezier.15]* %beziers, i64 0, i64 1, i32 6
store double %mul25.i, double* %x427.i, align 16
; Same averaging pattern on the y fields (1, 3, 5, 7).
%y2.i = getelementptr inbounds [10 x %class.QBezier.15], [10 x %class.QBezier.15]* %beziers, i64 0, i64 0, i32 3
%v13 = load double, double* %y2.i, align 8
%y3.i = getelementptr inbounds [10 x %class.QBezier.15], [10 x %class.QBezier.15]* %beziers, i64 0, i64 0, i32 5
%v14 = load double, double* %y3.i, align 8
%add28.i = fadd double %v13, %v14
%div.i = fmul double 5.000000e-01, %add28.i
%y1.i = getelementptr inbounds [10 x %class.QBezier.15], [10 x %class.QBezier.15]* %beziers, i64 0, i64 0, i32 1
%v15 = load double, double* %y1.i, align 8
%add30.i = fadd double %v13, %v15
%mul31.i = fmul double 5.000000e-01, %add30.i
%y232.i = getelementptr inbounds [10 x %class.QBezier.15], [10 x %class.QBezier.15]* %beziers, i64 0, i64 1, i32 3
store double %mul31.i, double* %y232.i, align 8
%v16 = load double, double* %y3.i, align 8
%y4.i = getelementptr inbounds [10 x %class.QBezier.15], [10 x %class.QBezier.15]* %beziers, i64 0, i64 0, i32 7
%v17 = load double, double* %y4.i, align 8
%add34.i = fadd double %v16, %v17
%mul35.i = fmul double 5.000000e-01, %add34.i
store double %mul35.i, double* %y3.i, align 8
%v18 = load double, double* %y1.i, align 8
%y138.i = getelementptr inbounds [10 x %class.QBezier.15], [10 x %class.QBezier.15]* %beziers, i64 0, i64 1, i32 1
store double %v18, double* %y138.i, align 8
%v19 = load double, double* %y232.i, align 8
%add42.i = fadd double %div.i, %v19
%mul43.i = fmul double 5.000000e-01, %add42.i
%y344.i = getelementptr inbounds [10 x %class.QBezier.15], [10 x %class.QBezier.15]* %beziers, i64 0, i64 1, i32 5
store double %mul43.i, double* %y344.i, align 8
%v20 = load double, double* %y3.i, align 8
%add46.i = fadd double %div.i, %v20
%mul47.i = fmul double 5.000000e-01, %add46.i
store double %mul47.i, double* %y2.i, align 8
%v21 = load double, double* %y344.i, align 8
%add51.i = fadd double %v21, %mul47.i
%mul52.i = fmul double 5.000000e-01, %add51.i
store double %mul52.i, double* %y1.i, align 8
%y454.i = getelementptr inbounds [10 x %class.QBezier.15], [10 x %class.QBezier.15]* %beziers, i64 0, i64 1, i32 7
store double %mul52.i, double* %y454.i, align 8
%v22 = bitcast %class.QBezier.15* %add.ptr to i8*
call void @llvm.memcpy.p0i8.p0i8.i64(i8* %v1, i8* %v22, i64 64, i32 8, i1 false)
call fastcc void @_ZL12printQBezier7QBezier(%class.QBezier.15* byval align 8 %agg.tmp55.i)
call void @llvm.memcpy.p0i8.p0i8.i64(i8* %v2, i8* %v3, i64 64, i32 8, i1 false)
call fastcc void @_ZL12printQBezier7QBezier(%class.QBezier.15* byval align 8 %agg.tmp56.i)
call void @llvm.lifetime.end.p0i8(i64 64, i8* %v0)
call void @llvm.lifetime.end.p0i8(i64 64, i8* %v1)
call void @llvm.lifetime.end.p0i8(i64 64, i8* %v2)
br label %arrayctor.cont.ret.exitStub
}
attributes #0 = { nounwind }
attributes #1 = { uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf"="true" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }

View File

@ -1,112 +0,0 @@
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
; RUN: opt < %s -bb-vectorize -bb-vectorize-req-chain-depth=3 -bb-vectorize-ignore-target-info -instcombine -gvn -S | FileCheck %s
; This test checks the non-trivial pairing-induced cycle avoidance. Without this cycle avoidance, the algorithm would otherwise
; want to select the pairs:
; %div77 = fdiv double %sub74, %mul76.v.r1 <-> %div125 = fdiv double %mul121, %mul76.v.r2 (div125 depends on mul117)
; %add84 = fadd double %sub83, 2.000000e+00 <-> %add127 = fadd double %mul126, 1.000000e+00 (add127 depends on div77)
; %mul95 = fmul double %sub45.v.r1, %sub36.v.r1 <-> %mul88 = fmul double %sub36.v.r1, %sub87 (mul88 depends on add84)
; %mul117 = fmul double %sub39.v.r1, %sub116 <-> %mul97 = fmul double %mul96, %sub39.v.r1 (mul97 depends on mul95)
; and so a dependency cycle would be created.
declare double @fabs(double) nounwind readnone
define void @test1(double %a, double %b, double %c, double %add80, double %mul1, double %mul2.v.r1, double %mul73, double %sub, double %sub65, double %F.0, i32 %n.0, double %Bnm3.0, double %Bnm2.0, double %Bnm1.0, double %Anm3.0, double %Anm2.0, double %Anm1.0) {
; Regression test for pairing-induced dependency cycles; see the comment
; block above the function for the exact pairs that must NOT be selected.
; The final CHECK lines only pin the start of the output (see FIXME below).
entry:
br label %go
go:
%conv = sitofp i32 %n.0 to double
%add35 = fadd double %conv, %a
%sub36 = fadd double %add35, -1.000000e+00
%add38 = fadd double %conv, %b
%sub39 = fadd double %add38, -1.000000e+00
%add41 = fadd double %conv, %c
%sub42 = fadd double %add41, -1.000000e+00
%sub45 = fadd double %add35, -2.000000e+00
%sub48 = fadd double %add38, -2.000000e+00
%sub51 = fadd double %add41, -2.000000e+00
%mul52 = shl nsw i32 %n.0, 1
%sub53 = add nsw i32 %mul52, -1
%conv54 = sitofp i32 %sub53 to double
%sub56 = add nsw i32 %mul52, -3
%conv57 = sitofp i32 %sub56 to double
%sub59 = add nsw i32 %mul52, -5
%conv60 = sitofp i32 %sub59 to double
%mul61 = mul nsw i32 %n.0, %n.0
%conv62 = sitofp i32 %mul61 to double
%mul63 = fmul double %conv62, 3.000000e+00
%mul67 = fmul double %sub65, %conv
%add68 = fadd double %mul63, %mul67
%add69 = fadd double %add68, 2.000000e+00
%sub71 = fsub double %add69, %mul2.v.r1
%sub74 = fsub double %sub71, %mul73
%mul75 = fmul double %conv57, 2.000000e+00
%mul76 = fmul double %mul75, %sub42
%div77 = fdiv double %sub74, %mul76
%mul82 = fmul double %add80, %conv
%sub83 = fsub double %mul63, %mul82
%add84 = fadd double %sub83, 2.000000e+00
%sub86 = fsub double %add84, %mul2.v.r1
%sub87 = fsub double -0.000000e+00, %sub86
%mul88 = fmul double %sub36, %sub87
%mul89 = fmul double %mul88, %sub39
%mul90 = fmul double %conv54, 4.000000e+00
%mul91 = fmul double %mul90, %conv57
%mul92 = fmul double %mul91, %sub51
%mul93 = fmul double %mul92, %sub42
%div94 = fdiv double %mul89, %mul93
%mul95 = fmul double %sub45, %sub36
%mul96 = fmul double %mul95, %sub48
%mul97 = fmul double %mul96, %sub39
%sub99 = fsub double %conv, %a
%sub100 = fadd double %sub99, -2.000000e+00
%mul101 = fmul double %mul97, %sub100
%sub103 = fsub double %conv, %b
%sub104 = fadd double %sub103, -2.000000e+00
%mul105 = fmul double %mul101, %sub104
%mul106 = fmul double %conv57, 8.000000e+00
%mul107 = fmul double %mul106, %conv57
%mul108 = fmul double %mul107, %conv60
%sub111 = fadd double %add41, -3.000000e+00
%mul112 = fmul double %mul108, %sub111
%mul113 = fmul double %mul112, %sub51
%mul114 = fmul double %mul113, %sub42
%div115 = fdiv double %mul105, %mul114
%sub116 = fsub double -0.000000e+00, %sub36
%mul117 = fmul double %sub39, %sub116
%sub119 = fsub double %conv, %c
%sub120 = fadd double %sub119, -1.000000e+00
%mul121 = fmul double %mul117, %sub120
%mul123 = fmul double %mul75, %sub51
%mul124 = fmul double %mul123, %sub42
%div125 = fdiv double %mul121, %mul124
%mul126 = fmul double %div77, %sub
%add127 = fadd double %mul126, 1.000000e+00
%mul128 = fmul double %add127, %Anm1.0
%mul129 = fmul double %div94, %sub
%add130 = fadd double %div125, %mul129
%mul131 = fmul double %add130, %sub
%mul132 = fmul double %mul131, %Anm2.0
%add133 = fadd double %mul128, %mul132
%mul134 = fmul double %div115, %mul1
%mul135 = fmul double %mul134, %Anm3.0
%add136 = fadd double %add133, %mul135
%mul139 = fmul double %add127, %Bnm1.0
%mul143 = fmul double %mul131, %Bnm2.0
%add144 = fadd double %mul139, %mul143
%mul146 = fmul double %mul134, %Bnm3.0
%add147 = fadd double %add144, %mul146
%div148 = fdiv double %add136, %add147
%sub149 = fsub double %F.0, %div148
%div150 = fdiv double %sub149, %F.0
%call = tail call double @fabs(double %div150) nounwind readnone
%cmp = fcmp olt double %call, 0x3CB0000000000000
%cmp152 = icmp sgt i32 %n.0, 20000
%or.cond = or i1 %cmp, %cmp152
br i1 %or.cond, label %done, label %go
done:
ret void
; CHECK-LABEL: @test1(
; CHECK: go:
; CHECK: %conv.v.i0.1 = insertelement <2 x i32> undef, i32 %n.0, i32 0
; FIXME: When tree pruning is deterministic, include the entire output.
}

View File

@ -1,244 +0,0 @@
target datalayout = "e-p:64:64:64-S128-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f16:16:16-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-f128:128:128-n8:16:32:64"
target triple = "x86_64-unknown-linux-gnu"
; RUN: opt < %s -basicaa -bb-vectorize -bb-vectorize-req-chain-depth=2 -instcombine -gvn -S | FileCheck %s
; The chain length is set to 2 so that this will do some vectorization; check that the order of the functions is unchanged.
%struct.descriptor_dimension = type { i64, i64, i64 }
%struct.__st_parameter_common = type { i32, i32, i8*, i32, i32, i8*, i32* }
%struct.__st_parameter_dt = type { %struct.__st_parameter_common, i64, i64*, i64*, i8*, i8*, i32, i32, i8*, i8*, i32, i32, i8*, [256 x i8], i32*, i64, i8*, i32, i32, i8*, i8*, i32, i32, i8*, i8*, i32, i32, i8*, i8*, i32, [4 x i8] }
%"struct.array4_real(kind=4)" = type { i8*, i64, i64, [4 x %struct.descriptor_dimension] }
%"struct.array4_integer(kind=4).73" = type { i8*, i64, i64, [4 x %struct.descriptor_dimension] }
%struct.array4_unknown = type { i8*, i64, i64, [4 x %struct.descriptor_dimension] }
@.cst4 = external unnamed_addr constant [11 x i8], align 8
@.cst823 = external unnamed_addr constant [214 x i8], align 64
@j.4580 = external global i32
@j1.4581 = external global i32
@nty1.4590 = external global [2 x i8]
@nty2.4591 = external global [2 x i8]
@xr1.4592 = external global float
@xr2.4593 = external global float
@yr1.4594 = external global float
@yr2.4595 = external global float
@__main1_MOD_iave = external unnamed_addr global i32
@__main1_MOD_igrp = external global i32
@__main1_MOD_iounit = external global i32
@__main1_MOD_ityp = external global i32
@__main1_MOD_mclmsg = external unnamed_addr global %struct.array4_unknown, align 32
@__main1_MOD_mxdate = external unnamed_addr global %"struct.array4_integer(kind=4).73", align 32
@__main1_MOD_rmxval = external unnamed_addr global %"struct.array4_real(kind=4)", align 32
declare void @_gfortran_st_write(%struct.__st_parameter_dt*)
declare void @_gfortran_st_write_done(%struct.__st_parameter_dt*)
declare void @_gfortran_transfer_character_write(%struct.__st_parameter_dt*, i8*, i32)
declare void @_gfortran_transfer_integer_write(%struct.__st_parameter_dt*, i8*, i32)
declare void @_gfortran_transfer_real_write(%struct.__st_parameter_dt*, i8*, i32)
define i1 @"prtmax__<bb 3>_<bb 34>"(%struct.__st_parameter_dt* %memtmp3, i32 %D.4627_188.reload) nounwind {
; CHECK: prtmax__
newFuncRoot:
br label %"<bb 34>"
codeRepl80.exitStub: ; preds = %"<bb 34>"
ret i1 true
"<bb 34>.<bb 25>_crit_edge.exitStub": ; preds = %"<bb 34>"
ret i1 false
"<bb 34>": ; preds = %newFuncRoot
%tmp128 = getelementptr inbounds %struct.__st_parameter_dt, %struct.__st_parameter_dt* %memtmp3, i32 0, i32 0
%tmp129 = getelementptr inbounds %struct.__st_parameter_common, %struct.__st_parameter_common* %tmp128, i32 0, i32 2
store i8* getelementptr inbounds ([11 x i8], [11 x i8]* @.cst4, i64 0, i64 0), i8** %tmp129, align 8
%tmp130 = getelementptr inbounds %struct.__st_parameter_dt, %struct.__st_parameter_dt* %memtmp3, i32 0, i32 0
%tmp131 = getelementptr inbounds %struct.__st_parameter_common, %struct.__st_parameter_common* %tmp130, i32 0, i32 3
store i32 31495, i32* %tmp131, align 4
%tmp132 = getelementptr inbounds %struct.__st_parameter_dt, %struct.__st_parameter_dt* %memtmp3, i32 0, i32 5
store i8* getelementptr inbounds ([214 x i8], [214 x i8]* @.cst823, i64 0, i64 0), i8** %tmp132, align 8
%tmp133 = getelementptr inbounds %struct.__st_parameter_dt, %struct.__st_parameter_dt* %memtmp3, i32 0, i32 6
store i32 214, i32* %tmp133, align 4
%tmp134 = getelementptr inbounds %struct.__st_parameter_dt, %struct.__st_parameter_dt* %memtmp3, i32 0, i32 0
%tmp135 = getelementptr inbounds %struct.__st_parameter_common, %struct.__st_parameter_common* %tmp134, i32 0, i32 0
store i32 4096, i32* %tmp135, align 4
%iounit.8748_288 = load i32, i32* @__main1_MOD_iounit, align 4
%tmp136 = getelementptr inbounds %struct.__st_parameter_dt, %struct.__st_parameter_dt* %memtmp3, i32 0, i32 0
%tmp137 = getelementptr inbounds %struct.__st_parameter_common, %struct.__st_parameter_common* %tmp136, i32 0, i32 1
store i32 %iounit.8748_288, i32* %tmp137, align 4
call void @_gfortran_st_write(%struct.__st_parameter_dt* %memtmp3) nounwind
call void bitcast (void (%struct.__st_parameter_dt*, i8*, i32)* @_gfortran_transfer_integer_write to void (%struct.__st_parameter_dt*, i32*, i32)*)(%struct.__st_parameter_dt* %memtmp3, i32* @j.4580, i32 4) nounwind
; CHECK: @_gfortran_transfer_integer_write
%D.75807_289 = load i8*, i8** getelementptr inbounds (%"struct.array4_real(kind=4)", %"struct.array4_real(kind=4)"* @__main1_MOD_rmxval, i64 0, i32 0), align 8
%j.8758_290 = load i32, i32* @j.4580, align 4
%D.75760_291 = sext i32 %j.8758_290 to i64
%iave.8736_292 = load i32, i32* @__main1_MOD_iave, align 4
%D.75620_293 = sext i32 %iave.8736_292 to i64
%D.75808_294 = load i64, i64* getelementptr inbounds (%"struct.array4_real(kind=4)", %"struct.array4_real(kind=4)"* @__main1_MOD_rmxval, i64 0, i32 3, i64 2, i32 0), align 8
%D.75809_295 = mul nsw i64 %D.75620_293, %D.75808_294
%igrp.8737_296 = load i32, i32* @__main1_MOD_igrp, align 4
%D.75635_297 = sext i32 %igrp.8737_296 to i64
%D.75810_298 = load i64, i64* getelementptr inbounds (%"struct.array4_real(kind=4)", %"struct.array4_real(kind=4)"* @__main1_MOD_rmxval, i64 0, i32 3, i64 1, i32 0), align 8
%D.75811_299 = mul nsw i64 %D.75635_297, %D.75810_298
%D.75812_300 = add nsw i64 %D.75809_295, %D.75811_299
%D.75813_301 = add nsw i64 %D.75760_291, %D.75812_300
%ityp.8750_302 = load i32, i32* @__main1_MOD_ityp, align 4
%D.75704_303 = sext i32 %ityp.8750_302 to i64
%D.75814_304 = load i64, i64* getelementptr inbounds (%"struct.array4_real(kind=4)", %"struct.array4_real(kind=4)"* @__main1_MOD_rmxval, i64 0, i32 3, i64 3, i32 0), align 8
%D.75815_305 = mul nsw i64 %D.75704_303, %D.75814_304
%D.75816_306 = add nsw i64 %D.75813_301, %D.75815_305
%D.75817_307 = load i64, i64* getelementptr inbounds (%"struct.array4_real(kind=4)", %"struct.array4_real(kind=4)"* @__main1_MOD_rmxval, i64 0, i32 1), align 8
%D.75818_308 = add nsw i64 %D.75816_306, %D.75817_307
%tmp138 = bitcast i8* %D.75807_289 to [0 x float]*
%tmp139 = bitcast [0 x float]* %tmp138 to float*
%D.75819_309 = getelementptr inbounds float, float* %tmp139, i64 %D.75818_308
call void bitcast (void (%struct.__st_parameter_dt*, i8*, i32)* @_gfortran_transfer_real_write to void (%struct.__st_parameter_dt*, float*, i32)*)(%struct.__st_parameter_dt* %memtmp3, float* %D.75819_309, i32 4) nounwind
; CHECK: @_gfortran_transfer_real_write
%D.75820_310 = load i8*, i8** getelementptr inbounds (%struct.array4_unknown, %struct.array4_unknown* @__main1_MOD_mclmsg, i64 0, i32 0), align 8
%j.8758_311 = load i32, i32* @j.4580, align 4
%D.75760_312 = sext i32 %j.8758_311 to i64
%iave.8736_313 = load i32, i32* @__main1_MOD_iave, align 4
%D.75620_314 = sext i32 %iave.8736_313 to i64
%D.75821_315 = load i64, i64* getelementptr inbounds (%struct.array4_unknown, %struct.array4_unknown* @__main1_MOD_mclmsg, i64 0, i32 3, i64 2, i32 0), align 8
%D.75822_316 = mul nsw i64 %D.75620_314, %D.75821_315
%igrp.8737_317 = load i32, i32* @__main1_MOD_igrp, align 4
%D.75635_318 = sext i32 %igrp.8737_317 to i64
%D.75823_319 = load i64, i64* getelementptr inbounds (%struct.array4_unknown, %struct.array4_unknown* @__main1_MOD_mclmsg, i64 0, i32 3, i64 1, i32 0), align 8
%D.75824_320 = mul nsw i64 %D.75635_318, %D.75823_319
%D.75825_321 = add nsw i64 %D.75822_316, %D.75824_320
%D.75826_322 = add nsw i64 %D.75760_312, %D.75825_321
%ityp.8750_323 = load i32, i32* @__main1_MOD_ityp, align 4
%D.75704_324 = sext i32 %ityp.8750_323 to i64
%D.75827_325 = load i64, i64* getelementptr inbounds (%struct.array4_unknown, %struct.array4_unknown* @__main1_MOD_mclmsg, i64 0, i32 3, i64 3, i32 0), align 8
%D.75828_326 = mul nsw i64 %D.75704_324, %D.75827_325
%D.75829_327 = add nsw i64 %D.75826_322, %D.75828_326
%D.75830_328 = load i64, i64* getelementptr inbounds (%struct.array4_unknown, %struct.array4_unknown* @__main1_MOD_mclmsg, i64 0, i32 1), align 8
%D.75831_329 = add nsw i64 %D.75829_327, %D.75830_328
%tmp140 = bitcast i8* %D.75820_310 to [0 x [1 x i8]]*
%tmp141 = bitcast [0 x [1 x i8]]* %tmp140 to [1 x i8]*
%D.75832_330 = getelementptr inbounds [1 x i8], [1 x i8]* %tmp141, i64 %D.75831_329
call void bitcast (void (%struct.__st_parameter_dt*, i8*, i32)* @_gfortran_transfer_character_write to void (%struct.__st_parameter_dt*, [1 x i8]*, i32)*)(%struct.__st_parameter_dt* %memtmp3, [1 x i8]* %D.75832_330, i32 1) nounwind
; CHECK: @_gfortran_transfer_character_write
%D.75833_331 = load i8*, i8** getelementptr inbounds (%"struct.array4_integer(kind=4).73", %"struct.array4_integer(kind=4).73"* @__main1_MOD_mxdate, i64 0, i32 0), align 8
%j.8758_332 = load i32, i32* @j.4580, align 4
%D.75760_333 = sext i32 %j.8758_332 to i64
%iave.8736_334 = load i32, i32* @__main1_MOD_iave, align 4
%D.75620_335 = sext i32 %iave.8736_334 to i64
%D.75834_336 = load i64, i64* getelementptr inbounds (%"struct.array4_integer(kind=4).73", %"struct.array4_integer(kind=4).73"* @__main1_MOD_mxdate, i64 0, i32 3, i64 2, i32 0), align 8
%D.75835_337 = mul nsw i64 %D.75620_335, %D.75834_336
%igrp.8737_338 = load i32, i32* @__main1_MOD_igrp, align 4
%D.75635_339 = sext i32 %igrp.8737_338 to i64
%D.75836_340 = load i64, i64* getelementptr inbounds (%"struct.array4_integer(kind=4).73", %"struct.array4_integer(kind=4).73"* @__main1_MOD_mxdate, i64 0, i32 3, i64 1, i32 0), align 8
%D.75837_341 = mul nsw i64 %D.75635_339, %D.75836_340
%D.75838_342 = add nsw i64 %D.75835_337, %D.75837_341
%D.75839_343 = add nsw i64 %D.75760_333, %D.75838_342
%ityp.8750_344 = load i32, i32* @__main1_MOD_ityp, align 4
%D.75704_345 = sext i32 %ityp.8750_344 to i64
%D.75840_346 = load i64, i64* getelementptr inbounds (%"struct.array4_integer(kind=4).73", %"struct.array4_integer(kind=4).73"* @__main1_MOD_mxdate, i64 0, i32 3, i64 3, i32 0), align 8
%D.75841_347 = mul nsw i64 %D.75704_345, %D.75840_346
%D.75842_348 = add nsw i64 %D.75839_343, %D.75841_347
%D.75843_349 = load i64, i64* getelementptr inbounds (%"struct.array4_integer(kind=4).73", %"struct.array4_integer(kind=4).73"* @__main1_MOD_mxdate, i64 0, i32 1), align 8
%D.75844_350 = add nsw i64 %D.75842_348, %D.75843_349
%tmp142 = bitcast i8* %D.75833_331 to [0 x i32]*
%tmp143 = bitcast [0 x i32]* %tmp142 to i32*
%D.75845_351 = getelementptr inbounds i32, i32* %tmp143, i64 %D.75844_350
call void bitcast (void (%struct.__st_parameter_dt*, i8*, i32)* @_gfortran_transfer_integer_write to void (%struct.__st_parameter_dt*, i32*, i32)*)(%struct.__st_parameter_dt* %memtmp3, i32* %D.75845_351, i32 4) nounwind
; CHECK: @_gfortran_transfer_integer_write
call void bitcast (void (%struct.__st_parameter_dt*, i8*, i32)* @_gfortran_transfer_real_write to void (%struct.__st_parameter_dt*, float*, i32)*)(%struct.__st_parameter_dt* %memtmp3, float* @xr1.4592, i32 4) nounwind
; CHECK: @_gfortran_transfer_real_write
call void bitcast (void (%struct.__st_parameter_dt*, i8*, i32)* @_gfortran_transfer_real_write to void (%struct.__st_parameter_dt*, float*, i32)*)(%struct.__st_parameter_dt* %memtmp3, float* @yr1.4594, i32 4) nounwind
; CHECK: @_gfortran_transfer_real_write
call void bitcast (void (%struct.__st_parameter_dt*, i8*, i32)* @_gfortran_transfer_character_write to void (%struct.__st_parameter_dt*, [2 x i8]*, i32)*)(%struct.__st_parameter_dt* %memtmp3, [2 x i8]* @nty1.4590, i32 2) nounwind
; CHECK: @_gfortran_transfer_character_write
call void bitcast (void (%struct.__st_parameter_dt*, i8*, i32)* @_gfortran_transfer_integer_write to void (%struct.__st_parameter_dt*, i32*, i32)*)(%struct.__st_parameter_dt* %memtmp3, i32* @j1.4581, i32 4) nounwind
; CHECK: @_gfortran_transfer_integer_write
%D.75807_352 = load i8*, i8** getelementptr inbounds (%"struct.array4_real(kind=4)", %"struct.array4_real(kind=4)"* @__main1_MOD_rmxval, i64 0, i32 0), align 8
%j1.8760_353 = load i32, i32* @j1.4581, align 4
%D.75773_354 = sext i32 %j1.8760_353 to i64
%iave.8736_355 = load i32, i32* @__main1_MOD_iave, align 4
%D.75620_356 = sext i32 %iave.8736_355 to i64
%D.75808_357 = load i64, i64* getelementptr inbounds (%"struct.array4_real(kind=4)", %"struct.array4_real(kind=4)"* @__main1_MOD_rmxval, i64 0, i32 3, i64 2, i32 0), align 8
%D.75809_358 = mul nsw i64 %D.75620_356, %D.75808_357
%igrp.8737_359 = load i32, i32* @__main1_MOD_igrp, align 4
%D.75635_360 = sext i32 %igrp.8737_359 to i64
%D.75810_361 = load i64, i64* getelementptr inbounds (%"struct.array4_real(kind=4)", %"struct.array4_real(kind=4)"* @__main1_MOD_rmxval, i64 0, i32 3, i64 1, i32 0), align 8
%D.75811_362 = mul nsw i64 %D.75635_360, %D.75810_361
%D.75812_363 = add nsw i64 %D.75809_358, %D.75811_362
%D.75846_364 = add nsw i64 %D.75773_354, %D.75812_363
%ityp.8750_365 = load i32, i32* @__main1_MOD_ityp, align 4
%D.75704_366 = sext i32 %ityp.8750_365 to i64
%D.75814_367 = load i64, i64* getelementptr inbounds (%"struct.array4_real(kind=4)", %"struct.array4_real(kind=4)"* @__main1_MOD_rmxval, i64 0, i32 3, i64 3, i32 0), align 8
%D.75815_368 = mul nsw i64 %D.75704_366, %D.75814_367
%D.75847_369 = add nsw i64 %D.75846_364, %D.75815_368
%D.75817_370 = load i64, i64* getelementptr inbounds (%"struct.array4_real(kind=4)", %"struct.array4_real(kind=4)"* @__main1_MOD_rmxval, i64 0, i32 1), align 8
%D.75848_371 = add nsw i64 %D.75847_369, %D.75817_370
%tmp144 = bitcast i8* %D.75807_352 to [0 x float]*
%tmp145 = bitcast [0 x float]* %tmp144 to float*
%D.75849_372 = getelementptr inbounds float, float* %tmp145, i64 %D.75848_371
call void bitcast (void (%struct.__st_parameter_dt*, i8*, i32)* @_gfortran_transfer_real_write to void (%struct.__st_parameter_dt*, float*, i32)*)(%struct.__st_parameter_dt* %memtmp3, float* %D.75849_372, i32 4) nounwind
; CHECK: @_gfortran_transfer_real_write
%D.75820_373 = load i8*, i8** getelementptr inbounds (%struct.array4_unknown, %struct.array4_unknown* @__main1_MOD_mclmsg, i64 0, i32 0), align 8
%j1.8760_374 = load i32, i32* @j1.4581, align 4
%D.75773_375 = sext i32 %j1.8760_374 to i64
%iave.8736_376 = load i32, i32* @__main1_MOD_iave, align 4
%D.75620_377 = sext i32 %iave.8736_376 to i64
%D.75821_378 = load i64, i64* getelementptr inbounds (%struct.array4_unknown, %struct.array4_unknown* @__main1_MOD_mclmsg, i64 0, i32 3, i64 2, i32 0), align 8
%D.75822_379 = mul nsw i64 %D.75620_377, %D.75821_378
%igrp.8737_380 = load i32, i32* @__main1_MOD_igrp, align 4
%D.75635_381 = sext i32 %igrp.8737_380 to i64
%D.75823_382 = load i64, i64* getelementptr inbounds (%struct.array4_unknown, %struct.array4_unknown* @__main1_MOD_mclmsg, i64 0, i32 3, i64 1, i32 0), align 8
%D.75824_383 = mul nsw i64 %D.75635_381, %D.75823_382
%D.75825_384 = add nsw i64 %D.75822_379, %D.75824_383
%D.75850_385 = add nsw i64 %D.75773_375, %D.75825_384
%ityp.8750_386 = load i32, i32* @__main1_MOD_ityp, align 4
%D.75704_387 = sext i32 %ityp.8750_386 to i64
%D.75827_388 = load i64, i64* getelementptr inbounds (%struct.array4_unknown, %struct.array4_unknown* @__main1_MOD_mclmsg, i64 0, i32 3, i64 3, i32 0), align 8
%D.75828_389 = mul nsw i64 %D.75704_387, %D.75827_388
%D.75851_390 = add nsw i64 %D.75850_385, %D.75828_389
%D.75830_391 = load i64, i64* getelementptr inbounds (%struct.array4_unknown, %struct.array4_unknown* @__main1_MOD_mclmsg, i64 0, i32 1), align 8
%D.75852_392 = add nsw i64 %D.75851_390, %D.75830_391
%tmp146 = bitcast i8* %D.75820_373 to [0 x [1 x i8]]*
%tmp147 = bitcast [0 x [1 x i8]]* %tmp146 to [1 x i8]*
%D.75853_393 = getelementptr inbounds [1 x i8], [1 x i8]* %tmp147, i64 %D.75852_392
call void bitcast (void (%struct.__st_parameter_dt*, i8*, i32)* @_gfortran_transfer_character_write to void (%struct.__st_parameter_dt*, [1 x i8]*, i32)*)(%struct.__st_parameter_dt* %memtmp3, [1 x i8]* %D.75853_393, i32 1) nounwind
; CHECK: @_gfortran_transfer_character_write
%D.75833_394 = load i8*, i8** getelementptr inbounds (%"struct.array4_integer(kind=4).73", %"struct.array4_integer(kind=4).73"* @__main1_MOD_mxdate, i64 0, i32 0), align 8
%j1.8760_395 = load i32, i32* @j1.4581, align 4
%D.75773_396 = sext i32 %j1.8760_395 to i64
%iave.8736_397 = load i32, i32* @__main1_MOD_iave, align 4
%D.75620_398 = sext i32 %iave.8736_397 to i64
%D.75834_399 = load i64, i64* getelementptr inbounds (%"struct.array4_integer(kind=4).73", %"struct.array4_integer(kind=4).73"* @__main1_MOD_mxdate, i64 0, i32 3, i64 2, i32 0), align 8
%D.75835_400 = mul nsw i64 %D.75620_398, %D.75834_399
%igrp.8737_401 = load i32, i32* @__main1_MOD_igrp, align 4
%D.75635_402 = sext i32 %igrp.8737_401 to i64
%D.75836_403 = load i64, i64* getelementptr inbounds (%"struct.array4_integer(kind=4).73", %"struct.array4_integer(kind=4).73"* @__main1_MOD_mxdate, i64 0, i32 3, i64 1, i32 0), align 8
%D.75837_404 = mul nsw i64 %D.75635_402, %D.75836_403
%D.75838_405 = add nsw i64 %D.75835_400, %D.75837_404
%D.75854_406 = add nsw i64 %D.75773_396, %D.75838_405
%ityp.8750_407 = load i32, i32* @__main1_MOD_ityp, align 4
%D.75704_408 = sext i32 %ityp.8750_407 to i64
%D.75840_409 = load i64, i64* getelementptr inbounds (%"struct.array4_integer(kind=4).73", %"struct.array4_integer(kind=4).73"* @__main1_MOD_mxdate, i64 0, i32 3, i64 3, i32 0), align 8
%D.75841_410 = mul nsw i64 %D.75704_408, %D.75840_409
%D.75855_411 = add nsw i64 %D.75854_406, %D.75841_410
%D.75843_412 = load i64, i64* getelementptr inbounds (%"struct.array4_integer(kind=4).73", %"struct.array4_integer(kind=4).73"* @__main1_MOD_mxdate, i64 0, i32 1), align 8
%D.75856_413 = add nsw i64 %D.75855_411, %D.75843_412
%tmp148 = bitcast i8* %D.75833_394 to [0 x i32]*
%tmp149 = bitcast [0 x i32]* %tmp148 to i32*
%D.75857_414 = getelementptr inbounds i32, i32* %tmp149, i64 %D.75856_413
call void bitcast (void (%struct.__st_parameter_dt*, i8*, i32)* @_gfortran_transfer_integer_write to void (%struct.__st_parameter_dt*, i32*, i32)*)(%struct.__st_parameter_dt* %memtmp3, i32* %D.75857_414, i32 4) nounwind
; CHECK: @_gfortran_transfer_integer_write
call void bitcast (void (%struct.__st_parameter_dt*, i8*, i32)* @_gfortran_transfer_real_write to void (%struct.__st_parameter_dt*, float*, i32)*)(%struct.__st_parameter_dt* %memtmp3, float* @xr2.4593, i32 4) nounwind
; CHECK: @_gfortran_transfer_real_write
call void bitcast (void (%struct.__st_parameter_dt*, i8*, i32)* @_gfortran_transfer_real_write to void (%struct.__st_parameter_dt*, float*, i32)*)(%struct.__st_parameter_dt* %memtmp3, float* @yr2.4595, i32 4) nounwind
; CHECK: @_gfortran_transfer_real_write
call void bitcast (void (%struct.__st_parameter_dt*, i8*, i32)* @_gfortran_transfer_character_write to void (%struct.__st_parameter_dt*, [2 x i8]*, i32)*)(%struct.__st_parameter_dt* %memtmp3, [2 x i8]* @nty2.4591, i32 2) nounwind
; CHECK: @_gfortran_transfer_character_write
call void @_gfortran_st_write_done(%struct.__st_parameter_dt* %memtmp3) nounwind
; CHECK: @_gfortran_st_write_done
%j.8758_415 = load i32, i32* @j.4580, align 4
%D.4634_416 = icmp eq i32 %j.8758_415, %D.4627_188.reload
%j.8758_417 = load i32, i32* @j.4580, align 4
%j.8770_418 = add nsw i32 %j.8758_417, 1
store i32 %j.8770_418, i32* @j.4580, align 4
%tmp150 = icmp ne i1 %D.4634_416, false
br i1 %tmp150, label %codeRepl80.exitStub, label %"<bb 34>.<bb 25>_crit_edge.exitStub"
}

View File

@ -1,41 +0,0 @@
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
; RUN: opt < %s -bb-vectorize -bb-vectorize-req-chain-depth=3 -bb-vectorize-ignore-target-info -instcombine -gvn -S | FileCheck %s
define double @test1(double* %a, double* %b, double* %c) nounwind uwtable readonly {
entry:
%i0 = load double, double* %a, align 8
%i1 = load double, double* %b, align 8
%mul = fmul double %i0, %i1
%i2 = load double, double* %c, align 8
%add = fadd double %mul, %i2
%arrayidx3 = getelementptr inbounds double, double* %a, i64 1
%i3 = load double, double* %arrayidx3, align 8
%arrayidx4 = getelementptr inbounds double, double* %b, i64 1
%i4 = load double, double* %arrayidx4, align 8
%mul5 = fmul double %i3, %i4
%arrayidx6 = getelementptr inbounds double, double* %c, i64 1
%i5 = load double, double* %arrayidx6, align 8
%add7 = fadd double %mul5, %i5
%mul9 = fmul double %add, %i1
%add11 = fadd double %mul9, %i2
%mul13 = fmul double %add7, %i4
%add15 = fadd double %mul13, %i5
%mul16 = fmul double %add11, %add15
ret double %mul16
; CHECK-LABEL: @test1(
; CHECK: %i0.v.i0 = bitcast double* %a to <2 x double>*
; CHECK: %i1.v.i0 = bitcast double* %b to <2 x double>*
; CHECK: %i2.v.i0 = bitcast double* %c to <2 x double>*
; CHECK: %i0 = load <2 x double>, <2 x double>* %i0.v.i0, align 8
; CHECK: %i1 = load <2 x double>, <2 x double>* %i1.v.i0, align 8
; CHECK: %mul = fmul <2 x double> %i0, %i1
; CHECK: %i2 = load <2 x double>, <2 x double>* %i2.v.i0, align 8
; CHECK: %add = fadd <2 x double> %mul, %i2
; CHECK: %mul9 = fmul <2 x double> %add, %i1
; CHECK: %add11 = fadd <2 x double> %mul9, %i2
; CHECK: %add11.v.r1 = extractelement <2 x double> %add11, i32 0
; CHECK: %add11.v.r2 = extractelement <2 x double> %add11, i32 1
; CHECK: %mul16 = fmul double %add11.v.r1, %add11.v.r2
; CHECK: ret double %mul16
}

View File

@ -1,3 +0,0 @@
if not 'X86' in config.root.targets:
config.unsupported = True

View File

@ -1,93 +0,0 @@
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"
; RUN: opt < %s -bb-vectorize -bb-vectorize-req-chain-depth=3 -bb-vectorize-ignore-target-info -instcombine -gvn -S | FileCheck %s
; RUN: opt < %s -dont-improve-non-negative-phi-bits=false -basicaa -loop-unroll -unroll-threshold=45 -unroll-partial-threshold=45 -unroll-allow-partial -bb-vectorize -bb-vectorize-req-chain-depth=3 -bb-vectorize-ignore-target-info -instcombine -gvn -S | FileCheck %s -check-prefix=CHECK-UNRL
; The second check covers the use of alias analysis (with loop unrolling).
define void @test1(double* noalias %out, double* noalias %in1, double* noalias %in2) nounwind uwtable {
entry:
br label %for.body
; CHECK-LABEL: @test1(
; CHECK-UNRL-LABEL: @test1(
for.body: ; preds = %for.body, %entry
%indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
%arrayidx = getelementptr inbounds double, double* %in1, i64 %indvars.iv
%0 = load double, double* %arrayidx, align 8
%arrayidx2 = getelementptr inbounds double, double* %in2, i64 %indvars.iv
%1 = load double, double* %arrayidx2, align 8
%mul = fmul double %0, %0
%mul3 = fmul double %0, %1
%add = fadd double %mul, %mul3
%add4 = fadd double %1, %1
%add5 = fadd double %add4, %0
%mul6 = fmul double %0, %add5
%add7 = fadd double %add, %mul6
%mul8 = fmul double %1, %1
%add9 = fadd double %0, %0
%add10 = fadd double %add9, %0
%mul11 = fmul double %mul8, %add10
%add12 = fadd double %add7, %mul11
%arrayidx14 = getelementptr inbounds double, double* %out, i64 %indvars.iv
store double %add12, double* %arrayidx14, align 8
%indvars.iv.next = add i64 %indvars.iv, 1
%lftr.wideiv = trunc i64 %indvars.iv.next to i32
%exitcond = icmp eq i32 %lftr.wideiv, 10
br i1 %exitcond, label %for.end, label %for.body
; CHECK: %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
; CHECK: %arrayidx = getelementptr inbounds double, double* %in1, i64 %indvars.iv
; CHECK: %0 = load double, double* %arrayidx, align 8
; CHECK: %arrayidx2 = getelementptr inbounds double, double* %in2, i64 %indvars.iv
; CHECK: %1 = load double, double* %arrayidx2, align 8
; CHECK: %mul = fmul double %0, %0
; CHECK: %mul3 = fmul double %0, %1
; CHECK: %add = fadd double %mul, %mul3
; CHECK: %mul8 = fmul double %1, %1
; CHECK: %add4.v.i1.1 = insertelement <2 x double> undef, double %1, i32 0
; CHECK: %add4.v.i1.2 = insertelement <2 x double> %add4.v.i1.1, double %0, i32 1
; CHECK: %add4 = fadd <2 x double> %add4.v.i1.2, %add4.v.i1.2
; CHECK: %2 = insertelement <2 x double> undef, double %0, i32 0
; CHECK: %add5.v.i1.2 = shufflevector <2 x double> %2, <2 x double> undef, <2 x i32> zeroinitializer
; CHECK: %add5 = fadd <2 x double> %add4, %add5.v.i1.2
; CHECK: %mul6.v.i0.2 = insertelement <2 x double> %2, double %mul8, i32 1
; CHECK: %mul6 = fmul <2 x double> %mul6.v.i0.2, %add5
; CHECK: %mul6.v.r1 = extractelement <2 x double> %mul6, i32 0
; CHECK: %mul6.v.r2 = extractelement <2 x double> %mul6, i32 1
; CHECK: %add7 = fadd double %add, %mul6.v.r1
; CHECK: %add12 = fadd double %add7, %mul6.v.r2
; CHECK: %arrayidx14 = getelementptr inbounds double, double* %out, i64 %indvars.iv
; CHECK: store double %add12, double* %arrayidx14, align 8
; CHECK: %indvars.iv.next = add i64 %indvars.iv, 1
; CHECK: %lftr.wideiv = trunc i64 %indvars.iv.next to i32
; CHECK: %exitcond = icmp eq i32 %lftr.wideiv, 10
; CHECK: br i1 %exitcond, label %for.end, label %for.body
; CHECK-UNRL: %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next.1, %for.body ]
; CHECK-UNRL: %arrayidx = getelementptr inbounds double, double* %in1, i64 %indvars.iv
; CHECK-UNRL: %0 = bitcast double* %arrayidx to <2 x double>*
; CHECK-UNRL: %arrayidx2 = getelementptr inbounds double, double* %in2, i64 %indvars.iv
; CHECK-UNRL: %1 = bitcast double* %arrayidx2 to <2 x double>*
; CHECK-UNRL: %arrayidx14 = getelementptr inbounds double, double* %out, i64 %indvars.iv
; CHECK-UNRL: %2 = load <2 x double>, <2 x double>* %0, align 8
; CHECK-UNRL: %3 = load <2 x double>, <2 x double>* %1, align 8
; CHECK-UNRL: %mul = fmul <2 x double> %2, %2
; CHECK-UNRL: %mul3 = fmul <2 x double> %2, %3
; CHECK-UNRL: %add = fadd <2 x double> %mul, %mul3
; CHECK-UNRL: %add4 = fadd <2 x double> %3, %3
; CHECK-UNRL: %add5 = fadd <2 x double> %add4, %2
; CHECK-UNRL: %mul6 = fmul <2 x double> %2, %add5
; CHECK-UNRL: %add7 = fadd <2 x double> %add, %mul6
; CHECK-UNRL: %mul8 = fmul <2 x double> %3, %3
; CHECK-UNRL: %add9 = fadd <2 x double> %2, %2
; CHECK-UNRL: %add10 = fadd <2 x double> %add9, %2
; CHECK-UNRL: %mul11 = fmul <2 x double> %mul8, %add10
; CHECK-UNRL: %add12 = fadd <2 x double> %add7, %mul11
; CHECK-UNRL: %4 = bitcast double* %arrayidx14 to <2 x double>*
; CHECK-UNRL: store <2 x double> %add12, <2 x double>* %4, align 8
; CHECK-UNRL: %indvars.iv.next.1 = add nuw nsw i64 %indvars.iv, 2
; CHECK-UNRL: %lftr.wideiv.1 = trunc i64 %indvars.iv.next.1 to i32
; CHECK-UNRL: %exitcond.1 = icmp eq i32 %lftr.wideiv.1, 10
; CHECK-UNRL: br i1 %exitcond.1, label %for.end, label %for.body
for.end: ; preds = %for.body
ret void
}

View File

@ -1,22 +0,0 @@
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
target triple = "x86_64-unknown-linux-gnu"
; RUN: opt < %s -bb-vectorize -bb-vectorize-req-chain-depth=6 -instcombine -gvn -S | FileCheck %s
@A = common global [1024 x float] zeroinitializer, align 16
@B = common global [1024 x float] zeroinitializer, align 16
define i32 @test1() nounwind {
; CHECK-LABEL: @test1(
%V1 = load float, float* getelementptr inbounds ([1024 x float], [1024 x float]* @A, i64 0, i64 0), align 16
%V2 = load float, float* getelementptr inbounds ([1024 x float], [1024 x float]* @A, i64 0, i64 1), align 4
%V3= load float, float* getelementptr inbounds ([1024 x float], [1024 x float]* @A, i64 0, i64 2), align 8
%V4 = load float, float* getelementptr inbounds ([1024 x float], [1024 x float]* @A, i64 0, i64 3), align 4
; CHECK: %V1 = load <4 x float>, <4 x float>* bitcast ([1024 x float]* @A to <4 x float>*), align 16
store float %V1, float* getelementptr inbounds ([1024 x float], [1024 x float]* @B, i64 0, i64 0), align 16
store float %V2, float* getelementptr inbounds ([1024 x float], [1024 x float]* @B, i64 0, i64 1), align 4
store float %V3, float* getelementptr inbounds ([1024 x float], [1024 x float]* @B, i64 0, i64 2), align 8
store float %V4, float* getelementptr inbounds ([1024 x float], [1024 x float]* @B, i64 0, i64 3), align 4
; CHECK-NEXT: store <4 x float> %V1, <4 x float>* bitcast ([1024 x float]* @B to <4 x float>*), align 16
ret i32 0
; CHECK-NEXT: ret i32 0
}

View File

@ -1,49 +0,0 @@
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
; RUN: opt < %s -bb-vectorize -bb-vectorize-req-chain-depth=3 -S | FileCheck %s
; Simple 3-pair chain with loads and stores (with fpmath)
define void @test1(double* %a, double* %b, double* %c) nounwind uwtable readonly {
entry:
%i0 = load double, double* %a, align 8
%i1 = load double, double* %b, align 8
%mul = fmul double %i0, %i1, !fpmath !2
%arrayidx3 = getelementptr inbounds double, double* %a, i64 1
%i3 = load double, double* %arrayidx3, align 8
%arrayidx4 = getelementptr inbounds double, double* %b, i64 1
%i4 = load double, double* %arrayidx4, align 8
%mul5 = fmul double %i3, %i4, !fpmath !3
store double %mul, double* %c, align 8
%arrayidx5 = getelementptr inbounds double, double* %c, i64 1
store double %mul5, double* %arrayidx5, align 8
ret void
; CHECK-LABEL: @test1(
; CHECK: !fpmath
; CHECK: ret void
}
; Simple 3-pair chain with loads and stores (ints with range)
define void @test2(i64* %a, i64* %b, i64* %c) nounwind uwtable readonly {
entry:
%i0 = load i64, i64* %a, align 8, !range !0
%i1 = load i64, i64* %b, align 8
%mul = mul i64 %i0, %i1
%arrayidx3 = getelementptr inbounds i64, i64* %a, i64 1
%i3 = load i64, i64* %arrayidx3, align 8, !range !1
%arrayidx4 = getelementptr inbounds i64, i64* %b, i64 1
%i4 = load i64, i64* %arrayidx4, align 8
%mul5 = mul i64 %i3, %i4
store i64 %mul, i64* %c, align 8
%arrayidx5 = getelementptr inbounds i64, i64* %c, i64 1
store i64 %mul5, i64* %arrayidx5, align 8
ret void
; CHECK-LABEL: @test2(
; CHECK-NOT: !range
; CHECK: ret void
}
!0 = !{i64 0, i64 2}
!1 = !{i64 3, i64 5}
!2 = !{ float 5.0 }
!3 = !{ float 2.5 }

View File

@ -1,23 +0,0 @@
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
; RUN: opt < %s -bb-vectorize -bb-vectorize-req-chain-depth=2 -instcombine -gvn -S | FileCheck %s
; Make sure that things (specifically getelementptr) are not connected to loads
; and stores via the address operand (which would be bad because the address
; is really a scalar even after vectorization)
define i64 @test2(i64 %a) nounwind uwtable readonly {
entry:
%a1 = inttoptr i64 %a to i64*
%a2 = getelementptr i64, i64* %a1, i64 1
%a3 = getelementptr i64, i64* %a1, i64 2
%v2 = load i64, i64* %a2, align 8
%v3 = load i64, i64* %a3, align 8
%v2a = add i64 %v2, 5
%v3a = add i64 %v3, 7
store i64 %v2a, i64* %a2, align 8
store i64 %v3a, i64* %a3, align 8
%r = add i64 %v2, %v3
ret i64 %r
; CHECK-LABEL: @test2(
; CHECK-NOT: getelementptr i64, <2 x i64*>
}

View File

@ -1,17 +0,0 @@
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
; RUN: opt < %s -bb-vectorize -bb-vectorize-req-chain-depth 3 -bb-vectorize-ignore-target-info -S | FileCheck %s -check-prefix=CHECK-RD3
; RUN: opt < %s -bb-vectorize -bb-vectorize-req-chain-depth 2 -bb-vectorize-ignore-target-info -S | FileCheck %s -check-prefix=CHECK-RD2
define double @test1(double %A1, double %A2, double %B1, double %B2) {
%X1 = fsub double %A1, %B1
%X2 = fsub double %A2, %B2
%Y1 = fmul double %X1, %A1
%Y2 = fmul double %X2, %A2
%R = fmul double %Y1, %Y2
ret double %R
; CHECK-RD3-LABEL: @test1(
; CHECK-RD2-LABEL: @test1(
; CHECK-RD3-NOT: <2 x double>
; CHECK-RD2: <2 x double>
}

View File

@ -1,46 +0,0 @@
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
; RUN: opt < %s -bb-vectorize -bb-vectorize-req-chain-depth=3 -bb-vectorize-ignore-target-info -instcombine -gvn -S | FileCheck %s
; RUN: opt < %s -bb-vectorize -bb-vectorize-req-chain-depth=3 -bb-vectorize-search-limit=4 -bb-vectorize-ignore-target-info -instcombine -gvn -S | FileCheck %s -check-prefix=CHECK-SL4
define double @test1(double %A1, double %A2, double %B1, double %B2) {
; CHECK-LABEL: @test1(
; CHECK-SL4-LABEL: @test1(
; CHECK-SL4-NOT: <2 x double>
; CHECK: %X1.v.i1.1 = insertelement <2 x double> undef, double %B1, i32 0
; CHECK: %X1.v.i1.2 = insertelement <2 x double> %X1.v.i1.1, double %B2, i32 1
; CHECK: %X1.v.i0.1 = insertelement <2 x double> undef, double %A1, i32 0
; CHECK: %X1.v.i0.2 = insertelement <2 x double> %X1.v.i0.1, double %A2, i32 1
%X1 = fsub double %A1, %B1
%X2 = fsub double %A2, %B2
; CHECK: %X1 = fsub <2 x double> %X1.v.i0.2, %X1.v.i1.2
%Y1 = fmul double %X1, %A1
%Y2 = fmul double %X2, %A2
; CHECK: %Y1 = fmul <2 x double> %X1, %X1.v.i0.2
%Z1 = fadd double %Y1, %B1
; Here we have a dependency chain: the short search limit will not
; see past this chain and so will not see the second part of the
; pair to vectorize.
%mul41 = fmul double %Z1, %Y2
%sub48 = fsub double %Z1, %mul41
%mul62 = fmul double %Z1, %sub48
%sub69 = fsub double %Z1, %mul62
%mul83 = fmul double %Z1, %sub69
%sub90 = fsub double %Z1, %mul83
%mul104 = fmul double %Z1, %sub90
%sub111 = fsub double %Z1, %mul104
%mul125 = fmul double %Z1, %sub111
%sub132 = fsub double %Z1, %mul125
%mul146 = fmul double %Z1, %sub132
%sub153 = fsub double %Z1, %mul146
; end of chain.
%Z2 = fadd double %Y2, %B2
; CHECK: %Z1 = fadd <2 x double> %Y1, %X1.v.i1.2
%R1 = fdiv double %Z1, %Z2
%R = fmul double %R1, %sub153
; CHECK: %Z1.v.r1 = extractelement <2 x double> %Z1, i32 0
; CHECK: %Z1.v.r2 = extractelement <2 x double> %Z1, i32 1
; CHECK: %R1 = fdiv double %Z1.v.r1, %Z1.v.r2
ret double %R
; CHECK: ret double %R
}

View File

@ -1,514 +0,0 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -bb-vectorize -bb-vectorize-req-chain-depth=3 -bb-vectorize-ignore-target-info -instcombine -gvn -S | FileCheck %s
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
declare double @llvm.fma.f64(double, double, double)
declare double @llvm.fmuladd.f64(double, double, double)
declare double @llvm.cos.f64(double)
declare double @llvm.powi.f64(double, i32)
declare double @llvm.round.f64(double)
declare double @llvm.copysign.f64(double, double)
declare double @llvm.ceil.f64(double)
declare double @llvm.nearbyint.f64(double)
declare double @llvm.rint.f64(double)
declare double @llvm.trunc.f64(double)
declare double @llvm.floor.f64(double)
declare double @llvm.fabs.f64(double)
declare i64 @llvm.bswap.i64(i64)
declare i64 @llvm.ctpop.i64(i64)
declare i64 @llvm.ctlz.i64(i64, i1)
declare i64 @llvm.cttz.i64(i64, i1)
; Basic depth-3 chain with fma
define double @test1(double %A1, double %A2, double %B1, double %B2, double %C1, double %C2) {
; CHECK-LABEL: @test1(
; CHECK-NEXT: [[X1_V_I1_1:%.*]] = insertelement <2 x double> undef, double [[B1:%.*]], i32 0
; CHECK-NEXT: [[X1_V_I1_2:%.*]] = insertelement <2 x double> [[X1_V_I1_1]], double [[B2:%.*]], i32 1
; CHECK-NEXT: [[X1_V_I0_1:%.*]] = insertelement <2 x double> undef, double [[A1:%.*]], i32 0
; CHECK-NEXT: [[X1_V_I0_2:%.*]] = insertelement <2 x double> [[X1_V_I0_1]], double [[A2:%.*]], i32 1
; CHECK-NEXT: [[X1:%.*]] = fsub <2 x double> [[X1_V_I0_2]], [[X1_V_I1_2]]
; CHECK-NEXT: [[Y1_V_I2_1:%.*]] = insertelement <2 x double> undef, double [[C1:%.*]], i32 0
; CHECK-NEXT: [[Y1_V_I2_2:%.*]] = insertelement <2 x double> [[Y1_V_I2_1]], double [[C2:%.*]], i32 1
; CHECK-NEXT: [[Y1:%.*]] = call <2 x double> @llvm.fma.v2f64(<2 x double> [[X1]], <2 x double> [[X1_V_I0_2]], <2 x double> [[Y1_V_I2_2]])
; CHECK-NEXT: [[Z1:%.*]] = fadd <2 x double> [[Y1]], [[X1_V_I1_2]]
; CHECK-NEXT: [[Z1_V_R1:%.*]] = extractelement <2 x double> [[Z1]], i32 0
; CHECK-NEXT: [[Z1_V_R2:%.*]] = extractelement <2 x double> [[Z1]], i32 1
; CHECK-NEXT: [[R:%.*]] = fmul double [[Z1_V_R1]], [[Z1_V_R2]]
; CHECK-NEXT: ret double [[R]]
;
; Expected: the two parallel fsub/fma/fadd chains fuse into one <2 x double>
; chain calling @llvm.fma.v2f64; only the final cross-lane fmul stays scalar.
%X1 = fsub double %A1, %B1
%X2 = fsub double %A2, %B2
%Y1 = call double @llvm.fma.f64(double %X1, double %A1, double %C1)
%Y2 = call double @llvm.fma.f64(double %X2, double %A2, double %C2)
%Z1 = fadd double %Y1, %B1
%Z2 = fadd double %Y2, %B2
%R = fmul double %Z1, %Z2
ret double %R
}
; Basic depth-3 chain with fmuladd
define double @test1a(double %A1, double %A2, double %B1, double %B2, double %C1, double %C2) {
; CHECK-LABEL: @test1a(
; CHECK-NEXT: [[X1_V_I1_1:%.*]] = insertelement <2 x double> undef, double [[B1:%.*]], i32 0
; CHECK-NEXT: [[X1_V_I1_2:%.*]] = insertelement <2 x double> [[X1_V_I1_1]], double [[B2:%.*]], i32 1
; CHECK-NEXT: [[X1_V_I0_1:%.*]] = insertelement <2 x double> undef, double [[A1:%.*]], i32 0
; CHECK-NEXT: [[X1_V_I0_2:%.*]] = insertelement <2 x double> [[X1_V_I0_1]], double [[A2:%.*]], i32 1
; CHECK-NEXT: [[X1:%.*]] = fsub <2 x double> [[X1_V_I0_2]], [[X1_V_I1_2]]
; CHECK-NEXT: [[Y1_V_I2_1:%.*]] = insertelement <2 x double> undef, double [[C1:%.*]], i32 0
; CHECK-NEXT: [[Y1_V_I2_2:%.*]] = insertelement <2 x double> [[Y1_V_I2_1]], double [[C2:%.*]], i32 1
; CHECK-NEXT: [[Y1:%.*]] = call <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[X1]], <2 x double> [[X1_V_I0_2]], <2 x double> [[Y1_V_I2_2]])
; CHECK-NEXT: [[Z1:%.*]] = fadd <2 x double> [[Y1]], [[X1_V_I1_2]]
; CHECK-NEXT: [[Z1_V_R1:%.*]] = extractelement <2 x double> [[Z1]], i32 0
; CHECK-NEXT: [[Z1_V_R2:%.*]] = extractelement <2 x double> [[Z1]], i32 1
; CHECK-NEXT: [[R:%.*]] = fmul double [[Z1_V_R1]], [[Z1_V_R2]]
; CHECK-NEXT: ret double [[R]]
;
; Expected: same shape as @test1 but the paired calls become
; @llvm.fmuladd.v2f64 instead of @llvm.fma.v2f64.
%X1 = fsub double %A1, %B1
%X2 = fsub double %A2, %B2
%Y1 = call double @llvm.fmuladd.f64(double %X1, double %A1, double %C1)
%Y2 = call double @llvm.fmuladd.f64(double %X2, double %A2, double %C2)
%Z1 = fadd double %Y1, %B1
%Z2 = fadd double %Y2, %B2
%R = fmul double %Z1, %Z2
ret double %R
}
; Basic depth-3 chain with cos
define double @test2(double %A1, double %A2, double %B1, double %B2) {
; CHECK-LABEL: @test2(
; CHECK-NEXT: [[X1_V_I1_1:%.*]] = insertelement <2 x double> undef, double [[B1:%.*]], i32 0
; CHECK-NEXT: [[X1_V_I1_2:%.*]] = insertelement <2 x double> [[X1_V_I1_1]], double [[B2:%.*]], i32 1
; CHECK-NEXT: [[X1_V_I0_1:%.*]] = insertelement <2 x double> undef, double [[A1:%.*]], i32 0
; CHECK-NEXT: [[X1_V_I0_2:%.*]] = insertelement <2 x double> [[X1_V_I0_1]], double [[A2:%.*]], i32 1
; CHECK-NEXT: [[X1:%.*]] = fsub <2 x double> [[X1_V_I0_2]], [[X1_V_I1_2]]
; CHECK-NEXT: [[Y1:%.*]] = call <2 x double> @llvm.cos.v2f64(<2 x double> [[X1]])
; CHECK-NEXT: [[Z1:%.*]] = fadd <2 x double> [[Y1]], [[X1_V_I1_2]]
; CHECK-NEXT: [[Z1_V_R1:%.*]] = extractelement <2 x double> [[Z1]], i32 0
; CHECK-NEXT: [[Z1_V_R2:%.*]] = extractelement <2 x double> [[Z1]], i32 1
; CHECK-NEXT: [[R:%.*]] = fmul double [[Z1_V_R1]], [[Z1_V_R2]]
; CHECK-NEXT: ret double [[R]]
;
; Expected: the paired @llvm.cos.f64 calls fuse into one @llvm.cos.v2f64.
%X1 = fsub double %A1, %B1
%X2 = fsub double %A2, %B2
%Y1 = call double @llvm.cos.f64(double %X1)
%Y2 = call double @llvm.cos.f64(double %X2)
%Z1 = fadd double %Y1, %B1
%Z2 = fadd double %Y2, %B2
%R = fmul double %Z1, %Z2
ret double %R
}
; Basic depth-3 chain with powi
define double @test3(double %A1, double %A2, double %B1, double %B2, i32 %P) {
; CHECK-LABEL: @test3(
; CHECK-NEXT: [[X1_V_I1_1:%.*]] = insertelement <2 x double> undef, double [[B1:%.*]], i32 0
; CHECK-NEXT: [[X1_V_I1_2:%.*]] = insertelement <2 x double> [[X1_V_I1_1]], double [[B2:%.*]], i32 1
; CHECK-NEXT: [[X1_V_I0_1:%.*]] = insertelement <2 x double> undef, double [[A1:%.*]], i32 0
; CHECK-NEXT: [[X1_V_I0_2:%.*]] = insertelement <2 x double> [[X1_V_I0_1]], double [[A2:%.*]], i32 1
; CHECK-NEXT: [[X1:%.*]] = fsub <2 x double> [[X1_V_I0_2]], [[X1_V_I1_2]]
; CHECK-NEXT: [[Y1:%.*]] = call <2 x double> @llvm.powi.v2f64(<2 x double> [[X1]], i32 [[P:%.*]])
; CHECK-NEXT: [[Z1:%.*]] = fadd <2 x double> [[Y1]], [[X1_V_I1_2]]
; CHECK-NEXT: [[Z1_V_R1:%.*]] = extractelement <2 x double> [[Z1]], i32 0
; CHECK-NEXT: [[Z1_V_R2:%.*]] = extractelement <2 x double> [[Z1]], i32 1
; CHECK-NEXT: [[R:%.*]] = fmul double [[Z1_V_R1]], [[Z1_V_R2]]
; CHECK-NEXT: ret double [[R]]
;
; Expected: both powi calls share the same scalar exponent %P, so they fuse
; into one @llvm.powi.v2f64 call (contrast with @test4 below).
%X1 = fsub double %A1, %B1
%X2 = fsub double %A2, %B2
%Y1 = call double @llvm.powi.f64(double %X1, i32 %P)
%Y2 = call double @llvm.powi.f64(double %X2, i32 %P)
%Z1 = fadd double %Y1, %B1
%Z2 = fadd double %Y2, %B2
%R = fmul double %Z1, %Z2
ret double %R
}
; Basic depth-3 chain with powi (different powers: should not vectorize)
define double @test4(double %A1, double %A2, double %B1, double %B2, i32 %P) {
; CHECK-LABEL: @test4(
; CHECK-NEXT: [[X1:%.*]] = fsub double [[A1:%.*]], [[B1:%.*]]
; CHECK-NEXT: [[X2:%.*]] = fsub double [[A2:%.*]], [[B2:%.*]]
; CHECK-NEXT: [[P2:%.*]] = add i32 [[P:%.*]], 1
; CHECK-NEXT: [[Y1:%.*]] = call double @llvm.powi.f64(double [[X1]], i32 [[P]])
; CHECK-NEXT: [[Y2:%.*]] = call double @llvm.powi.f64(double [[X2]], i32 [[P2]])
; CHECK-NEXT: [[Z1:%.*]] = fadd double [[Y1]], [[B1]]
; CHECK-NEXT: [[Z2:%.*]] = fadd double [[Y2]], [[B2]]
; CHECK-NEXT: [[R:%.*]] = fmul double [[Z1]], [[Z2]]
; CHECK-NEXT: ret double [[R]]
;
; Negative test: the powi exponents differ (%P vs. %P + 1), so the calls
; cannot be paired and the whole function must remain scalar.
%X1 = fsub double %A1, %B1
%X2 = fsub double %A2, %B2
%P2 = add i32 %P, 1
%Y1 = call double @llvm.powi.f64(double %X1, i32 %P)
%Y2 = call double @llvm.powi.f64(double %X2, i32 %P2)
%Z1 = fadd double %Y1, %B1
%Z2 = fadd double %Y2, %B2
%R = fmul double %Z1, %Z2
ret double %R
}
; Basic depth-3 chain with round
define double @testround(double %A1, double %A2, double %B1, double %B2) {
; CHECK-LABEL: @testround(
; CHECK-NEXT: [[X1_V_I1_1:%.*]] = insertelement <2 x double> undef, double [[B1:%.*]], i32 0
; CHECK-NEXT: [[X1_V_I1_2:%.*]] = insertelement <2 x double> [[X1_V_I1_1]], double [[B2:%.*]], i32 1
; CHECK-NEXT: [[X1_V_I0_1:%.*]] = insertelement <2 x double> undef, double [[A1:%.*]], i32 0
; CHECK-NEXT: [[X1_V_I0_2:%.*]] = insertelement <2 x double> [[X1_V_I0_1]], double [[A2:%.*]], i32 1
; CHECK-NEXT: [[X1:%.*]] = fsub <2 x double> [[X1_V_I0_2]], [[X1_V_I1_2]]
; CHECK-NEXT: [[Y1:%.*]] = call <2 x double> @llvm.round.v2f64(<2 x double> [[X1]])
; CHECK-NEXT: [[Z1:%.*]] = fadd <2 x double> [[Y1]], [[X1_V_I1_2]]
; CHECK-NEXT: [[Z1_V_R1:%.*]] = extractelement <2 x double> [[Z1]], i32 0
; CHECK-NEXT: [[Z1_V_R2:%.*]] = extractelement <2 x double> [[Z1]], i32 1
; CHECK-NEXT: [[R:%.*]] = fmul double [[Z1_V_R1]], [[Z1_V_R2]]
; CHECK-NEXT: ret double [[R]]
;
; Expected: the paired @llvm.round.f64 calls fuse into one @llvm.round.v2f64.
%X1 = fsub double %A1, %B1
%X2 = fsub double %A2, %B2
%Y1 = call double @llvm.round.f64(double %X1)
%Y2 = call double @llvm.round.f64(double %X2)
%Z1 = fadd double %Y1, %B1
%Z2 = fadd double %Y2, %B2
%R = fmul double %Z1, %Z2
ret double %R
}
; Basic depth-3 chain with copysign
define double @testcopysign(double %A1, double %A2, double %B1, double %B2) {
; CHECK-LABEL: @testcopysign(
; CHECK-NEXT: [[X1_V_I1_1:%.*]] = insertelement <2 x double> undef, double [[B1:%.*]], i32 0
; CHECK-NEXT: [[X1_V_I1_2:%.*]] = insertelement <2 x double> [[X1_V_I1_1]], double [[B2:%.*]], i32 1
; CHECK-NEXT: [[X1_V_I0_1:%.*]] = insertelement <2 x double> undef, double [[A1:%.*]], i32 0
; CHECK-NEXT: [[X1_V_I0_2:%.*]] = insertelement <2 x double> [[X1_V_I0_1]], double [[A2:%.*]], i32 1
; CHECK-NEXT: [[X1:%.*]] = fsub <2 x double> [[X1_V_I0_2]], [[X1_V_I1_2]]
; CHECK-NEXT: [[Y1_V_I1_2:%.*]] = shufflevector <2 x double> [[X1_V_I0_1]], <2 x double> undef, <2 x i32> zeroinitializer
; CHECK-NEXT: [[Y1:%.*]] = call <2 x double> @llvm.copysign.v2f64(<2 x double> [[X1]], <2 x double> [[Y1_V_I1_2]])
; CHECK-NEXT: [[Z1:%.*]] = fadd <2 x double> [[Y1]], [[X1_V_I1_2]]
; CHECK-NEXT: [[Z1_V_R1:%.*]] = extractelement <2 x double> [[Z1]], i32 0
; CHECK-NEXT: [[Z1_V_R2:%.*]] = extractelement <2 x double> [[Z1]], i32 1
; CHECK-NEXT: [[R:%.*]] = fmul double [[Z1_V_R1]], [[Z1_V_R2]]
; CHECK-NEXT: ret double [[R]]
;
; Expected: both copysign calls use %A1 as the sign operand, so the fused
; @llvm.copysign.v2f64 takes a splat (shufflevector) of %A1 in both lanes.
%X1 = fsub double %A1, %B1
%X2 = fsub double %A2, %B2
%Y1 = call double @llvm.copysign.f64(double %X1, double %A1)
%Y2 = call double @llvm.copysign.f64(double %X2, double %A1)
%Z1 = fadd double %Y1, %B1
%Z2 = fadd double %Y2, %B2
%R = fmul double %Z1, %Z2
ret double %R
}
; Basic depth-3 chain with ceil
define double @testceil(double %A1, double %A2, double %B1, double %B2) {
; CHECK-LABEL: @testceil(
; CHECK-NEXT: [[X1_V_I1_1:%.*]] = insertelement <2 x double> undef, double [[B1:%.*]], i32 0
; CHECK-NEXT: [[X1_V_I1_2:%.*]] = insertelement <2 x double> [[X1_V_I1_1]], double [[B2:%.*]], i32 1
; CHECK-NEXT: [[X1_V_I0_1:%.*]] = insertelement <2 x double> undef, double [[A1:%.*]], i32 0
; CHECK-NEXT: [[X1_V_I0_2:%.*]] = insertelement <2 x double> [[X1_V_I0_1]], double [[A2:%.*]], i32 1
; CHECK-NEXT: [[X1:%.*]] = fsub <2 x double> [[X1_V_I0_2]], [[X1_V_I1_2]]
; CHECK-NEXT: [[Y1:%.*]] = call <2 x double> @llvm.ceil.v2f64(<2 x double> [[X1]])
; CHECK-NEXT: [[Z1:%.*]] = fadd <2 x double> [[Y1]], [[X1_V_I1_2]]
; CHECK-NEXT: [[Z1_V_R1:%.*]] = extractelement <2 x double> [[Z1]], i32 0
; CHECK-NEXT: [[Z1_V_R2:%.*]] = extractelement <2 x double> [[Z1]], i32 1
; CHECK-NEXT: [[R:%.*]] = fmul double [[Z1_V_R1]], [[Z1_V_R2]]
; CHECK-NEXT: ret double [[R]]
;
; Expected: the paired @llvm.ceil.f64 calls fuse into one @llvm.ceil.v2f64.
%X1 = fsub double %A1, %B1
%X2 = fsub double %A2, %B2
%Y1 = call double @llvm.ceil.f64(double %X1)
%Y2 = call double @llvm.ceil.f64(double %X2)
%Z1 = fadd double %Y1, %B1
%Z2 = fadd double %Y2, %B2
%R = fmul double %Z1, %Z2
ret double %R
}
; Basic depth-3 chain with nearbyint
define double @testnearbyint(double %A1, double %A2, double %B1, double %B2) {
; CHECK-LABEL: @testnearbyint(
; CHECK-NEXT: [[X1_V_I1_1:%.*]] = insertelement <2 x double> undef, double [[B1:%.*]], i32 0
; CHECK-NEXT: [[X1_V_I1_2:%.*]] = insertelement <2 x double> [[X1_V_I1_1]], double [[B2:%.*]], i32 1
; CHECK-NEXT: [[X1_V_I0_1:%.*]] = insertelement <2 x double> undef, double [[A1:%.*]], i32 0
; CHECK-NEXT: [[X1_V_I0_2:%.*]] = insertelement <2 x double> [[X1_V_I0_1]], double [[A2:%.*]], i32 1
; CHECK-NEXT: [[X1:%.*]] = fsub <2 x double> [[X1_V_I0_2]], [[X1_V_I1_2]]
; CHECK-NEXT: [[Y1:%.*]] = call <2 x double> @llvm.nearbyint.v2f64(<2 x double> [[X1]])
; CHECK-NEXT: [[Z1:%.*]] = fadd <2 x double> [[Y1]], [[X1_V_I1_2]]
; CHECK-NEXT: [[Z1_V_R1:%.*]] = extractelement <2 x double> [[Z1]], i32 0
; CHECK-NEXT: [[Z1_V_R2:%.*]] = extractelement <2 x double> [[Z1]], i32 1
; CHECK-NEXT: [[R:%.*]] = fmul double [[Z1_V_R1]], [[Z1_V_R2]]
; CHECK-NEXT: ret double [[R]]
;
; Expected: the paired @llvm.nearbyint.f64 calls fuse into one
; @llvm.nearbyint.v2f64.
%X1 = fsub double %A1, %B1
%X2 = fsub double %A2, %B2
%Y1 = call double @llvm.nearbyint.f64(double %X1)
%Y2 = call double @llvm.nearbyint.f64(double %X2)
%Z1 = fadd double %Y1, %B1
%Z2 = fadd double %Y2, %B2
%R = fmul double %Z1, %Z2
ret double %R
}
; Basic depth-3 chain with rint
define double @testrint(double %A1, double %A2, double %B1, double %B2) {
; CHECK-LABEL: @testrint(
; CHECK-NEXT: [[X1_V_I1_1:%.*]] = insertelement <2 x double> undef, double [[B1:%.*]], i32 0
; CHECK-NEXT: [[X1_V_I1_2:%.*]] = insertelement <2 x double> [[X1_V_I1_1]], double [[B2:%.*]], i32 1
; CHECK-NEXT: [[X1_V_I0_1:%.*]] = insertelement <2 x double> undef, double [[A1:%.*]], i32 0
; CHECK-NEXT: [[X1_V_I0_2:%.*]] = insertelement <2 x double> [[X1_V_I0_1]], double [[A2:%.*]], i32 1
; CHECK-NEXT: [[X1:%.*]] = fsub <2 x double> [[X1_V_I0_2]], [[X1_V_I1_2]]
; CHECK-NEXT: [[Y1:%.*]] = call <2 x double> @llvm.rint.v2f64(<2 x double> [[X1]])
; CHECK-NEXT: [[Z1:%.*]] = fadd <2 x double> [[Y1]], [[X1_V_I1_2]]
; CHECK-NEXT: [[Z1_V_R1:%.*]] = extractelement <2 x double> [[Z1]], i32 0
; CHECK-NEXT: [[Z1_V_R2:%.*]] = extractelement <2 x double> [[Z1]], i32 1
; CHECK-NEXT: [[R:%.*]] = fmul double [[Z1_V_R1]], [[Z1_V_R2]]
; CHECK-NEXT: ret double [[R]]
;
; Expected: the paired @llvm.rint.f64 calls fuse into one @llvm.rint.v2f64.
%X1 = fsub double %A1, %B1
%X2 = fsub double %A2, %B2
%Y1 = call double @llvm.rint.f64(double %X1)
%Y2 = call double @llvm.rint.f64(double %X2)
%Z1 = fadd double %Y1, %B1
%Z2 = fadd double %Y2, %B2
%R = fmul double %Z1, %Z2
ret double %R
}
; Basic depth-3 chain with trunc
define double @testtrunc(double %A1, double %A2, double %B1, double %B2) {
; CHECK-LABEL: @testtrunc(
; CHECK-NEXT: [[X1_V_I1_1:%.*]] = insertelement <2 x double> undef, double [[B1:%.*]], i32 0
; CHECK-NEXT: [[X1_V_I1_2:%.*]] = insertelement <2 x double> [[X1_V_I1_1]], double [[B2:%.*]], i32 1
; CHECK-NEXT: [[X1_V_I0_1:%.*]] = insertelement <2 x double> undef, double [[A1:%.*]], i32 0
; CHECK-NEXT: [[X1_V_I0_2:%.*]] = insertelement <2 x double> [[X1_V_I0_1]], double [[A2:%.*]], i32 1
; CHECK-NEXT: [[X1:%.*]] = fsub <2 x double> [[X1_V_I0_2]], [[X1_V_I1_2]]
; CHECK-NEXT: [[Y1:%.*]] = call <2 x double> @llvm.trunc.v2f64(<2 x double> [[X1]])
; CHECK-NEXT: [[Z1:%.*]] = fadd <2 x double> [[Y1]], [[X1_V_I1_2]]
; CHECK-NEXT: [[Z1_V_R1:%.*]] = extractelement <2 x double> [[Z1]], i32 0
; CHECK-NEXT: [[Z1_V_R2:%.*]] = extractelement <2 x double> [[Z1]], i32 1
; CHECK-NEXT: [[R:%.*]] = fmul double [[Z1_V_R1]], [[Z1_V_R2]]
; CHECK-NEXT: ret double [[R]]
;
; Expected: the paired @llvm.trunc.f64 calls fuse into one @llvm.trunc.v2f64.
%X1 = fsub double %A1, %B1
%X2 = fsub double %A2, %B2
%Y1 = call double @llvm.trunc.f64(double %X1)
%Y2 = call double @llvm.trunc.f64(double %X2)
%Z1 = fadd double %Y1, %B1
%Z2 = fadd double %Y2, %B2
%R = fmul double %Z1, %Z2
ret double %R
}
; Basic depth-3 chain with floor
define double @testfloor(double %A1, double %A2, double %B1, double %B2) {
; CHECK-LABEL: @testfloor(
; CHECK-NEXT: [[X1_V_I1_1:%.*]] = insertelement <2 x double> undef, double [[B1:%.*]], i32 0
; CHECK-NEXT: [[X1_V_I1_2:%.*]] = insertelement <2 x double> [[X1_V_I1_1]], double [[B2:%.*]], i32 1
; CHECK-NEXT: [[X1_V_I0_1:%.*]] = insertelement <2 x double> undef, double [[A1:%.*]], i32 0
; CHECK-NEXT: [[X1_V_I0_2:%.*]] = insertelement <2 x double> [[X1_V_I0_1]], double [[A2:%.*]], i32 1
; CHECK-NEXT: [[X1:%.*]] = fsub <2 x double> [[X1_V_I0_2]], [[X1_V_I1_2]]
; CHECK-NEXT: [[Y1:%.*]] = call <2 x double> @llvm.floor.v2f64(<2 x double> [[X1]])
; CHECK-NEXT: [[Z1:%.*]] = fadd <2 x double> [[Y1]], [[X1_V_I1_2]]
; CHECK-NEXT: [[Z1_V_R1:%.*]] = extractelement <2 x double> [[Z1]], i32 0
; CHECK-NEXT: [[Z1_V_R2:%.*]] = extractelement <2 x double> [[Z1]], i32 1
; CHECK-NEXT: [[R:%.*]] = fmul double [[Z1_V_R1]], [[Z1_V_R2]]
; CHECK-NEXT: ret double [[R]]
;
; Expected: the paired @llvm.floor.f64 calls fuse into one @llvm.floor.v2f64.
%X1 = fsub double %A1, %B1
%X2 = fsub double %A2, %B2
%Y1 = call double @llvm.floor.f64(double %X1)
%Y2 = call double @llvm.floor.f64(double %X2)
%Z1 = fadd double %Y1, %B1
%Z2 = fadd double %Y2, %B2
%R = fmul double %Z1, %Z2
ret double %R
}
; Basic depth-3 chain with fabs
define double @testfabs(double %A1, double %A2, double %B1, double %B2) {
; CHECK-LABEL: @testfabs(
; CHECK-NEXT: [[X1_V_I1_1:%.*]] = insertelement <2 x double> undef, double [[B1:%.*]], i32 0
; CHECK-NEXT: [[X1_V_I1_2:%.*]] = insertelement <2 x double> [[X1_V_I1_1]], double [[B2:%.*]], i32 1
; CHECK-NEXT: [[X1_V_I0_1:%.*]] = insertelement <2 x double> undef, double [[A1:%.*]], i32 0
; CHECK-NEXT: [[X1_V_I0_2:%.*]] = insertelement <2 x double> [[X1_V_I0_1]], double [[A2:%.*]], i32 1
; CHECK-NEXT: [[X1:%.*]] = fsub <2 x double> [[X1_V_I0_2]], [[X1_V_I1_2]]
; CHECK-NEXT: [[Y1:%.*]] = call <2 x double> @llvm.fabs.v2f64(<2 x double> [[X1]])
; CHECK-NEXT: [[Z1:%.*]] = fadd <2 x double> [[Y1]], [[X1_V_I1_2]]
; CHECK-NEXT: [[Z1_V_R1:%.*]] = extractelement <2 x double> [[Z1]], i32 0
; CHECK-NEXT: [[Z1_V_R2:%.*]] = extractelement <2 x double> [[Z1]], i32 1
; CHECK-NEXT: [[R:%.*]] = fmul double [[Z1_V_R1]], [[Z1_V_R2]]
; CHECK-NEXT: ret double [[R]]
;
; Expected: the paired @llvm.fabs.f64 calls fuse into one @llvm.fabs.v2f64.
%X1 = fsub double %A1, %B1
%X2 = fsub double %A2, %B2
%Y1 = call double @llvm.fabs.f64(double %X1)
%Y2 = call double @llvm.fabs.f64(double %X2)
%Z1 = fadd double %Y1, %B1
%Z2 = fadd double %Y2, %B2
%R = fmul double %Z1, %Z2
ret double %R
}
; Basic depth-3 chain with bswap
define i64 @testbswap(i64 %A1, i64 %A2, i64 %B1, i64 %B2) {
; CHECK-LABEL: @testbswap(
; CHECK-NEXT: [[X1_V_I1_1:%.*]] = insertelement <2 x i64> undef, i64 [[B1:%.*]], i32 0
; CHECK-NEXT: [[X1_V_I1_2:%.*]] = insertelement <2 x i64> [[X1_V_I1_1]], i64 [[B2:%.*]], i32 1
; CHECK-NEXT: [[X1_V_I0_1:%.*]] = insertelement <2 x i64> undef, i64 [[A1:%.*]], i32 0
; CHECK-NEXT: [[X1_V_I0_2:%.*]] = insertelement <2 x i64> [[X1_V_I0_1]], i64 [[A2:%.*]], i32 1
; CHECK-NEXT: [[X1:%.*]] = sub <2 x i64> [[X1_V_I0_2]], [[X1_V_I1_2]]
; CHECK-NEXT: [[Y1:%.*]] = call <2 x i64> @llvm.bswap.v2i64(<2 x i64> [[X1]])
; CHECK-NEXT: [[Z1:%.*]] = add <2 x i64> [[Y1]], [[X1_V_I1_2]]
; CHECK-NEXT: [[Z1_V_R1:%.*]] = extractelement <2 x i64> [[Z1]], i32 0
; CHECK-NEXT: [[Z1_V_R2:%.*]] = extractelement <2 x i64> [[Z1]], i32 1
; CHECK-NEXT: [[R:%.*]] = mul i64 [[Z1_V_R1]], [[Z1_V_R2]]
; CHECK-NEXT: ret i64 [[R]]
;
; Expected: integer variant — the paired @llvm.bswap.i64 calls fuse into one
; @llvm.bswap.v2i64 on <2 x i64>.
%X1 = sub i64 %A1, %B1
%X2 = sub i64 %A2, %B2
%Y1 = call i64 @llvm.bswap.i64(i64 %X1)
%Y2 = call i64 @llvm.bswap.i64(i64 %X2)
%Z1 = add i64 %Y1, %B1
%Z2 = add i64 %Y2, %B2
%R = mul i64 %Z1, %Z2
ret i64 %R
}
; Basic depth-3 chain with ctpop
define i64 @testctpop(i64 %A1, i64 %A2, i64 %B1, i64 %B2) {
; CHECK-LABEL: @testctpop(
; CHECK-NEXT: [[X1_V_I1_1:%.*]] = insertelement <2 x i64> undef, i64 [[B1:%.*]], i32 0
; CHECK-NEXT: [[X1_V_I1_2:%.*]] = insertelement <2 x i64> [[X1_V_I1_1]], i64 [[B2:%.*]], i32 1
; CHECK-NEXT: [[X1_V_I0_1:%.*]] = insertelement <2 x i64> undef, i64 [[A1:%.*]], i32 0
; CHECK-NEXT: [[X1_V_I0_2:%.*]] = insertelement <2 x i64> [[X1_V_I0_1]], i64 [[A2:%.*]], i32 1
; CHECK-NEXT: [[X1:%.*]] = sub <2 x i64> [[X1_V_I0_2]], [[X1_V_I1_2]]
; CHECK-NEXT: [[Y1:%.*]] = call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> [[X1]])
; CHECK-NEXT: [[Z1:%.*]] = add <2 x i64> [[Y1]], [[X1_V_I1_2]]
; CHECK-NEXT: [[Z1_V_R1:%.*]] = extractelement <2 x i64> [[Z1]], i32 0
; CHECK-NEXT: [[Z1_V_R2:%.*]] = extractelement <2 x i64> [[Z1]], i32 1
; CHECK-NEXT: [[R:%.*]] = mul i64 [[Z1_V_R1]], [[Z1_V_R2]]
; CHECK-NEXT: ret i64 [[R]]
;
; Expected: the paired @llvm.ctpop.i64 calls fuse into one @llvm.ctpop.v2i64.
%X1 = sub i64 %A1, %B1
%X2 = sub i64 %A2, %B2
%Y1 = call i64 @llvm.ctpop.i64(i64 %X1)
%Y2 = call i64 @llvm.ctpop.i64(i64 %X2)
%Z1 = add i64 %Y1, %B1
%Z2 = add i64 %Y2, %B2
%R = mul i64 %Z1, %Z2
ret i64 %R
}
; Basic depth-3 chain with ctlz
define i64 @testctlz(i64 %A1, i64 %A2, i64 %B1, i64 %B2) {
; CHECK-LABEL: @testctlz(
; CHECK-NEXT: [[X1_V_I1_1:%.*]] = insertelement <2 x i64> undef, i64 [[B1:%.*]], i32 0
; CHECK-NEXT: [[X1_V_I1_2:%.*]] = insertelement <2 x i64> [[X1_V_I1_1]], i64 [[B2:%.*]], i32 1
; CHECK-NEXT: [[X1_V_I0_1:%.*]] = insertelement <2 x i64> undef, i64 [[A1:%.*]], i32 0
; CHECK-NEXT: [[X1_V_I0_2:%.*]] = insertelement <2 x i64> [[X1_V_I0_1]], i64 [[A2:%.*]], i32 1
; CHECK-NEXT: [[X1:%.*]] = sub <2 x i64> [[X1_V_I0_2]], [[X1_V_I1_2]]
; CHECK-NEXT: [[Y1:%.*]] = call <2 x i64> @llvm.ctlz.v2i64(<2 x i64> [[X1]], i1 true)
; CHECK-NEXT: [[Z1:%.*]] = add <2 x i64> [[Y1]], [[X1_V_I1_2]]
; CHECK-NEXT: [[Z1_V_R1:%.*]] = extractelement <2 x i64> [[Z1]], i32 0
; CHECK-NEXT: [[Z1_V_R2:%.*]] = extractelement <2 x i64> [[Z1]], i32 1
; CHECK-NEXT: [[R:%.*]] = mul i64 [[Z1_V_R1]], [[Z1_V_R2]]
; CHECK-NEXT: ret i64 [[R]]
;
; Expected: both ctlz calls share the same is_zero_undef flag (i1 true), so
; they fuse into one @llvm.ctlz.v2i64 (contrast with @testctlzneg below).
%X1 = sub i64 %A1, %B1
%X2 = sub i64 %A2, %B2
%Y1 = call i64 @llvm.ctlz.i64(i64 %X1, i1 true)
%Y2 = call i64 @llvm.ctlz.i64(i64 %X2, i1 true)
%Z1 = add i64 %Y1, %B1
%Z2 = add i64 %Y2, %B2
%R = mul i64 %Z1, %Z2
ret i64 %R
}
; Basic depth-3 chain with ctlz
define i64 @testctlzneg(i64 %A1, i64 %A2, i64 %B1, i64 %B2) {
; CHECK-LABEL: @testctlzneg(
; CHECK-NEXT: [[X1:%.*]] = sub i64 [[A1:%.*]], [[B1:%.*]]
; CHECK-NEXT: [[X2:%.*]] = sub i64 [[A2:%.*]], [[B2:%.*]]
; CHECK-NEXT: [[Y1:%.*]] = call i64 @llvm.ctlz.i64(i64 [[X1]], i1 true), !range !0
; CHECK-NEXT: [[Y2:%.*]] = call i64 @llvm.ctlz.i64(i64 [[X2]], i1 false), !range !0
; CHECK-NEXT: [[Z1:%.*]] = add i64 [[Y1]], [[B1]]
; CHECK-NEXT: [[Z2:%.*]] = add i64 [[Y2]], [[B2]]
; CHECK-NEXT: [[R:%.*]] = mul i64 [[Z1]], [[Z2]]
; CHECK-NEXT: ret i64 [[R]]
;
; Negative test: the i1 flags differ (true vs. false), so the ctlz calls
; cannot be paired and the expected output stays scalar.
%X1 = sub i64 %A1, %B1
%X2 = sub i64 %A2, %B2
%Y1 = call i64 @llvm.ctlz.i64(i64 %X1, i1 true)
%Y2 = call i64 @llvm.ctlz.i64(i64 %X2, i1 false)
%Z1 = add i64 %Y1, %B1
%Z2 = add i64 %Y2, %B2
%R = mul i64 %Z1, %Z2
ret i64 %R
}
; Basic depth-3 chain with cttz
define i64 @testcttz(i64 %A1, i64 %A2, i64 %B1, i64 %B2) {
; CHECK-LABEL: @testcttz(
; CHECK-NEXT: [[X1_V_I1_1:%.*]] = insertelement <2 x i64> undef, i64 [[B1:%.*]], i32 0
; CHECK-NEXT: [[X1_V_I1_2:%.*]] = insertelement <2 x i64> [[X1_V_I1_1]], i64 [[B2:%.*]], i32 1
; CHECK-NEXT: [[X1_V_I0_1:%.*]] = insertelement <2 x i64> undef, i64 [[A1:%.*]], i32 0
; CHECK-NEXT: [[X1_V_I0_2:%.*]] = insertelement <2 x i64> [[X1_V_I0_1]], i64 [[A2:%.*]], i32 1
; CHECK-NEXT: [[X1:%.*]] = sub <2 x i64> [[X1_V_I0_2]], [[X1_V_I1_2]]
; CHECK-NEXT: [[Y1:%.*]] = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> [[X1]], i1 true)
; CHECK-NEXT: [[Z1:%.*]] = add <2 x i64> [[Y1]], [[X1_V_I1_2]]
; CHECK-NEXT: [[Z1_V_R1:%.*]] = extractelement <2 x i64> [[Z1]], i32 0
; CHECK-NEXT: [[Z1_V_R2:%.*]] = extractelement <2 x i64> [[Z1]], i32 1
; CHECK-NEXT: [[R:%.*]] = mul i64 [[Z1_V_R1]], [[Z1_V_R2]]
; CHECK-NEXT: ret i64 [[R]]
;
; Expected: both cttz calls share the same i1 flag (true), so they fuse into
; one @llvm.cttz.v2i64 (contrast with @testcttzneg below).
%X1 = sub i64 %A1, %B1
%X2 = sub i64 %A2, %B2
%Y1 = call i64 @llvm.cttz.i64(i64 %X1, i1 true)
%Y2 = call i64 @llvm.cttz.i64(i64 %X2, i1 true)
%Z1 = add i64 %Y1, %B1
%Z2 = add i64 %Y2, %B2
%R = mul i64 %Z1, %Z2
ret i64 %R
}
; Basic depth-3 chain with cttz
define i64 @testcttzneg(i64 %A1, i64 %A2, i64 %B1, i64 %B2) {
; CHECK-LABEL: @testcttzneg(
; CHECK-NEXT: [[X1:%.*]] = sub i64 [[A1:%.*]], [[B1:%.*]]
; CHECK-NEXT: [[X2:%.*]] = sub i64 [[A2:%.*]], [[B2:%.*]]
; CHECK-NEXT: [[Y1:%.*]] = call i64 @llvm.cttz.i64(i64 [[X1]], i1 true), !range !0
; CHECK-NEXT: [[Y2:%.*]] = call i64 @llvm.cttz.i64(i64 [[X2]], i1 false), !range !0
; CHECK-NEXT: [[Z1:%.*]] = add i64 [[Y1]], [[B1]]
; CHECK-NEXT: [[Z2:%.*]] = add i64 [[Y2]], [[B2]]
; CHECK-NEXT: [[R:%.*]] = mul i64 [[Z1]], [[Z2]]
; CHECK-NEXT: ret i64 [[R]]
;
; Negative test: the i1 flags differ (true vs. false), so the cttz calls
; cannot be paired and the expected output stays scalar.
%X1 = sub i64 %A1, %B1
%X2 = sub i64 %A2, %B2
%Y1 = call i64 @llvm.cttz.i64(i64 %X1, i1 true)
%Y2 = call i64 @llvm.cttz.i64(i64 %X2, i1 false)
%Z1 = add i64 %Y1, %B1
%Z2 = add i64 %Y2, %B2
%R = mul i64 %Z1, %Z2
ret i64 %R
}
; CHECK: declare <2 x double> @llvm.fma.v2f64(<2 x double>, <2 x double>, <2 x double>) #0
; CHECK: declare <2 x double> @llvm.fmuladd.v2f64(<2 x double>, <2 x double>, <2 x double>) #0
; CHECK: declare <2 x double> @llvm.cos.v2f64(<2 x double>) #0
; CHECK: declare <2 x double> @llvm.powi.v2f64(<2 x double>, i32) #0
; CHECK: declare <2 x double> @llvm.round.v2f64(<2 x double>) #0
; CHECK: declare <2 x double> @llvm.copysign.v2f64(<2 x double>, <2 x double>) #0
; CHECK: declare <2 x double> @llvm.ceil.v2f64(<2 x double>) #0
; CHECK: declare <2 x double> @llvm.nearbyint.v2f64(<2 x double>) #0
; CHECK: declare <2 x double> @llvm.rint.v2f64(<2 x double>) #0
; CHECK: declare <2 x double> @llvm.trunc.v2f64(<2 x double>) #0
; CHECK: declare <2 x double> @llvm.floor.v2f64(<2 x double>) #0
; CHECK: declare <2 x double> @llvm.fabs.v2f64(<2 x double>) #0
; CHECK: declare <2 x i64> @llvm.bswap.v2i64(<2 x i64>) #0
; CHECK: declare <2 x i64> @llvm.ctpop.v2i64(<2 x i64>) #0
; CHECK: declare <2 x i64> @llvm.ctlz.v2i64(<2 x i64>, i1) #0
; CHECK: declare <2 x i64> @llvm.cttz.v2i64(<2 x i64>, i1) #0
; CHECK: attributes #0 = { nounwind readnone speculatable }

View File

@ -1,134 +0,0 @@
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
; RUN: opt < %s -bb-vectorize -bb-vectorize-req-chain-depth=3 -instcombine -gvn -S | FileCheck %s
; RUN: opt < %s -bb-vectorize -bb-vectorize-req-chain-depth=3 -bb-vectorize-aligned-only -instcombine -gvn -S | FileCheck %s -check-prefix=CHECK-AO
; FIXME: re-enable this once pointer vectors work properly
; XFAIL: *
; Simple 3-pair chain also with loads and stores (using ptrs and gep)
define double @test1(i64* %a, i64* %b, i64* %c) nounwind uwtable readonly {
entry:
; Loads/muls/stores over adjacent i64 slots should fuse into <2 x i64> ops;
; the pointers produced via inttoptr feed a <2 x double*> gep whose lanes are
; extracted for the two scalar loads. Aligned-only (AO) run must not fuse the
; 8-byte-aligned loads.
%i0 = load i64, i64* %a, align 8
%i1 = load i64, i64* %b, align 8
%mul = mul i64 %i0, %i1
%arrayidx3 = getelementptr inbounds i64, i64* %a, i64 1
%i3 = load i64, i64* %arrayidx3, align 8
%arrayidx4 = getelementptr inbounds i64, i64* %b, i64 1
%i4 = load i64, i64* %arrayidx4, align 8
%mul5 = mul i64 %i3, %i4
%ptr = inttoptr i64 %mul to double*
%ptr5 = inttoptr i64 %mul5 to double*
%aptr = getelementptr inbounds double, double* %ptr, i64 2
%aptr5 = getelementptr inbounds double, double* %ptr5, i64 3
%av = load double, double* %aptr, align 16
%av5 = load double, double* %aptr5, align 16
%r = fmul double %av, %av5
store i64 %mul, i64* %c, align 8
%arrayidx5 = getelementptr inbounds i64, i64* %c, i64 1
store i64 %mul5, i64* %arrayidx5, align 8
ret double %r
; CHECK-LABEL: @test1(
; CHECK: %i0.v.i0 = bitcast i64* %a to <2 x i64>*
; CHECK: %i1.v.i0 = bitcast i64* %b to <2 x i64>*
; CHECK: %i0 = load <2 x i64>, <2 x i64>* %i0.v.i0, align 8
; CHECK: %i1 = load <2 x i64>, <2 x i64>* %i1.v.i0, align 8
; CHECK: %mul = mul <2 x i64> %i0, %i1
; CHECK: %ptr = inttoptr <2 x i64> %mul to <2 x double*>
; CHECK: %aptr = getelementptr inbounds double, <2 x double*> %ptr, <2 x i64> <i64 2, i64 3>
; CHECK: %aptr.v.r1 = extractelement <2 x double*> %aptr, i32 0
; CHECK: %aptr.v.r2 = extractelement <2 x double*> %aptr, i32 1
; CHECK: %av = load double, double* %aptr.v.r1, align 16
; CHECK: %av5 = load double, double* %aptr.v.r2, align 16
; CHECK: %r = fmul double %av, %av5
; CHECK: %0 = bitcast i64* %c to <2 x i64>*
; CHECK: store <2 x i64> %mul, <2 x i64>* %0, align 8
; CHECK: ret double %r
; CHECK-AO-LABEL: @test1(
; CHECK-AO-NOT: load <2 x
}
; Simple 3-pair chain with loads and stores (using ptrs and gep)
define void @test2(i64** %a, i64** %b, i64** %c) nounwind uwtable readonly {
entry:
; The adjacent i64* loads from %a and the geps/stores through %c should fuse
; into <2 x i64*> operations; the loads of %o1/%o4 stay scalar and are
; inserted into a <2 x i64> index vector for the fused gep. Aligned-only (AO)
; run must produce no vector types.
%i0 = load i64*, i64** %a, align 8
%i1 = load i64*, i64** %b, align 8
%arrayidx3 = getelementptr inbounds i64*, i64** %a, i64 1
%i3 = load i64*, i64** %arrayidx3, align 8
%arrayidx4 = getelementptr inbounds i64*, i64** %b, i64 1
%i4 = load i64*, i64** %arrayidx4, align 8
%o1 = load i64, i64* %i1, align 8
%o4 = load i64, i64* %i4, align 8
%ptr0 = getelementptr inbounds i64, i64* %i0, i64 %o1
%ptr3 = getelementptr inbounds i64, i64* %i3, i64 %o4
store i64* %ptr0, i64** %c, align 8
%arrayidx5 = getelementptr inbounds i64*, i64** %c, i64 1
store i64* %ptr3, i64** %arrayidx5, align 8
ret void
; CHECK-LABEL: @test2(
; CHECK: %i0.v.i0 = bitcast i64** %a to <2 x i64*>*
; CHECK: %i1 = load i64*, i64** %b, align 8
; CHECK: %i0 = load <2 x i64*>, <2 x i64*>* %i0.v.i0, align 8
; CHECK: %arrayidx4 = getelementptr inbounds i64*, i64** %b, i64 1
; CHECK: %i4 = load i64*, i64** %arrayidx4, align 8
; CHECK: %o1 = load i64, i64* %i1, align 8
; CHECK: %o4 = load i64, i64* %i4, align 8
; CHECK: %ptr0.v.i1.1 = insertelement <2 x i64> undef, i64 %o1, i32 0
; CHECK: %ptr0.v.i1.2 = insertelement <2 x i64> %ptr0.v.i1.1, i64 %o4, i32 1
; CHECK: %ptr0 = getelementptr inbounds i64, <2 x i64*> %i0, <2 x i64> %ptr0.v.i1.2
; CHECK: %0 = bitcast i64** %c to <2 x i64*>*
; CHECK: store <2 x i64*> %ptr0, <2 x i64*>* %0, align 8
; CHECK: ret void
; CHECK-AO-LABEL: @test2(
; CHECK-AO-NOT: <2 x
}
; Simple 3-pair chain with loads and stores (using ptrs and gep)
; using pointer vectors.
define void @test3(<2 x i64*>* %a, <2 x i64*>* %b, <2 x i64*>* %c) nounwind uwtable readonly {
entry:
; Inputs are already <2 x i64*> vectors: the adjacent vector loads from %a
; should widen to <4 x i64*> and the insertelement sequences collapse into
; shufflevectors, with a single <4 x i64*> store through %c. Aligned-only
; (AO) run must not produce <4 x ...> types.
%i0 = load <2 x i64*>, <2 x i64*>* %a, align 8
%i1 = load <2 x i64*>, <2 x i64*>* %b, align 8
%arrayidx3 = getelementptr inbounds <2 x i64*>, <2 x i64*>* %a, i64 1
%i3 = load <2 x i64*>, <2 x i64*>* %arrayidx3, align 8
%arrayidx4 = getelementptr inbounds <2 x i64*>, <2 x i64*>* %b, i64 1
%i4 = load <2 x i64*>, <2 x i64*>* %arrayidx4, align 8
%j1 = extractelement <2 x i64*> %i1, i32 0
%j4 = extractelement <2 x i64*> %i4, i32 0
%o1 = load i64, i64* %j1, align 8
%o4 = load i64, i64* %j4, align 8
%j0 = extractelement <2 x i64*> %i0, i32 0
%j3 = extractelement <2 x i64*> %i3, i32 0
%ptr0 = getelementptr inbounds i64, i64* %j0, i64 %o1
%ptr3 = getelementptr inbounds i64, i64* %j3, i64 %o4
%qtr0 = insertelement <2 x i64*> undef, i64* %ptr0, i32 0
%rtr0 = insertelement <2 x i64*> %qtr0, i64* %ptr0, i32 1
%qtr3 = insertelement <2 x i64*> undef, i64* %ptr3, i32 0
%rtr3 = insertelement <2 x i64*> %qtr3, i64* %ptr3, i32 1
store <2 x i64*> %rtr0, <2 x i64*>* %c, align 8
%arrayidx5 = getelementptr inbounds <2 x i64*>, <2 x i64*>* %c, i64 1
store <2 x i64*> %rtr3, <2 x i64*>* %arrayidx5, align 8
ret void
; CHECK-LABEL: @test3(
; CHECK: %i0.v.i0 = bitcast <2 x i64*>* %a to <4 x i64*>*
; CHECK: %i1 = load <2 x i64*>, <2 x i64*>* %b, align 8
; CHECK: %i0 = load <4 x i64*>, <4 x i64*>* %i0.v.i0, align 8
; CHECK: %arrayidx4 = getelementptr inbounds <2 x i64*>, <2 x i64*>* %b, i64 1
; CHECK: %i4 = load <2 x i64*>, <2 x i64*>* %arrayidx4, align 8
; CHECK: %j1 = extractelement <2 x i64*> %i1, i32 0
; CHECK: %j4 = extractelement <2 x i64*> %i4, i32 0
; CHECK: %o1 = load i64, i64* %j1, align 8
; CHECK: %o4 = load i64, i64* %j4, align 8
; CHECK: %ptr0.v.i1.1 = insertelement <2 x i64> undef, i64 %o1, i32 0
; CHECK: %ptr0.v.i1.2 = insertelement <2 x i64> %ptr0.v.i1.1, i64 %o4, i32 1
; CHECK: %ptr0.v.i0 = shufflevector <4 x i64*> %i0, <4 x i64*> undef, <2 x i32> <i32 0, i32 2>
; CHECK: %ptr0 = getelementptr inbounds i64, <2 x i64*> %ptr0.v.i0, <2 x i64> %ptr0.v.i1.2
; CHECK: %rtr0 = shufflevector <2 x i64*> %ptr0, <2 x i64*> undef, <2 x i32> zeroinitializer
; CHECK: %rtr3 = shufflevector <2 x i64*> %ptr0, <2 x i64*> undef, <2 x i32> <i32 1, i32 1>
; CHECK: %0 = bitcast <2 x i64*>* %c to <4 x i64*>*
; CHECK: %1 = shufflevector <2 x i64*> %rtr0, <2 x i64*> %rtr3, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
; CHECK: store <4 x i64*> %1, <4 x i64*>* %0, align 8
; CHECK: ret void
; CHECK-AO-LABEL: @test3(
; CHECK-AO-NOT: <4 x
}

View File

@ -1,170 +0,0 @@
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
; RUN: opt < %s -bb-vectorize -bb-vectorize-req-chain-depth=3 -bb-vectorize-ignore-target-info -instcombine -gvn -S | FileCheck %s
; RUN: opt < %s -bb-vectorize -bb-vectorize-req-chain-depth=3 -bb-vectorize-aligned-only -bb-vectorize-ignore-target-info -instcombine -gvn -S | FileCheck %s -check-prefix=CHECK-AO
; Simple 3-pair chain with loads and stores
; Two parallel scalar double chains (load, load, fmul, store) over adjacent
; elements. The default run is expected to fuse them into a single
; <2 x double> load/fmul/store sequence (see the CHECK lines below); the
; aligned-only run must produce no <2 x double> at all (CHECK-AO lines).
define void @test1(double* %a, double* %b, double* %c) nounwind uwtable readonly {
entry:
%i0 = load double, double* %a, align 8
%i1 = load double, double* %b, align 8
%mul = fmul double %i0, %i1
%arrayidx3 = getelementptr inbounds double, double* %a, i64 1
%i3 = load double, double* %arrayidx3, align 8
%arrayidx4 = getelementptr inbounds double, double* %b, i64 1
%i4 = load double, double* %arrayidx4, align 8
%mul5 = fmul double %i3, %i4
store double %mul, double* %c, align 8
%arrayidx5 = getelementptr inbounds double, double* %c, i64 1
store double %mul5, double* %arrayidx5, align 8
ret void
; CHECK-LABEL: @test1(
; CHECK: %i0.v.i0 = bitcast double* %a to <2 x double>*
; CHECK: %i1.v.i0 = bitcast double* %b to <2 x double>*
; CHECK: %i0 = load <2 x double>, <2 x double>* %i0.v.i0, align 8
; CHECK: %i1 = load <2 x double>, <2 x double>* %i1.v.i0, align 8
; CHECK: %mul = fmul <2 x double> %i0, %i1
; CHECK: %0 = bitcast double* %c to <2 x double>*
; CHECK: store <2 x double> %mul, <2 x double>* %0, align 8
; CHECK: ret void
; CHECK-AO-LABEL: @test1(
; CHECK-AO-NOT: <2 x double>
}
; Simple chain with extending loads and stores
; Same shape as test1, but the loads are float (align 4) widened via fpext
; to double before the fmul. The default run vectorizes the whole chain
; (<2 x float> loads, <2 x double> fpext/fmul/store); the aligned-only run
; must not form any <2 x double> (CHECK-AO lines).
define void @test2(float* %a, float* %b, double* %c) nounwind uwtable readonly {
entry:
%i0f = load float, float* %a, align 4
%i0 = fpext float %i0f to double
%i1f = load float, float* %b, align 4
%i1 = fpext float %i1f to double
%mul = fmul double %i0, %i1
%arrayidx3 = getelementptr inbounds float, float* %a, i64 1
%i3f = load float, float* %arrayidx3, align 4
%i3 = fpext float %i3f to double
%arrayidx4 = getelementptr inbounds float, float* %b, i64 1
%i4f = load float, float* %arrayidx4, align 4
%i4 = fpext float %i4f to double
%mul5 = fmul double %i3, %i4
store double %mul, double* %c, align 8
%arrayidx5 = getelementptr inbounds double, double* %c, i64 1
store double %mul5, double* %arrayidx5, align 8
ret void
; CHECK-LABEL: @test2(
; CHECK: %i0f.v.i0 = bitcast float* %a to <2 x float>*
; CHECK: %i1f.v.i0 = bitcast float* %b to <2 x float>*
; CHECK: %i0f = load <2 x float>, <2 x float>* %i0f.v.i0, align 4
; CHECK: %i0 = fpext <2 x float> %i0f to <2 x double>
; CHECK: %i1f = load <2 x float>, <2 x float>* %i1f.v.i0, align 4
; CHECK: %i1 = fpext <2 x float> %i1f to <2 x double>
; CHECK: %mul = fmul <2 x double> %i0, %i1
; CHECK: %0 = bitcast double* %c to <2 x double>*
; CHECK: store <2 x double> %mul, <2 x double>* %0, align 8
; CHECK: ret void
; CHECK-AO-LABEL: @test2(
; CHECK-AO-NOT: <2 x double>
}
; Simple chain with loads and truncating stores
; Double chains whose results are fptrunc'd to float before the stores.
; The default run vectorizes end to end. In the aligned-only run the double
; loads stay scalar, but the fmul/fptrunc/store part is still vectorized:
; the scalar loads are packed with insertelement into <2 x double> first
; (see the CHECK-AO lines).
define void @test3(double* %a, double* %b, float* %c) nounwind uwtable readonly {
entry:
%i0 = load double, double* %a, align 8
%i1 = load double, double* %b, align 8
%mul = fmul double %i0, %i1
%mulf = fptrunc double %mul to float
%arrayidx3 = getelementptr inbounds double, double* %a, i64 1
%i3 = load double, double* %arrayidx3, align 8
%arrayidx4 = getelementptr inbounds double, double* %b, i64 1
%i4 = load double, double* %arrayidx4, align 8
%mul5 = fmul double %i3, %i4
%mul5f = fptrunc double %mul5 to float
store float %mulf, float* %c, align 8
%arrayidx5 = getelementptr inbounds float, float* %c, i64 1
store float %mul5f, float* %arrayidx5, align 4
ret void
; CHECK-LABEL: @test3(
; CHECK: %i0.v.i0 = bitcast double* %a to <2 x double>*
; CHECK: %i1.v.i0 = bitcast double* %b to <2 x double>*
; CHECK: %i0 = load <2 x double>, <2 x double>* %i0.v.i0, align 8
; CHECK: %i1 = load <2 x double>, <2 x double>* %i1.v.i0, align 8
; CHECK: %mul = fmul <2 x double> %i0, %i1
; CHECK: %mulf = fptrunc <2 x double> %mul to <2 x float>
; CHECK: %0 = bitcast float* %c to <2 x float>*
; CHECK: store <2 x float> %mulf, <2 x float>* %0, align 8
; CHECK: ret void
; CHECK-AO-LABEL: @test3(
; CHECK-AO: %i0 = load double, double* %a, align 8
; CHECK-AO: %i1 = load double, double* %b, align 8
; CHECK-AO: %arrayidx3 = getelementptr inbounds double, double* %a, i64 1
; CHECK-AO: %i3 = load double, double* %arrayidx3, align 8
; CHECK-AO: %arrayidx4 = getelementptr inbounds double, double* %b, i64 1
; CHECK-AO: %i4 = load double, double* %arrayidx4, align 8
; CHECK-AO: %mul.v.i1.1 = insertelement <2 x double> undef, double %i1, i32 0
; CHECK-AO: %mul.v.i1.2 = insertelement <2 x double> %mul.v.i1.1, double %i4, i32 1
; CHECK-AO: %mul.v.i0.1 = insertelement <2 x double> undef, double %i0, i32 0
; CHECK-AO: %mul.v.i0.2 = insertelement <2 x double> %mul.v.i0.1, double %i3, i32 1
; CHECK-AO: %mul = fmul <2 x double> %mul.v.i0.2, %mul.v.i1.2
; CHECK-AO: %mulf = fptrunc <2 x double> %mul to <2 x float>
; CHECK-AO: %0 = bitcast float* %c to <2 x float>*
; CHECK-AO: store <2 x float> %mulf, <2 x float>* %0, align 8
; CHECK-AO: ret void
}
; Simple 3-pair chain with loads and stores (unreachable)
; The vectorizable chain lives in %if.then, which is only entered through a
; branch that follows an 'unreachable' (i.e. dead control flow). Neither run
; may introduce <2 x double> here.
define void @test4(i1 %bool, double* %a, double* %b, double* %c) nounwind uwtable readonly {
entry:
br i1 %bool, label %if.then1, label %if.end
if.then1:
unreachable
br label %if.then
if.then:
%i0 = load double, double* %a, align 8
%i1 = load double, double* %b, align 8
%mul = fmul double %i0, %i1
%arrayidx3 = getelementptr inbounds double, double* %a, i64 1
%i3 = load double, double* %arrayidx3, align 8
%arrayidx4 = getelementptr inbounds double, double* %b, i64 1
%i4 = load double, double* %arrayidx4, align 8
%mul5 = fmul double %i3, %i4
store double %mul, double* %c, align 8
%arrayidx5 = getelementptr inbounds double, double* %c, i64 1
store double %mul5, double* %arrayidx5, align 8
br label %if.end
if.end:
ret void
; CHECK-LABEL: @test4(
; CHECK-NOT: <2 x double>
; CHECK-AO-LABEL: @test4(
; CHECK-AO-NOT: <2 x double>
}
; Simple 3-pair chain with loads and stores
; Like test1, but the store to %c comes after the store to %c+1 and carries
; only 'align 4'. The vectorized store keeps that lower alignment (the CHECK
; line expects 'align 4' on the <2 x double> store); the aligned-only run
; must not vectorize.
define void @test5(double* %a, double* %b, double* %c) nounwind uwtable readonly {
entry:
%i0 = load double, double* %a, align 8
%i1 = load double, double* %b, align 8
%mul = fmul double %i0, %i1
%arrayidx3 = getelementptr inbounds double, double* %a, i64 1
%i3 = load double, double* %arrayidx3, align 8
%arrayidx4 = getelementptr inbounds double, double* %b, i64 1
%i4 = load double, double* %arrayidx4, align 8
%mul5 = fmul double %i3, %i4
%arrayidx5 = getelementptr inbounds double, double* %c, i64 1
store double %mul5, double* %arrayidx5, align 8
store double %mul, double* %c, align 4
ret void
; CHECK-LABEL: @test5(
; CHECK: %i0.v.i0 = bitcast double* %a to <2 x double>*
; CHECK: %i1.v.i0 = bitcast double* %b to <2 x double>*
; CHECK: %i0 = load <2 x double>, <2 x double>* %i0.v.i0, align 8
; CHECK: %i1 = load <2 x double>, <2 x double>* %i1.v.i0, align 8
; CHECK: %mul = fmul <2 x double> %i0, %i1
; CHECK: %0 = bitcast double* %c to <2 x double>*
; CHECK: store <2 x double> %mul, <2 x double>* %0, align 4
; CHECK: ret void
; CHECK-AO-LABEL: @test5(
; CHECK-AO-NOT: <2 x double>
}

View File

@ -1,59 +0,0 @@
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
; RUN: opt < %s -bb-vectorize -bb-vectorize-req-chain-depth=3 -bb-vectorize-ignore-target-info -instcombine -gvn -S | FileCheck %s
; RUN: opt < %s -bb-vectorize -bb-vectorize-req-chain-depth=3 -bb-vectorize-no-bools -bb-vectorize-ignore-target-info -instcombine -gvn -S | FileCheck %s -check-prefix=CHECK-NB
; Basic depth-3 chain with select
; fsub/fmul chains end in selects driven by scalar i1 conditions. The two
; i1 conditions are packed into a <2 x i1> with insertelement so the selects
; vectorize too; the final fmul combines the two extracted lanes.
define double @test1(double %A1, double %A2, double %B1, double %B2, i1 %C1, i1 %C2) {
; CHECK-LABEL: @test1(
; CHECK: %X1.v.i1.1 = insertelement <2 x double> undef, double %B1, i32 0
; CHECK: %X1.v.i1.2 = insertelement <2 x double> %X1.v.i1.1, double %B2, i32 1
; CHECK: %X1.v.i0.1 = insertelement <2 x double> undef, double %A1, i32 0
; CHECK: %X1.v.i0.2 = insertelement <2 x double> %X1.v.i0.1, double %A2, i32 1
%X1 = fsub double %A1, %B1
%X2 = fsub double %A2, %B2
; CHECK: %X1 = fsub <2 x double> %X1.v.i0.2, %X1.v.i1.2
%Y1 = fmul double %X1, %A1
%Y2 = fmul double %X2, %A2
; CHECK: %Y1 = fmul <2 x double> %X1, %X1.v.i0.2
%Z1 = select i1 %C1, double %Y1, double %B1
%Z2 = select i1 %C2, double %Y2, double %B2
; CHECK: %Z1.v.i0.1 = insertelement <2 x i1> undef, i1 %C1, i32 0
; CHECK: %Z1.v.i0.2 = insertelement <2 x i1> %Z1.v.i0.1, i1 %C2, i32 1
; CHECK: %Z1 = select <2 x i1> %Z1.v.i0.2, <2 x double> %Y1, <2 x double> %X1.v.i1.2
%R = fmul double %Z1, %Z2
; CHECK: %Z1.v.r1 = extractelement <2 x double> %Z1, i32 0
; CHECK: %Z1.v.r2 = extractelement <2 x double> %Z1, i32 1
; CHECK: %R = fmul double %Z1.v.r1, %Z1.v.r2
ret double %R
; CHECK: ret double %R
}
; Basic depth-3 chain with select (and vect. compare)
; As test1, but the select conditions come from fcmp on chain values, so the
; compare itself vectorizes to <2 x double>/<2 x i1>. The -bb-vectorize-no-bools
; run must keep the compare scalar (the CHECK-NB line matches a scalar fcmp).
define double @test2(double %A1, double %A2, double %B1, double %B2) {
; CHECK-LABEL: @test2(
; CHECK-NB-LABEL: @test2(
; CHECK: %X1.v.i1.1 = insertelement <2 x double> undef, double %B1, i32 0
; CHECK: %X1.v.i1.2 = insertelement <2 x double> %X1.v.i1.1, double %B2, i32 1
; CHECK: %X1.v.i0.1 = insertelement <2 x double> undef, double %A1, i32 0
; CHECK: %X1.v.i0.2 = insertelement <2 x double> %X1.v.i0.1, double %A2, i32 1
%X1 = fsub double %A1, %B1
%X2 = fsub double %A2, %B2
; CHECK: %X1 = fsub <2 x double> %X1.v.i0.2, %X1.v.i1.2
%Y1 = fmul double %X1, %A1
%Y2 = fmul double %X2, %A2
; CHECK: %Y1 = fmul <2 x double> %X1, %X1.v.i0.2
%C1 = fcmp ogt double %X1, %A1
%C2 = fcmp ogt double %X2, %A2
; CHECK: %C1 = fcmp ogt <2 x double> %X1, %X1.v.i0.2
; CHECK-NB: fcmp ogt double
%Z1 = select i1 %C1, double %Y1, double %B1
%Z2 = select i1 %C2, double %Y2, double %B2
; CHECK: %Z1 = select <2 x i1> %C1, <2 x double> %Y1, <2 x double> %X1.v.i1.2
%R = fmul double %Z1, %Z2
; CHECK: %Z1.v.r1 = extractelement <2 x double> %Z1, i32 0
; CHECK: %Z1.v.r2 = extractelement <2 x double> %Z1, i32 1
; CHECK: %R = fmul double %Z1.v.r1, %Z1.v.r2
ret double %R
; CHECK: ret double %R
}

View File

@ -1,18 +0,0 @@
target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v128:128:128-n32:64"
target triple = "powerpc64-unknown-linux"
; RUN: opt < %s -bb-vectorize -bb-vectorize-req-chain-depth=3 -bb-vectorize-vector-bits=256 -instcombine -gvn -S | FileCheck %s
; Basic depth-3 chain (target-specific type should not vectorize)
; On powerpc64, ppc_fp128 is a target-specific type; even with 256-bit
; vectors requested, the pass must not form <2 x ppc_fp128>.
define ppc_fp128 @test7(ppc_fp128 %A1, ppc_fp128 %A2, ppc_fp128 %B1, ppc_fp128 %B2) {
; CHECK-LABEL: @test7(
; CHECK-NOT: <2 x ppc_fp128>
%X1 = fsub ppc_fp128 %A1, %B1
%X2 = fsub ppc_fp128 %A2, %B2
%Y1 = fmul ppc_fp128 %X1, %A1
%Y2 = fmul ppc_fp128 %X2, %A2
%Z1 = fadd ppc_fp128 %Y1, %B1
%Z2 = fadd ppc_fp128 %Y2, %B2
%R = fmul ppc_fp128 %Z1, %Z2
ret ppc_fp128 %R
}

View File

@ -1,209 +0,0 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -bb-vectorize -bb-vectorize-req-chain-depth=3 -bb-vectorize-ignore-target-info -instcombine -gvn -S | FileCheck %s
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
; Basic depth-3 chain
; Two parallel fsub/fmul/fadd chains of depth 3. Expected output: operands
; packed with insertelement, the chain executed as <2 x double>, and both
; lanes extracted for the final scalar fmul.
define double @test1(double %A1, double %A2, double %B1, double %B2) {
; CHECK-LABEL: @test1(
; CHECK-NEXT:    [[X1_V_I1_1:%.*]] = insertelement <2 x double> undef, double [[B1:%.*]], i32 0
; CHECK-NEXT:    [[X1_V_I1_2:%.*]] = insertelement <2 x double> [[X1_V_I1_1]], double [[B2:%.*]], i32 1
; CHECK-NEXT:    [[X1_V_I0_1:%.*]] = insertelement <2 x double> undef, double [[A1:%.*]], i32 0
; CHECK-NEXT:    [[X1_V_I0_2:%.*]] = insertelement <2 x double> [[X1_V_I0_1]], double [[A2:%.*]], i32 1
; CHECK-NEXT:    [[X1:%.*]] = fsub <2 x double> [[X1_V_I0_2]], [[X1_V_I1_2]]
; CHECK-NEXT:    [[Y1:%.*]] = fmul <2 x double> [[X1]], [[X1_V_I0_2]]
; CHECK-NEXT:    [[Z1:%.*]] = fadd <2 x double> [[Y1]], [[X1_V_I1_2]]
; CHECK-NEXT:    [[Z1_V_R1:%.*]] = extractelement <2 x double> [[Z1]], i32 0
; CHECK-NEXT:    [[Z1_V_R2:%.*]] = extractelement <2 x double> [[Z1]], i32 1
; CHECK-NEXT:    [[R:%.*]] = fmul double [[Z1_V_R1]], [[Z1_V_R2]]
; CHECK-NEXT:    ret double [[R]]
;
%X1 = fsub double %A1, %B1
%X2 = fsub double %A2, %B2
%Y1 = fmul double %X1, %A1
%Y2 = fmul double %X2, %A2
%Z1 = fadd double %Y1, %B1
%Z2 = fadd double %Y2, %B2
%R = fmul double %Z1, %Z2
ret double %R
}
; Basic depth-3 chain (last pair permuted)
; Same as test1, but the last pair crosses the chains (%Z1 uses %Y2, %Z2 uses
; %Y1). The expected output inserts the B operands in swapped order
; ([[Z1_V_I1_*]]) so the fadd still vectorizes.
define double @test2(double %A1, double %A2, double %B1, double %B2) {
; CHECK-LABEL: @test2(
; CHECK-NEXT:    [[X1_V_I1_1:%.*]] = insertelement <2 x double> undef, double [[B1:%.*]], i32 0
; CHECK-NEXT:    [[X1_V_I1_2:%.*]] = insertelement <2 x double> [[X1_V_I1_1]], double [[B2:%.*]], i32 1
; CHECK-NEXT:    [[X1_V_I0_1:%.*]] = insertelement <2 x double> undef, double [[A1:%.*]], i32 0
; CHECK-NEXT:    [[X1_V_I0_2:%.*]] = insertelement <2 x double> [[X1_V_I0_1]], double [[A2:%.*]], i32 1
; CHECK-NEXT:    [[X1:%.*]] = fsub <2 x double> [[X1_V_I0_2]], [[X1_V_I1_2]]
; CHECK-NEXT:    [[Y1:%.*]] = fmul <2 x double> [[X1]], [[X1_V_I0_2]]
; CHECK-NEXT:    [[Z1_V_I1_1:%.*]] = insertelement <2 x double> undef, double [[B2]], i32 0
; CHECK-NEXT:    [[Z1_V_I1_2:%.*]] = insertelement <2 x double> [[Z1_V_I1_1]], double [[B1]], i32 1
; CHECK-NEXT:    [[Z2:%.*]] = fadd <2 x double> [[Y1]], [[Z1_V_I1_2]]
; CHECK-NEXT:    [[Z2_V_R1:%.*]] = extractelement <2 x double> [[Z2]], i32 0
; CHECK-NEXT:    [[Z2_V_R2:%.*]] = extractelement <2 x double> [[Z2]], i32 1
; CHECK-NEXT:    [[R:%.*]] = fmul double [[Z2_V_R2]], [[Z2_V_R1]]
; CHECK-NEXT:    ret double [[R]]
;
%X1 = fsub double %A1, %B1
%X2 = fsub double %A2, %B2
%Y1 = fmul double %X1, %A1
%Y2 = fmul double %X2, %A2
%Z1 = fadd double %Y2, %B1
%Z2 = fadd double %Y1, %B2
%R = fmul double %Z1, %Z2
ret double %R
}
; Basic depth-3 chain (last pair first splat)
; Both final fadds read %Y2, so the expected output splats lane 1 of the
; vectorized fmul with a <i32 1, i32 1> shufflevector before the fadd.
define double @test3(double %A1, double %A2, double %B1, double %B2) {
; CHECK-LABEL: @test3(
; CHECK-NEXT:    [[X1_V_I1_1:%.*]] = insertelement <2 x double> undef, double [[B1:%.*]], i32 0
; CHECK-NEXT:    [[X1_V_I1_2:%.*]] = insertelement <2 x double> [[X1_V_I1_1]], double [[B2:%.*]], i32 1
; CHECK-NEXT:    [[X1_V_I0_1:%.*]] = insertelement <2 x double> undef, double [[A1:%.*]], i32 0
; CHECK-NEXT:    [[X1_V_I0_2:%.*]] = insertelement <2 x double> [[X1_V_I0_1]], double [[A2:%.*]], i32 1
; CHECK-NEXT:    [[X1:%.*]] = fsub <2 x double> [[X1_V_I0_2]], [[X1_V_I1_2]]
; CHECK-NEXT:    [[Y1:%.*]] = fmul <2 x double> [[X1]], [[X1_V_I0_2]]
; CHECK-NEXT:    [[Z1_V_I0:%.*]] = shufflevector <2 x double> [[Y1]], <2 x double> undef, <2 x i32> <i32 1, i32 1>
; CHECK-NEXT:    [[Z1:%.*]] = fadd <2 x double> [[Z1_V_I0]], [[X1_V_I1_2]]
; CHECK-NEXT:    [[Z1_V_R1:%.*]] = extractelement <2 x double> [[Z1]], i32 0
; CHECK-NEXT:    [[Z1_V_R2:%.*]] = extractelement <2 x double> [[Z1]], i32 1
; CHECK-NEXT:    [[R:%.*]] = fmul double [[Z1_V_R1]], [[Z1_V_R2]]
; CHECK-NEXT:    ret double [[R]]
;
%X1 = fsub double %A1, %B1
%X2 = fsub double %A2, %B2
%Y1 = fmul double %X1, %A1
%Y2 = fmul double %X2, %A2
%Z1 = fadd double %Y2, %B1
%Z2 = fadd double %Y2, %B2
%R = fmul double %Z1, %Z2
ret double %R
}
; Basic depth-3 chain (last pair second splat)
; Mirror of test3: both final fadds read %Y1, so lane 0 is splatted via a
; zeroinitializer shuffle mask before the vectorized fadd.
define double @test4(double %A1, double %A2, double %B1, double %B2) {
; CHECK-LABEL: @test4(
; CHECK-NEXT:    [[X1_V_I1_1:%.*]] = insertelement <2 x double> undef, double [[B1:%.*]], i32 0
; CHECK-NEXT:    [[X1_V_I1_2:%.*]] = insertelement <2 x double> [[X1_V_I1_1]], double [[B2:%.*]], i32 1
; CHECK-NEXT:    [[X1_V_I0_1:%.*]] = insertelement <2 x double> undef, double [[A1:%.*]], i32 0
; CHECK-NEXT:    [[X1_V_I0_2:%.*]] = insertelement <2 x double> [[X1_V_I0_1]], double [[A2:%.*]], i32 1
; CHECK-NEXT:    [[X1:%.*]] = fsub <2 x double> [[X1_V_I0_2]], [[X1_V_I1_2]]
; CHECK-NEXT:    [[Y1:%.*]] = fmul <2 x double> [[X1]], [[X1_V_I0_2]]
; CHECK-NEXT:    [[Z1_V_I0:%.*]] = shufflevector <2 x double> [[Y1]], <2 x double> undef, <2 x i32> zeroinitializer
; CHECK-NEXT:    [[Z1:%.*]] = fadd <2 x double> [[Z1_V_I0]], [[X1_V_I1_2]]
; CHECK-NEXT:    [[Z1_V_R1:%.*]] = extractelement <2 x double> [[Z1]], i32 0
; CHECK-NEXT:    [[Z1_V_R2:%.*]] = extractelement <2 x double> [[Z1]], i32 1
; CHECK-NEXT:    [[R:%.*]] = fmul double [[Z1_V_R1]], [[Z1_V_R2]]
; CHECK-NEXT:    ret double [[R]]
;
%X1 = fsub double %A1, %B1
%X2 = fsub double %A2, %B2
%Y1 = fmul double %X1, %A1
%Y2 = fmul double %X2, %A2
%Z1 = fadd double %Y1, %B1
%Z2 = fadd double %Y1, %B2
%R = fmul double %Z1, %Z2
ret double %R
}
; Basic depth-3 chain
; Inputs are already <2 x float>; the pairs are widened to <4 x float> by
; concatenating with shufflevector instead of insertelement, and the result
; is split back into two <2 x float> halves for the final fmul.
define <2 x float> @test5(<2 x float> %A1, <2 x float> %A2, <2 x float> %B1, <2 x float> %B2) {
; CHECK-LABEL: @test5(
; CHECK-NEXT:    [[X1_V_I1:%.*]] = shufflevector <2 x float> [[B1:%.*]], <2 x float> [[B2:%.*]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
; CHECK-NEXT:    [[X1_V_I0:%.*]] = shufflevector <2 x float> [[A1:%.*]], <2 x float> [[A2:%.*]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
; CHECK-NEXT:    [[X1:%.*]] = fsub <4 x float> [[X1_V_I0]], [[X1_V_I1]]
; CHECK-NEXT:    [[Y1:%.*]] = fmul <4 x float> [[X1]], [[X1_V_I0]]
; CHECK-NEXT:    [[Z1:%.*]] = fadd <4 x float> [[Y1]], [[X1_V_I1]]
; CHECK-NEXT:    [[Z1_V_R1:%.*]] = shufflevector <4 x float> [[Z1]], <4 x float> undef, <2 x i32> <i32 0, i32 1>
; CHECK-NEXT:    [[Z1_V_R2:%.*]] = shufflevector <4 x float> [[Z1]], <4 x float> undef, <2 x i32> <i32 2, i32 3>
; CHECK-NEXT:    [[R:%.*]] = fmul <2 x float> [[Z1_V_R1]], [[Z1_V_R2]]
; CHECK-NEXT:    ret <2 x float> [[R]]
;
%X1 = fsub <2 x float> %A1, %B1
%X2 = fsub <2 x float> %A2, %B2
%Y1 = fmul <2 x float> %X1, %A1
%Y2 = fmul <2 x float> %X2, %A2
%Z1 = fadd <2 x float> %Y1, %B1
%Z2 = fadd <2 x float> %Y2, %B2
%R = fmul <2 x float> %Z1, %Z2
ret <2 x float> %R
}
; Basic chain with shuffles
; Integer <8 x i8> chain that ends in two shufflevectors. The expected
; output runs the chain as <16 x i8> and fuses the two 8-wide shuffle masks
; into 16-wide masks ([[Q1_V_I1]] / [[Q1]]).
define <8 x i8> @test6(<8 x i8> %A1, <8 x i8> %A2, <8 x i8> %B1, <8 x i8> %B2) {
; CHECK-LABEL: @test6(
; CHECK-NEXT:    [[X1_V_I1:%.*]] = shufflevector <8 x i8> [[B1:%.*]], <8 x i8> [[B2:%.*]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
; CHECK-NEXT:    [[X1_V_I0:%.*]] = shufflevector <8 x i8> [[A1:%.*]], <8 x i8> [[A2:%.*]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
; CHECK-NEXT:    [[X1:%.*]] = sub <16 x i8> [[X1_V_I0]], [[X1_V_I1]]
; CHECK-NEXT:    [[Y1:%.*]] = mul <16 x i8> [[X1]], [[X1_V_I0]]
; CHECK-NEXT:    [[Z1:%.*]] = add <16 x i8> [[Y1]], [[X1_V_I1]]
; CHECK-NEXT:    [[Q1_V_I1:%.*]] = shufflevector <16 x i8> [[Z1]], <16 x i8> undef, <16 x i32> <i32 8, i32 undef, i32 10, i32 undef, i32 undef, i32 13, i32 undef, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
; CHECK-NEXT:    [[Q1:%.*]] = shufflevector <16 x i8> [[Z1]], <16 x i8> [[Q1_V_I1]], <16 x i32> <i32 23, i32 16, i32 6, i32 1, i32 21, i32 18, i32 4, i32 3, i32 14, i32 15, i32 8, i32 9, i32 10, i32 12, i32 12, i32 9>
; CHECK-NEXT:    [[Q1_V_R1:%.*]] = shufflevector <16 x i8> [[Q1]], <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
; CHECK-NEXT:    [[Q1_V_R2:%.*]] = shufflevector <16 x i8> [[Q1]], <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
; CHECK-NEXT:    [[R:%.*]] = mul <8 x i8> [[Q1_V_R1]], [[Q1_V_R2]]
; CHECK-NEXT:    ret <8 x i8> [[R]]
;
%X1 = sub <8 x i8> %A1, %B1
%X2 = sub <8 x i8> %A2, %B2
%Y1 = mul <8 x i8> %X1, %A1
%Y2 = mul <8 x i8> %X2, %A2
%Z1 = add <8 x i8> %Y1, %B1
%Z2 = add <8 x i8> %Y2, %B2
%Q1 = shufflevector <8 x i8> %Z1, <8 x i8> %Z2, <8 x i32> <i32 15, i32 8, i32 6, i32 1, i32 13, i32 10, i32 4, i32 3>
%Q2 = shufflevector <8 x i8> %Z2, <8 x i8> %Z2, <8 x i32> <i32 6, i32 7, i32 0, i32 1, i32 2, i32 4, i32 4, i32 1>
%R = mul <8 x i8> %Q1, %Q2
ret <8 x i8> %R
}
; Basic depth-3 chain (flipped order)
; Identical to test1 except %Z2 is defined before %Z1; pairing must not
; depend on the textual order of the pair members, so the expected output
; matches test1's.
define double @test7(double %A1, double %A2, double %B1, double %B2) {
; CHECK-LABEL: @test7(
; CHECK-NEXT:    [[X1_V_I1_1:%.*]] = insertelement <2 x double> undef, double [[B1:%.*]], i32 0
; CHECK-NEXT:    [[X1_V_I1_2:%.*]] = insertelement <2 x double> [[X1_V_I1_1]], double [[B2:%.*]], i32 1
; CHECK-NEXT:    [[X1_V_I0_1:%.*]] = insertelement <2 x double> undef, double [[A1:%.*]], i32 0
; CHECK-NEXT:    [[X1_V_I0_2:%.*]] = insertelement <2 x double> [[X1_V_I0_1]], double [[A2:%.*]], i32 1
; CHECK-NEXT:    [[X1:%.*]] = fsub <2 x double> [[X1_V_I0_2]], [[X1_V_I1_2]]
; CHECK-NEXT:    [[Y1:%.*]] = fmul <2 x double> [[X1]], [[X1_V_I0_2]]
; CHECK-NEXT:    [[Z1:%.*]] = fadd <2 x double> [[Y1]], [[X1_V_I1_2]]
; CHECK-NEXT:    [[Z1_V_R1:%.*]] = extractelement <2 x double> [[Z1]], i32 0
; CHECK-NEXT:    [[Z1_V_R2:%.*]] = extractelement <2 x double> [[Z1]], i32 1
; CHECK-NEXT:    [[R:%.*]] = fmul double [[Z1_V_R1]], [[Z1_V_R2]]
; CHECK-NEXT:    ret double [[R]]
;
%X1 = fsub double %A1, %B1
%X2 = fsub double %A2, %B2
%Y1 = fmul double %X1, %A1
%Y2 = fmul double %X2, %A2
%Z2 = fadd double %Y2, %B2
%Z1 = fadd double %Y1, %B1
%R = fmul double %Z1, %Z2
ret double %R
}
; Basic depth-3 chain (subclass data)
; Integer version where only %X1 carries 'nsw' (subclass data). The pair can
; still be fused, but the vectorized sub must drop the nsw flag (note the
; plain 'sub <2 x i64>' in the expected output).
define i64 @test8(i64 %A1, i64 %A2, i64 %B1, i64 %B2) {
; CHECK-LABEL: @test8(
; CHECK-NEXT:    [[X1_V_I1_1:%.*]] = insertelement <2 x i64> undef, i64 [[B1:%.*]], i32 0
; CHECK-NEXT:    [[X1_V_I1_2:%.*]] = insertelement <2 x i64> [[X1_V_I1_1]], i64 [[B2:%.*]], i32 1
; CHECK-NEXT:    [[X1_V_I0_1:%.*]] = insertelement <2 x i64> undef, i64 [[A1:%.*]], i32 0
; CHECK-NEXT:    [[X1_V_I0_2:%.*]] = insertelement <2 x i64> [[X1_V_I0_1]], i64 [[A2:%.*]], i32 1
; CHECK-NEXT:    [[X1:%.*]] = sub <2 x i64> [[X1_V_I0_2]], [[X1_V_I1_2]]
; CHECK-NEXT:    [[Y1:%.*]] = mul <2 x i64> [[X1]], [[X1_V_I0_2]]
; CHECK-NEXT:    [[Z1:%.*]] = add <2 x i64> [[Y1]], [[X1_V_I1_2]]
; CHECK-NEXT:    [[Z1_V_R1:%.*]] = extractelement <2 x i64> [[Z1]], i32 0
; CHECK-NEXT:    [[Z1_V_R2:%.*]] = extractelement <2 x i64> [[Z1]], i32 1
; CHECK-NEXT:    [[R:%.*]] = mul i64 [[Z1_V_R1]], [[Z1_V_R2]]
; CHECK-NEXT:    ret i64 [[R]]
;
%X1 = sub nsw i64 %A1, %B1
%X2 = sub i64 %A2, %B2
%Y1 = mul i64 %X1, %A1
%Y2 = mul i64 %X2, %A2
%Z1 = add i64 %Y1, %B1
%Z2 = add i64 %Y2, %B2
%R = mul i64 %Z1, %Z2
ret i64 %R
}

View File

@ -1,38 +0,0 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -bb-vectorize -bb-vectorize-req-chain-depth=3 -bb-vectorize-vector-bits=192 -bb-vectorize-ignore-target-info -instcombine -gvn -S | FileCheck %s
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
; Basic depth-3 chain
; With -bb-vectorize-vector-bits=192, three parallel double chains fit in a
; <3 x double>; the expected output packs A1..A3 and B1..B3, runs the chain
; 3-wide, and extracts all three lanes for the final scalar fmuls.
define double @test1(double %A1, double %A2, double %A3, double %B1, double %B2, double %B3) {
; CHECK-LABEL: @test1(
; CHECK-NEXT:    [[X1_V_I1_11:%.*]] = insertelement <3 x double> undef, double [[B1:%.*]], i32 0
; CHECK-NEXT:    [[X1_V_I1_22:%.*]] = insertelement <3 x double> [[X1_V_I1_11]], double [[B2:%.*]], i32 1
; CHECK-NEXT:    [[X1_V_I1:%.*]] = insertelement <3 x double> [[X1_V_I1_22]], double [[B3:%.*]], i32 2
; CHECK-NEXT:    [[X1_V_I0_13:%.*]] = insertelement <3 x double> undef, double [[A1:%.*]], i32 0
; CHECK-NEXT:    [[X1_V_I0_24:%.*]] = insertelement <3 x double> [[X1_V_I0_13]], double [[A2:%.*]], i32 1
; CHECK-NEXT:    [[X1_V_I0:%.*]] = insertelement <3 x double> [[X1_V_I0_24]], double [[A3:%.*]], i32 2
; CHECK-NEXT:    [[X1:%.*]] = fsub <3 x double> [[X1_V_I0]], [[X1_V_I1]]
; CHECK-NEXT:    [[Y1:%.*]] = fmul <3 x double> [[X1]], [[X1_V_I0]]
; CHECK-NEXT:    [[Z1:%.*]] = fadd <3 x double> [[Y1]], [[X1_V_I1]]
; CHECK-NEXT:    [[Z1_V_R210:%.*]] = extractelement <3 x double> [[Z1]], i32 2
; CHECK-NEXT:    [[Z1_V_R1:%.*]] = extractelement <3 x double> [[Z1]], i32 0
; CHECK-NEXT:    [[Z1_V_R2:%.*]] = extractelement <3 x double> [[Z1]], i32 1
; CHECK-NEXT:    [[R1:%.*]] = fmul double [[Z1_V_R1]], [[Z1_V_R2]]
; CHECK-NEXT:    [[R:%.*]] = fmul double [[R1]], [[Z1_V_R210]]
; CHECK-NEXT:    ret double [[R]]
;
%X1 = fsub double %A1, %B1
%X2 = fsub double %A2, %B2
%X3 = fsub double %A3, %B3
%Y1 = fmul double %X1, %A1
%Y2 = fmul double %X2, %A2
%Y3 = fmul double %X3, %A3
%Z1 = fadd double %Y1, %B1
%Z2 = fadd double %Y2, %B2
%Z3 = fadd double %Y3, %B3
%R1 = fmul double %Z1, %Z2
%R = fmul double %R1, %Z3
ret double %R
}

View File

@ -1,43 +0,0 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -bb-vectorize -S | FileCheck %s
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"
@d = external global [1 x [10 x [1 x i16]]], align 16
; Pairs of <4 x i16> selects (one on the scalar i1 %bool, one on the
; <4 x i1> %boolvec) feeding adjacent <4 x i16> stores. The expected output
; fuses each pair into <8 x i16> ops, widening the vector i1 condition with
; a shufflevector and merging the two stores into one <8 x i16> store.
define void @test() {
; CHECK-LABEL: @test(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[BOOL:%.*]] = icmp ne i32 undef, 0
; CHECK-NEXT:    [[BOOLVEC:%.*]] = icmp ne <4 x i32> undef, zeroinitializer
; CHECK-NEXT:    br label [[BODY:%.*]]
; CHECK:       body:
; CHECK-NEXT:    [[TMP0:%.*]] = select i1 [[BOOL]], <4 x i16> <i16 -2, i16 -2, i16 -2, i16 -2>, <4 x i16> <i16 -3, i16 -3, i16 -3, i16 -3>
; CHECK-NEXT:    [[TMP1:%.*]] = select i1 [[BOOL]], <4 x i16> <i16 -2, i16 -2, i16 -2, i16 -2>, <4 x i16> <i16 -3, i16 -3, i16 -3, i16 -3>
; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <4 x i16> [[TMP0]], <4 x i16> [[TMP1]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
; CHECK-NEXT:    [[TMP3:%.*]] = shufflevector <4 x i1> [[BOOLVEC]], <4 x i1> [[BOOLVEC]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
; CHECK-NEXT:    [[TMP4:%.*]] = select <8 x i1> [[TMP3]], <8 x i16> <i16 -3, i16 -3, i16 -3, i16 -3, i16 -3, i16 -3, i16 -3, i16 -3>, <8 x i16> [[TMP2]]
; CHECK-NEXT:    store <8 x i16> [[TMP4]], <8 x i16>* bitcast (i16* getelementptr ([1 x [10 x [1 x i16]]], [1 x [10 x [1 x i16]]]* @d, i64 0, i64 0, i64 undef, i64 0) to <8 x i16>*), align 2
; CHECK-NEXT:    ret void
;
entry:
%bool = icmp ne i32 undef, 0
%boolvec = icmp ne <4 x i32> undef, zeroinitializer
br label %body
body:
%0 = select i1 %bool, <4 x i16> <i16 -2, i16 -2, i16 -2, i16 -2>, <4 x i16> <i16 -3, i16 -3, i16 -3, i16 -3>
%1 = select i1 %bool, <4 x i16> <i16 -2, i16 -2, i16 -2, i16 -2>, <4 x i16> <i16 -3, i16 -3, i16 -3, i16 -3>
%2 = select <4 x i1> %boolvec, <4 x i16> <i16 -3, i16 -3, i16 -3, i16 -3>, <4 x i16> %0
%3 = select <4 x i1> %boolvec, <4 x i16> <i16 -3, i16 -3, i16 -3, i16 -3>, <4 x i16> %1
%4 = add nsw <4 x i16> %2, zeroinitializer
%5 = add nsw <4 x i16> %3, zeroinitializer
%6 = getelementptr inbounds [1 x [10 x [1 x i16]]], [1 x [10 x [1 x i16]]]* @d, i64 0, i64 0, i64 undef, i64 0
%7 = bitcast i16* %6 to <4 x i16>*
store <4 x i16> %4, <4 x i16>* %7, align 2
%8 = getelementptr [1 x [10 x [1 x i16]]], [1 x [10 x [1 x i16]]]* @d, i64 0, i64 0, i64 undef, i64 4
%9 = bitcast i16* %8 to <4 x i16>*
store <4 x i16> %5, <4 x i16>* %9, align 2
ret void
}

View File

@ -1,18 +0,0 @@
; RUN: opt < %s -bb-vectorize -bb-vectorize-req-chain-depth=3 -instcombine -gvn -S -mtriple=xcore | FileCheck %s
target datalayout = "e-p:32:32:32-a0:0:32-n32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-f16:16:32-f32:32:32-f64:32:32"
target triple = "xcore"
; Basic depth-3 chain
; Same depth-3 chain as the generic tests, but targeting xcore; the chain is
; expected to stay scalar (no <2 x double> in the output).
define double @test1(double %A1, double %A2, double %B1, double %B2) {
; CHECK-LABEL: @test1(
; CHECK-NOT: <2 x double>
%X1 = fsub double %A1, %B1
%X2 = fsub double %A2, %B2
%Y1 = fmul double %X1, %A1
%Y2 = fmul double %X2, %A2
%Z1 = fadd double %Y1, %B1
%Z2 = fadd double %Y2, %B2
%R = fmul double %Z1, %Z2
ret double %R
}