Vectorize a reduction chain feeding into a 'return' statement.

e.g. return (a[0]+b[0]) + (a[1]+b[1]). Differential Revision: http://reviews.llvm.org/D6227 llvm-svn: 222364
2014-11-19 16:07:38 +00:00 · 2014-11-19 16:07:38 +00:00 · aba97f4aba
parent efa443429d
commit aba97f4aba
2 changed files with 69 additions and 0 deletions
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -3696,6 +3696,21 @@ bool SLPVectorizer::vectorizeChainsInBlock(BasicBlock *BB, BoUpSLP &R) {
          }
        }

+    // Try to vectorize horizontal reductions feeding into a return.
+    if (ReturnInst *RI = dyn_cast<ReturnInst>(it))
+      if (RI->getNumOperands() != 0)
+        if (BinaryOperator *BinOp =
+                dyn_cast<BinaryOperator>(RI->getOperand(0))) {
+          DEBUG(dbgs() << "SLP: Found a return to vectorize.\n");
+          if (tryToVectorizePair(BinOp->getOperand(0),
+                                 BinOp->getOperand(1), R)) {
+            Changed = true;
+            it = BB->begin();
+            e = BB->end();
+            continue;
+          }
+        }
+
    // Try to vectorize trees that start at compare instructions.
    if (CmpInst *CI = dyn_cast<CmpInst>(it)) {
      if (tryToVectorizePair(CI->getOperand(0), CI->getOperand(1), R)) {
--- a/llvm/test/Transforms/SLPVectorizer/X86/return.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/return.ll
@@ -0,0 +1,54 @@
+; RUN: opt < %s -basicaa -slp-vectorizer -S | FileCheck %s
+target datalayout = "e-m:e-p:32:32-f64:32:64-f80:32-n8:16:32-S128"
+target triple = "x86_64--linux-gnu"
+
+@a = common global [4 x double] zeroinitializer, align 8
+@b = common global [4 x double] zeroinitializer, align 8
+
+; double a[4], b[4];
+; double foo() {
+;  double sum =0;
+;  sum = (a[0]+b[0]) + (a[1]+b[1]);
+;  return sum;
+; }
+
+; CHECK-LABEL: @return1
+; CHECK: %0 = load <2 x double>*
+; CHECK: %1 = load <2 x double>*
+; CHECK: %2 = fadd <2 x double>
+
+define double @return1() {
+entry:
+  %a0 = load double* getelementptr inbounds ([4 x double]* @a, i32 0, i32 0), align 8
+  %b0 = load double* getelementptr inbounds ([4 x double]* @b, i32 0, i32 0), align 8
+  %add0 = fadd double %a0, %b0
+  %a1 = load double* getelementptr inbounds ([4 x double]* @a, i32 0, i32 1), align 8
+  %b1 = load double* getelementptr inbounds ([4 x double]* @b, i32 0, i32 1), align 8
+  %add1 = fadd double %a1, %b1
+  %add2 = fadd double %add0, %add1
+  ret double %add2
+}
+
+; double hadd(double *x) {
+;   return ((x[0] + x[2]) + (x[1] + x[3]));
+; }
+
+; CHECK-LABEL: @return2
+; CHECK: %1 = load <2 x double>*
+; CHECK: %3 = load <2 x double>* %2
+; CHECK: %4 = fadd <2 x double> %1, %3
+
+define double @return2(double* nocapture readonly %x) {
+entry:
+  %x0 = load double* %x, align 4
+  %arrayidx1 = getelementptr inbounds double* %x, i32 2
+  %x2 = load double* %arrayidx1, align 4
+  %add3 = fadd double %x0, %x2
+  %arrayidx2 = getelementptr inbounds double* %x, i32 1
+  %x1 = load double* %arrayidx2, align 4
+  %arrayidx3 = getelementptr inbounds double* %x, i32 3
+  %x3 = load double* %arrayidx3, align 4
+  %add4 = fadd double %x1, %x3
+  %add5 = fadd double %add3, %add4
+  ret double %add5
+}