Propagate flags to SDValue in SplitVecOp_VECREDUCE

This patch is a fix for PR36642. While legalizing long vector types, make sure the nodes created for the smaller (split) types get the flags of the original node of the wider type. Bugzilla link: https://bugs.llvm.org/show_bug.cgi?id=36642 Change-Id: I0c2829639f094c862c10a6b51b342d4c2563e1fa llvm-svn: 327079
2018-03-08 23:41:40 +00:00 · 2018-03-08 23:41:40 +00:00 · 986865c090
parent 69fce12c38
commit 986865c090
2 changed files with 33 additions and 2 deletions
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -1694,8 +1694,8 @@ SDValue DAGTypeLegalizer::SplitVecOp_VECREDUCE(SDNode *N, unsigned OpNo) {

  // Use the appropriate scalar instruction on the split subvectors before
  // reducing the now partially reduced smaller vector.
-  SDValue Partial = DAG.getNode(CombineOpc, dl, LoOpVT, Lo, Hi);
-  return DAG.getNode(N->getOpcode(), dl, ResVT, Partial);
+  SDValue Partial = DAG.getNode(CombineOpc, dl, LoOpVT, Lo, Hi, N->getFlags());
+  return DAG.getNode(N->getOpcode(), dl, ResVT, Partial, N->getFlags());
 }

 SDValue DAGTypeLegalizer::SplitVecOp_UnaryOp(SDNode *N) {
--- a/llvm/test/CodeGen/AArch64/vecreduce-propagate-sd-flags.ll
+++ b/llvm/test/CodeGen/AArch64/vecreduce-propagate-sd-flags.ll
@@ -0,0 +1,31 @@
+; REQUIRES: arm-registered-target
+; REQUIRES: asserts
+; RUN: llc %s -debug-only=legalize-types 2>&1 | FileCheck %s
+
+; This test checks that when v4f64 gets broken down to two v2f64 it maintains
+; the "nnan" flags.
+
+; CHECK: Legalizing node: [[VFOUR:t.*]]: v4f64 = BUILD_VECTOR
+; CHECK-NEXT: Analyzing result type: v4f64
+; CHECK-NEXT: Split node result: [[VFOUR]]: v4f64 = BUILD_VECTOR
+
+; CHECK: Legalizing node: [[VTWO:t.*]]: v2f64 = BUILD_VECTOR
+; CHECK: Legally typed node: [[VTWO]]: v2f64 = BUILD_VECTOR
+; CHECK: Legalizing node: t26: v2f64 = fmaxnum nnan [[VTWO]], [[VTWO]]
+
+target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
+target triple = "aarch64--linux-gnu"
+
+
+; Function Attrs: norecurse nounwind
+define fastcc double @test() unnamed_addr #1 {
+entry:
+ %0 = insertelement <4 x double> undef, double 1.0, i32 0
+ %1 = insertelement <4 x double> %0, double 1.0, i32 1
+ %2 = insertelement <4 x double> %1, double 1.0, i32 2
+ %3 = insertelement <4 x double> %2, double 1.0, i32 3
+ %4 = call nnan double @llvm.experimental.vector.reduce.fmax.f64.v4f64(<4 x double> %3)
+ ret double %4
+}
+
+declare double @llvm.experimental.vector.reduce.fmax.f64.v4f64(<4 x double>)