diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index ec25d7c5ecce..44e92b4ec08d 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -2393,8 +2393,9 @@ static bool isVectorReductionOp(const User *I) { // ElemNumToReduce / 2 elements in another vector. unsigned ResultElements = ShufInst->getType()->getVectorNumElements(); - ElemNumToReduce = ResultElements <= ElemNumToReduce ? ResultElements - : ElemNumToReduce; + if (ResultElements < ElemNum) + return false; + if (ElemNumToReduce == 1) return false; if (!isa<UndefValue>(U->getOperand(1))) @@ -2407,8 +2408,7 @@ static bool isVectorReductionOp(const User *I) { return false; // There is only one user of this ShuffleVector instruction, which - // must - // be a reduction operation. + // must be a reduction operation. if (!U->hasOneUse()) return false; diff --git a/llvm/test/CodeGen/AArch64/arm64-detect-vec-redux.ll b/llvm/test/CodeGen/AArch64/arm64-detect-vec-redux.ll new file mode 100644 index 000000000000..68130f1c9f88 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/arm64-detect-vec-redux.ll @@ -0,0 +1,52 @@ +; RUN: llc -mtriple=arm64-darwin-unknown < %s + +target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128" +target triple = "aarch64" + +; Function Attrs: nounwind readnone +define i32 @dotests_56() #0 { +entry: + %vqshrn_n4 = tail call <2 x i32> @llvm.aarch64.neon.uqshrn.v2i32(<2 x i64> zeroinitializer, i32 19) + %shuffle.i109 = shufflevector <2 x i32> %vqshrn_n4, <2 x i32> undef, <4 x i32> + %neg = xor <4 x i32> %shuffle.i109, + %shuffle = shufflevector <4 x i32> %neg, <4 x i32> undef, <2 x i32> + %mul = mul <2 x i32> %shuffle, bitcast (<1 x i64> to <2 x i32>), i32 0), i32 sub (i32 0, i32 extractelement (<2 x i32> bitcast (<1 x i64> to <2 x i32>), i32 0))), i32 undef> + %shuffle27 = shufflevector <2 x i32> %mul, <2 x i32> undef, <4 x i32> zeroinitializer + %0 = bitcast <4 x 
i32> %shuffle27 to <8 x i16> + %shuffle.i108 = shufflevector <8 x i16> %0, <8 x i16> undef, <8 x i32> + %vqshrn_n38 = tail call <8 x i8> @llvm.aarch64.neon.uqshrn.v8i8(<8 x i16> %shuffle.i108, i32 1) + %shuffle.i = shufflevector <8 x i8> %vqshrn_n38, <8 x i8> undef, <16 x i32> + %1 = bitcast <16 x i8> %shuffle.i to <2 x i64> + %vpaddq_v2.i = tail call <2 x i64> @llvm.aarch64.neon.addp.v2i64(<2 x i64> undef, <2 x i64> %1) #2 + %vqdmlal2.i = tail call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> undef, <2 x i32> undef) #2 + %vqdmlal_v3.i = tail call <2 x i64> @llvm.aarch64.neon.sqadd.v2i64(<2 x i64> %vpaddq_v2.i, <2 x i64> %vqdmlal2.i) #2 + %vmovn.i = trunc <2 x i64> %vqdmlal_v3.i to <2 x i32> + %vqdmulh_v2.i = tail call <2 x i32> @llvm.aarch64.neon.sqdmulh.v2i32(<2 x i32> %vmovn.i, <2 x i32> zeroinitializer) #2 + %2 = bitcast <2 x i32> %vqdmulh_v2.i to <1 x i64> + %vget_lane = extractelement <1 x i64> %2, i32 0 + %cmp = icmp ne i64 %vget_lane, -7395147708962464393 + %conv = zext i1 %cmp to i32 + ret i32 %conv +} + +; Function Attrs: nounwind readnone +declare <2 x i32> @llvm.aarch64.neon.uqshrn.v2i32(<2 x i64>, i32) #1 + +; Function Attrs: nounwind readnone +declare <8 x i8> @llvm.aarch64.neon.uqshrn.v8i8(<8 x i16>, i32) #1 + +; Function Attrs: nounwind readnone +declare <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32>, <2 x i32>) #1 + +; Function Attrs: nounwind readnone +declare <2 x i64> @llvm.aarch64.neon.sqadd.v2i64(<2 x i64>, <2 x i64>) #1 + +; Function Attrs: nounwind readnone +declare <2 x i64> @llvm.aarch64.neon.addp.v2i64(<2 x i64>, <2 x i64>) #1 + +; Function Attrs: nounwind readnone +declare <2 x i32> @llvm.aarch64.neon.sqdmulh.v2i32(<2 x i32>, <2 x i32>) #1 + +attributes #0 = { nounwind readnone "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="generic" 
"target-features"="+neon" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #1 = { nounwind readnone } +attributes #2 = { nounwind }