[RISCV] Refine costs for i1 reductions

Our actual lowering for i1 reductions uses ctpop combined with possibly a vector negate and possibly a logic op afterwards. I believe ctpop to be low cost on all reasonable hardware. The default costing implementation here was returning quite inconsistent costs. and/or were returning very high costs (because we seem to think moving into scalar registers is very expensive?) and others were returning lower but still too high (because of the assumed tree reduce strategy). While we should probably improve the generic costing strategy for i1 vectors, let's start by fixing the immediate problem. Differential Revision: https://reviews.llvm.org/D127511
2022-06-10 13:21:52 -07:00 · 2022-06-10 13:21:52 -07:00 · 536095a27c
parent f7bb691d61
commit 536095a27c
7 changed files with 105 additions and 109 deletions
--- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
@ -323,10 +323,15 @@ RISCVTTIImpl::getMinMaxReductionCost(VectorType *Ty, VectorType *CondTy,
  if (Ty->getScalarSizeInBits() > ST->getELEN())
    return BaseT::getMinMaxReductionCost(Ty, CondTy, IsUnsigned, CostKind);

+  std::pair<InstructionCost, MVT> LT = TLI->getTypeLegalizationCost(DL, Ty);
+  if (Ty->getElementType()->isIntegerTy(1))
+    // vcpop sequences, see vreduction-mask.ll.  umax, smin actually only
+    // cost 2, but we don't have enough info here so we slightly over cost.
+    return (LT.first - 1) + 3;
+
  // IR Reduction is composed by two vmv and one rvv reduction instruction.
  InstructionCost BaseCost = 2;
  unsigned VL = cast<FixedVectorType>(Ty)->getNumElements();
-  std::pair<InstructionCost, MVT> LT = TLI->getTypeLegalizationCost(DL, Ty);
  return (LT.first - 1) + BaseCost + Log2_32_Ceil(VL);
 }

@ -338,10 +343,6 @@ RISCVTTIImpl::getArithmeticReductionCost(unsigned Opcode, VectorType *VTy,
  if (!isa<FixedVectorType>(VTy))
    return BaseT::getArithmeticReductionCost(Opcode, VTy, FMF, CostKind);

-  // FIXME: Do not support i1 and/or reduction now.
-  if (VTy->getElementType()->isIntegerTy(1))
-    return BaseT::getArithmeticReductionCost(Opcode, VTy, FMF, CostKind);
-
  if (!ST->useRVVForFixedLengthVectors())
    return BaseT::getArithmeticReductionCost(Opcode, VTy, FMF, CostKind);

@ -356,11 +357,14 @@ RISCVTTIImpl::getArithmeticReductionCost(unsigned Opcode, VectorType *VTy,
      ISD != ISD::FADD)
    return BaseT::getArithmeticReductionCost(Opcode, VTy, FMF, CostKind);

+  std::pair<InstructionCost, MVT> LT = TLI->getTypeLegalizationCost(DL, VTy);
+  if (VTy->getElementType()->isIntegerTy(1))
+    // vcpop sequences, see vreduction-mask.ll
+    return (LT.first - 1) + (ISD == ISD::AND ? 3 : 2);
+
  // IR Reduction is composed by two vmv and one rvv reduction instruction.
  InstructionCost BaseCost = 2;
  unsigned VL = cast<FixedVectorType>(VTy)->getNumElements();
-  std::pair<InstructionCost, MVT> LT = TLI->getTypeLegalizationCost(DL, VTy);
-
  if (TTI::requiresOrderedReduction(FMF))
    return (LT.first - 1) + BaseCost + VL;
  return (LT.first - 1) + BaseCost + Log2_32_Ceil(VL);
--- a/llvm/test/Analysis/CostModel/RISCV/reduce-add.ll
+++ b/llvm/test/Analysis/CostModel/RISCV/reduce-add.ll
@ -4,14 +4,14 @@

 define i32 @reduce_i1(i32 %arg) {
 ; CHECK-LABEL: 'reduce_i1'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1 = call i1 @llvm.vector.reduce.add.v1i1(<1 x i1> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V2 = call i1 @llvm.vector.reduce.add.v2i1(<2 x i1> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 41 for instruction: %V4 = call i1 @llvm.vector.reduce.add.v4i1(<4 x i1> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 121 for instruction: %V8 = call i1 @llvm.vector.reduce.add.v8i1(<8 x i1> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 321 for instruction: %V16 = call i1 @llvm.vector.reduce.add.v16i1(<16 x i1> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 801 for instruction: %V32 = call i1 @llvm.vector.reduce.add.v32i1(<32 x i1> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1921 for instruction: %V64 = call i1 @llvm.vector.reduce.add.v64i1(<64 x i1> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4481 for instruction: %V128 = call i1 @llvm.vector.reduce.add.v128i1(<128 x i1> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1 = call i1 @llvm.vector.reduce.add.v1i1(<1 x i1> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i1 @llvm.vector.reduce.add.v2i1(<2 x i1> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4 = call i1 @llvm.vector.reduce.add.v4i1(<4 x i1> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i1 @llvm.vector.reduce.add.v8i1(<8 x i1> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i1 @llvm.vector.reduce.add.v16i1(<16 x i1> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32 = call i1 @llvm.vector.reduce.add.v32i1(<32 x i1> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V64 = call i1 @llvm.vector.reduce.add.v64i1(<64 x i1> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V128 = call i1 @llvm.vector.reduce.add.v128i1(<128 x i1> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
  %V1   = call i1 @llvm.vector.reduce.add.v1i1(<1 x i1> undef)
--- a/llvm/test/Analysis/CostModel/RISCV/reduce-and.ll
+++ b/llvm/test/Analysis/CostModel/RISCV/reduce-and.ll
@ -1,35 +1,21 @@
 ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
-; RUN: opt < %s -mtriple=riscv32 -mattr=+v -riscv-v-vector-bits-min=256 -passes='print<cost-model>' -cost-kind=throughput 2>&1 -disable-output | FileCheck %s  --check-prefixes=CHECK,RISCV32
-; RUN: opt < %s -mtriple=riscv64 -mattr=+v -riscv-v-vector-bits-min=256 -passes='print<cost-model>' -cost-kind=throughput 2>&1 -disable-output | FileCheck %s  --check-prefixes=CHECK,RISCV64
+; RUN: opt < %s -mtriple=riscv32 -mattr=+v -riscv-v-vector-bits-min=256 -passes='print<cost-model>' -cost-kind=throughput 2>&1 -disable-output | FileCheck %s
+; RUN: opt < %s -mtriple=riscv64 -mattr=+v -riscv-v-vector-bits-min=256 -passes='print<cost-model>' -cost-kind=throughput 2>&1 -disable-output | FileCheck %s

 define i32 @reduce_i1(i32 %arg) {
-; RISCV32-LABEL: 'reduce_i1'
-; RISCV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1 = call i1 @llvm.vector.reduce.and.v1i1(<1 x i1> undef)
-; RISCV32-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i1 @llvm.vector.reduce.and.v2i1(<2 x i1> undef)
-; RISCV32-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i1 @llvm.vector.reduce.and.v4i1(<4 x i1> undef)
-; RISCV32-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V8 = call i1 @llvm.vector.reduce.and.v8i1(<8 x i1> undef)
-; RISCV32-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %V16 = call i1 @llvm.vector.reduce.and.v16i1(<16 x i1> undef)
-; RISCV32-NEXT:  Cost Model: Found an estimated cost of 33 for instruction: %V32 = call i1 @llvm.vector.reduce.and.v32i1(<32 x i1> undef)
-; RISCV32-NEXT:  Cost Model: Found an estimated cost of 66 for instruction: %V64 = call i1 @llvm.vector.reduce.and.v64i1(<64 x i1> undef)
-; RISCV32-NEXT:  Cost Model: Found an estimated cost of 132 for instruction: %V128 = call i1 @llvm.vector.reduce.and.v128i1(<128 x i1> undef)
-; RISCV32-NEXT:  Cost Model: Found an estimated cost of 264 for instruction: %V256 = call i1 @llvm.vector.reduce.and.v256i1(<256 x i1> undef)
-; RISCV32-NEXT:  Cost Model: Found an estimated cost of 528 for instruction: %V512 = call i1 @llvm.vector.reduce.and.v512i1(<512 x i1> undef)
-; RISCV32-NEXT:  Cost Model: Found an estimated cost of 1056 for instruction: %V1024 = call i1 @llvm.vector.reduce.and.v1024i1(<1024 x i1> undef)
-; RISCV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
-;
-; RISCV64-LABEL: 'reduce_i1'
-; RISCV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1 = call i1 @llvm.vector.reduce.and.v1i1(<1 x i1> undef)
-; RISCV64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i1 @llvm.vector.reduce.and.v2i1(<2 x i1> undef)
-; RISCV64-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i1 @llvm.vector.reduce.and.v4i1(<4 x i1> undef)
-; RISCV64-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V8 = call i1 @llvm.vector.reduce.and.v8i1(<8 x i1> undef)
-; RISCV64-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %V16 = call i1 @llvm.vector.reduce.and.v16i1(<16 x i1> undef)
-; RISCV64-NEXT:  Cost Model: Found an estimated cost of 33 for instruction: %V32 = call i1 @llvm.vector.reduce.and.v32i1(<32 x i1> undef)
-; RISCV64-NEXT:  Cost Model: Found an estimated cost of 65 for instruction: %V64 = call i1 @llvm.vector.reduce.and.v64i1(<64 x i1> undef)
-; RISCV64-NEXT:  Cost Model: Found an estimated cost of 130 for instruction: %V128 = call i1 @llvm.vector.reduce.and.v128i1(<128 x i1> undef)
-; RISCV64-NEXT:  Cost Model: Found an estimated cost of 260 for instruction: %V256 = call i1 @llvm.vector.reduce.and.v256i1(<256 x i1> undef)
-; RISCV64-NEXT:  Cost Model: Found an estimated cost of 520 for instruction: %V512 = call i1 @llvm.vector.reduce.and.v512i1(<512 x i1> undef)
-; RISCV64-NEXT:  Cost Model: Found an estimated cost of 1040 for instruction: %V1024 = call i1 @llvm.vector.reduce.and.v1024i1(<1024 x i1> undef)
-; RISCV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+; CHECK-LABEL: 'reduce_i1'
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V1 = call i1 @llvm.vector.reduce.and.v1i1(<1 x i1> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i1 @llvm.vector.reduce.and.v2i1(<2 x i1> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4 = call i1 @llvm.vector.reduce.and.v4i1(<4 x i1> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8 = call i1 @llvm.vector.reduce.and.v8i1(<8 x i1> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16 = call i1 @llvm.vector.reduce.and.v16i1(<16 x i1> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V32 = call i1 @llvm.vector.reduce.and.v32i1(<32 x i1> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V64 = call i1 @llvm.vector.reduce.and.v64i1(<64 x i1> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V128 = call i1 @llvm.vector.reduce.and.v128i1(<128 x i1> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V256 = call i1 @llvm.vector.reduce.and.v256i1(<256 x i1> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V512 = call i1 @llvm.vector.reduce.and.v512i1(<512 x i1> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V1024 = call i1 @llvm.vector.reduce.and.v1024i1(<1024 x i1> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
  %V1   = call i1 @llvm.vector.reduce.and.v1i1(<1 x i1> undef)
  %V2   = call i1 @llvm.vector.reduce.and.v2i1(<2 x i1> undef)
--- a/llvm/test/Analysis/CostModel/RISCV/reduce-max.ll
+++ b/llvm/test/Analysis/CostModel/RISCV/reduce-max.ll
@ -4,15 +4,16 @@

 define i32 @reduce_umin_i1(i32 %arg) {
 ; CHECK-LABEL: 'reduce_umin_i1'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1 = call i1 @llvm.vector.reduce.umax.v1i1(<1 x i1> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V1 = call i1 @llvm.vector.reduce.umax.v1i1(<1 x i1> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i1 @llvm.vector.reduce.umax.v2i1(<2 x i1> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4 = call i1 @llvm.vector.reduce.umax.v4i1(<4 x i1> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V8 = call i1 @llvm.vector.reduce.umax.v8i1(<8 x i1> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i1 @llvm.vector.reduce.umax.v16i1(<16 x i1> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V32 = call i1 @llvm.vector.reduce.umax.v32i1(<32 x i1> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64 = call i1 @llvm.vector.reduce.umax.v64i1(<64 x i1> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V128 = call i1 @llvm.vector.reduce.umax.v128i1(<128 x i1> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4 = call i1 @llvm.vector.reduce.umax.v4i1(<4 x i1> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8 = call i1 @llvm.vector.reduce.umax.v8i1(<8 x i1> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16 = call i1 @llvm.vector.reduce.umax.v16i1(<16 x i1> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V32 = call i1 @llvm.vector.reduce.umax.v32i1(<32 x i1> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V64 = call i1 @llvm.vector.reduce.umax.v64i1(<64 x i1> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V128 = call i1 @llvm.vector.reduce.umax.v128i1(<128 x i1> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
  %V1   = call i1 @llvm.vector.reduce.umax.v1i1(<1 x i1> undef)
  %V2   = call i1 @llvm.vector.reduce.umax.v2i1(<2 x i1> undef)
  %V4   = call i1 @llvm.vector.reduce.umax.v4i1(<4 x i1> undef)
@ -35,6 +36,7 @@ define i32 @reduce_umax_i8(i32 %arg) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64 = call i8 @llvm.vector.reduce.umax.v64i8(<64 x i8> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V128 = call i8 @llvm.vector.reduce.umax.v128i8(<128 x i8> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
  %V1   = call i8 @llvm.vector.reduce.umax.v1i8(<1 x i8> undef)
  %V2   = call i8 @llvm.vector.reduce.umax.v2i8(<2 x i8> undef)
  %V4   = call i8 @llvm.vector.reduce.umax.v4i8(<4 x i8> undef)
@ -57,6 +59,7 @@ define i32 @reduce_umax_i16(i32 %arg) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64 = call i16 @llvm.vector.reduce.umax.v64i16(<64 x i16> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V128 = call i16 @llvm.vector.reduce.umax.v128i16(<128 x i16> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
  %V1   = call i16 @llvm.vector.reduce.umax.v1i16(<1 x i16> undef)
  %V2   = call i16 @llvm.vector.reduce.umax.v2i16(<2 x i16> undef)
  %V4   = call i16 @llvm.vector.reduce.umax.v4i16(<4 x i16> undef)
@ -79,6 +82,7 @@ define i32 @reduce_umax_i32(i32 %arg) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64 = call i32 @llvm.vector.reduce.umax.v64i32(<64 x i32> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V128 = call i32 @llvm.vector.reduce.umax.v128i32(<128 x i32> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
  %V1   = call i32 @llvm.vector.reduce.umax.v1i32(<1 x i32> undef)
  %V2   = call i32 @llvm.vector.reduce.umax.v2i32(<2 x i32> undef)
  %V4   = call i32 @llvm.vector.reduce.umax.v4i32(<4 x i32> undef)
@ -101,6 +105,7 @@ define i32 @reduce_umax_i64(i32 %arg) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V64 = call i64 @llvm.vector.reduce.umax.v64i64(<64 x i64> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V128 = call i64 @llvm.vector.reduce.umax.v128i64(<128 x i64> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
  %V1   = call i64 @llvm.vector.reduce.umax.v1i64(<1 x i64> undef)
  %V2   = call i64 @llvm.vector.reduce.umax.v2i64(<2 x i64> undef)
  %V4   = call i64 @llvm.vector.reduce.umax.v4i64(<4 x i64> undef)
@ -114,15 +119,16 @@ define i32 @reduce_umax_i64(i32 %arg) {

 define i32 @reduce_smin_i1(i32 %arg) {
 ; CHECK-LABEL: 'reduce_smin_i1'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1 = call i1 @llvm.vector.reduce.smax.v1i1(<1 x i1> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V1 = call i1 @llvm.vector.reduce.smax.v1i1(<1 x i1> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i1 @llvm.vector.reduce.smax.v2i1(<2 x i1> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4 = call i1 @llvm.vector.reduce.smax.v4i1(<4 x i1> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V8 = call i1 @llvm.vector.reduce.smax.v8i1(<8 x i1> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i1 @llvm.vector.reduce.smax.v16i1(<16 x i1> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V32 = call i1 @llvm.vector.reduce.smax.v32i1(<32 x i1> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64 = call i1 @llvm.vector.reduce.smax.v64i1(<64 x i1> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V128 = call i1 @llvm.vector.reduce.smax.v128i1(<128 x i1> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4 = call i1 @llvm.vector.reduce.smax.v4i1(<4 x i1> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8 = call i1 @llvm.vector.reduce.smax.v8i1(<8 x i1> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16 = call i1 @llvm.vector.reduce.smax.v16i1(<16 x i1> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V32 = call i1 @llvm.vector.reduce.smax.v32i1(<32 x i1> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V64 = call i1 @llvm.vector.reduce.smax.v64i1(<64 x i1> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V128 = call i1 @llvm.vector.reduce.smax.v128i1(<128 x i1> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
  %V1   = call i1 @llvm.vector.reduce.smax.v1i1(<1 x i1> undef)
  %V2   = call i1 @llvm.vector.reduce.smax.v2i1(<2 x i1> undef)
  %V4   = call i1 @llvm.vector.reduce.smax.v4i1(<4 x i1> undef)
@ -145,6 +151,7 @@ define i32 @reduce_smax_i8(i32 %arg) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64 = call i8 @llvm.vector.reduce.smax.v64i8(<64 x i8> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V128 = call i8 @llvm.vector.reduce.smax.v128i8(<128 x i8> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
  %V1   = call i8 @llvm.vector.reduce.smax.v1i8(<1 x i8> undef)
  %V2   = call i8 @llvm.vector.reduce.smax.v2i8(<2 x i8> undef)
  %V4   = call i8 @llvm.vector.reduce.smax.v4i8(<4 x i8> undef)
@ -167,6 +174,7 @@ define i32 @reduce_smax_i16(i32 %arg) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64 = call i16 @llvm.vector.reduce.smax.v64i16(<64 x i16> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V128 = call i16 @llvm.vector.reduce.smax.v128i16(<128 x i16> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
  %V1   = call i16 @llvm.vector.reduce.smax.v1i16(<1 x i16> undef)
  %V2   = call i16 @llvm.vector.reduce.smax.v2i16(<2 x i16> undef)
  %V4   = call i16 @llvm.vector.reduce.smax.v4i16(<4 x i16> undef)
@ -189,6 +197,7 @@ define i32 @reduce_smax_i32(i32 %arg) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64 = call i32 @llvm.vector.reduce.smax.v64i32(<64 x i32> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V128 = call i32 @llvm.vector.reduce.smax.v128i32(<128 x i32> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
  %V1   = call i32 @llvm.vector.reduce.smax.v1i32(<1 x i32> undef)
  %V2   = call i32 @llvm.vector.reduce.smax.v2i32(<2 x i32> undef)
  %V4   = call i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> undef)
@ -211,6 +220,7 @@ define i32 @reduce_smax_i64(i32 %arg) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V64 = call i64 @llvm.vector.reduce.smax.v64i64(<64 x i64> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V128 = call i64 @llvm.vector.reduce.smax.v128i64(<128 x i64> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
  %V1   = call i64 @llvm.vector.reduce.smax.v1i64(<1 x i64> undef)
  %V2   = call i64 @llvm.vector.reduce.smax.v2i64(<2 x i64> undef)
  %V4   = call i64 @llvm.vector.reduce.smax.v4i64(<4 x i64> undef)
--- a/llvm/test/Analysis/CostModel/RISCV/reduce-min.ll
+++ b/llvm/test/Analysis/CostModel/RISCV/reduce-min.ll
@ -4,15 +4,16 @@

 define i32 @reduce_umin_i1(i32 %arg) {
 ; CHECK-LABEL: 'reduce_umin_i1'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1 = call i1 @llvm.vector.reduce.umin.v1i1(<1 x i1> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V1 = call i1 @llvm.vector.reduce.umin.v1i1(<1 x i1> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i1 @llvm.vector.reduce.umin.v2i1(<2 x i1> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4 = call i1 @llvm.vector.reduce.umin.v4i1(<4 x i1> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V8 = call i1 @llvm.vector.reduce.umin.v8i1(<8 x i1> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i1 @llvm.vector.reduce.umin.v16i1(<16 x i1> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V32 = call i1 @llvm.vector.reduce.umin.v32i1(<32 x i1> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64 = call i1 @llvm.vector.reduce.umin.v64i1(<64 x i1> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V128 = call i1 @llvm.vector.reduce.umin.v128i1(<128 x i1> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4 = call i1 @llvm.vector.reduce.umin.v4i1(<4 x i1> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8 = call i1 @llvm.vector.reduce.umin.v8i1(<8 x i1> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16 = call i1 @llvm.vector.reduce.umin.v16i1(<16 x i1> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V32 = call i1 @llvm.vector.reduce.umin.v32i1(<32 x i1> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V64 = call i1 @llvm.vector.reduce.umin.v64i1(<64 x i1> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V128 = call i1 @llvm.vector.reduce.umin.v128i1(<128 x i1> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
  %V1   = call i1 @llvm.vector.reduce.umin.v1i1(<1 x i1> undef)
  %V2   = call i1 @llvm.vector.reduce.umin.v2i1(<2 x i1> undef)
  %V4   = call i1 @llvm.vector.reduce.umin.v4i1(<4 x i1> undef)
@ -35,6 +36,7 @@ define i32 @reduce_umin_i8(i32 %arg) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64 = call i8 @llvm.vector.reduce.umin.v64i8(<64 x i8> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V128 = call i8 @llvm.vector.reduce.umin.v128i8(<128 x i8> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
  %V1   = call i8 @llvm.vector.reduce.umin.v1i8(<1 x i8> undef)
  %V2   = call i8 @llvm.vector.reduce.umin.v2i8(<2 x i8> undef)
  %V4   = call i8 @llvm.vector.reduce.umin.v4i8(<4 x i8> undef)
@ -57,6 +59,7 @@ define i32 @reduce_umin_i16(i32 %arg) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64 = call i16 @llvm.vector.reduce.umin.v64i16(<64 x i16> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V128 = call i16 @llvm.vector.reduce.umin.v128i16(<128 x i16> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
  %V1   = call i16 @llvm.vector.reduce.umin.v1i16(<1 x i16> undef)
  %V2   = call i16 @llvm.vector.reduce.umin.v2i16(<2 x i16> undef)
  %V4   = call i16 @llvm.vector.reduce.umin.v4i16(<4 x i16> undef)
@ -79,6 +82,7 @@ define i32 @reduce_umin_i32(i32 %arg) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64 = call i32 @llvm.vector.reduce.umin.v64i32(<64 x i32> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V128 = call i32 @llvm.vector.reduce.umin.v128i32(<128 x i32> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
  %V1   = call i32 @llvm.vector.reduce.umin.v1i32(<1 x i32> undef)
  %V2   = call i32 @llvm.vector.reduce.umin.v2i32(<2 x i32> undef)
  %V4   = call i32 @llvm.vector.reduce.umin.v4i32(<4 x i32> undef)
@ -101,6 +105,7 @@ define i32 @reduce_umin_i64(i32 %arg) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V64 = call i64 @llvm.vector.reduce.umin.v64i64(<64 x i64> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V128 = call i64 @llvm.vector.reduce.umin.v128i64(<128 x i64> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
  %V1   = call i64 @llvm.vector.reduce.umin.v1i64(<1 x i64> undef)
  %V2   = call i64 @llvm.vector.reduce.umin.v2i64(<2 x i64> undef)
  %V4   = call i64 @llvm.vector.reduce.umin.v4i64(<4 x i64> undef)
@ -114,15 +119,16 @@ define i32 @reduce_umin_i64(i32 %arg) {

 define i32 @reduce_smin_i1(i32 %arg) {
 ; CHECK-LABEL: 'reduce_smin_i1'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1 = call i1 @llvm.vector.reduce.smin.v1i1(<1 x i1> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V1 = call i1 @llvm.vector.reduce.smin.v1i1(<1 x i1> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i1 @llvm.vector.reduce.smin.v2i1(<2 x i1> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4 = call i1 @llvm.vector.reduce.smin.v4i1(<4 x i1> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V8 = call i1 @llvm.vector.reduce.smin.v8i1(<8 x i1> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i1 @llvm.vector.reduce.smin.v16i1(<16 x i1> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V32 = call i1 @llvm.vector.reduce.smin.v32i1(<32 x i1> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64 = call i1 @llvm.vector.reduce.smin.v64i1(<64 x i1> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V128 = call i1 @llvm.vector.reduce.smin.v128i1(<128 x i1> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4 = call i1 @llvm.vector.reduce.smin.v4i1(<4 x i1> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8 = call i1 @llvm.vector.reduce.smin.v8i1(<8 x i1> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16 = call i1 @llvm.vector.reduce.smin.v16i1(<16 x i1> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V32 = call i1 @llvm.vector.reduce.smin.v32i1(<32 x i1> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V64 = call i1 @llvm.vector.reduce.smin.v64i1(<64 x i1> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V128 = call i1 @llvm.vector.reduce.smin.v128i1(<128 x i1> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
  %V1   = call i1 @llvm.vector.reduce.smin.v1i1(<1 x i1> undef)
  %V2   = call i1 @llvm.vector.reduce.smin.v2i1(<2 x i1> undef)
  %V4   = call i1 @llvm.vector.reduce.smin.v4i1(<4 x i1> undef)
@ -145,6 +151,7 @@ define i32 @reduce_smin_i8(i32 %arg) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64 = call i8 @llvm.vector.reduce.smin.v64i8(<64 x i8> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V128 = call i8 @llvm.vector.reduce.smin.v128i8(<128 x i8> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
  %V1   = call i8 @llvm.vector.reduce.smin.v1i8(<1 x i8> undef)
  %V2   = call i8 @llvm.vector.reduce.smin.v2i8(<2 x i8> undef)
  %V4   = call i8 @llvm.vector.reduce.smin.v4i8(<4 x i8> undef)
@ -167,6 +174,7 @@ define i32 @reduce_smin_i16(i32 %arg) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64 = call i16 @llvm.vector.reduce.smin.v64i16(<64 x i16> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V128 = call i16 @llvm.vector.reduce.smin.v128i16(<128 x i16> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
  %V1   = call i16 @llvm.vector.reduce.smin.v1i16(<1 x i16> undef)
  %V2   = call i16 @llvm.vector.reduce.smin.v2i16(<2 x i16> undef)
  %V4   = call i16 @llvm.vector.reduce.smin.v4i16(<4 x i16> undef)
@ -189,6 +197,7 @@ define i32 @reduce_smin_i32(i32 %arg) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64 = call i32 @llvm.vector.reduce.smin.v64i32(<64 x i32> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V128 = call i32 @llvm.vector.reduce.smin.v128i32(<128 x i32> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
  %V1   = call i32 @llvm.vector.reduce.smin.v1i32(<1 x i32> undef)
  %V2   = call i32 @llvm.vector.reduce.smin.v2i32(<2 x i32> undef)
  %V4   = call i32 @llvm.vector.reduce.smin.v4i32(<4 x i32> undef)
@ -211,6 +220,7 @@ define i32 @reduce_smin_i64(i32 %arg) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V64 = call i64 @llvm.vector.reduce.smin.v64i64(<64 x i64> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V128 = call i64 @llvm.vector.reduce.smin.v128i64(<128 x i64> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
  %V1   = call i64 @llvm.vector.reduce.smin.v1i64(<1 x i64> undef)
  %V2   = call i64 @llvm.vector.reduce.smin.v2i64(<2 x i64> undef)
  %V4   = call i64 @llvm.vector.reduce.smin.v4i64(<4 x i64> undef)
--- a/llvm/test/Analysis/CostModel/RISCV/reduce-or.ll
+++ b/llvm/test/Analysis/CostModel/RISCV/reduce-or.ll
@ -1,35 +1,21 @@
 ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
-; RUN: opt < %s -mtriple=riscv32 -mattr=+v -riscv-v-vector-bits-min=256 -passes='print<cost-model>' -cost-kind=throughput 2>&1 -disable-output | FileCheck %s --check-prefixes=CHECK,RISCV32
-; RUN: opt < %s -mtriple=riscv64 -mattr=+v -riscv-v-vector-bits-min=256 -passes='print<cost-model>' -cost-kind=throughput 2>&1 -disable-output | FileCheck %s  --check-prefixes=CHECK,RISCV64
+; RUN: opt < %s -mtriple=riscv32 -mattr=+v -riscv-v-vector-bits-min=256 -passes='print<cost-model>' -cost-kind=throughput 2>&1 -disable-output | FileCheck %s
+; RUN: opt < %s -mtriple=riscv64 -mattr=+v -riscv-v-vector-bits-min=256 -passes='print<cost-model>' -cost-kind=throughput 2>&1 -disable-output | FileCheck %s

 define i32 @reduce_i1(i32 %arg) {
-; RISCV32-LABEL: 'reduce_i1'
-; RISCV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1 = call i1 @llvm.vector.reduce.or.v1i1(<1 x i1> undef)
-; RISCV32-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i1 @llvm.vector.reduce.or.v2i1(<2 x i1> undef)
-; RISCV32-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> undef)
-; RISCV32-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V8 = call i1 @llvm.vector.reduce.or.v8i1(<8 x i1> undef)
-; RISCV32-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %V16 = call i1 @llvm.vector.reduce.or.v16i1(<16 x i1> undef)
-; RISCV32-NEXT:  Cost Model: Found an estimated cost of 33 for instruction: %V32 = call i1 @llvm.vector.reduce.or.v32i1(<32 x i1> undef)
-; RISCV32-NEXT:  Cost Model: Found an estimated cost of 66 for instruction: %V64 = call i1 @llvm.vector.reduce.or.v64i1(<64 x i1> undef)
-; RISCV32-NEXT:  Cost Model: Found an estimated cost of 132 for instruction: %V128 = call i1 @llvm.vector.reduce.or.v128i1(<128 x i1> undef)
-; RISCV32-NEXT:  Cost Model: Found an estimated cost of 264 for instruction: %V256 = call i1 @llvm.vector.reduce.or.v256i1(<256 x i1> undef)
-; RISCV32-NEXT:  Cost Model: Found an estimated cost of 528 for instruction: %V512 = call i1 @llvm.vector.reduce.or.v512i1(<512 x i1> undef)
-; RISCV32-NEXT:  Cost Model: Found an estimated cost of 1056 for instruction: %V1024 = call i1 @llvm.vector.reduce.or.v1024i1(<1024 x i1> undef)
-; RISCV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
-;
-; RISCV64-LABEL: 'reduce_i1'
-; RISCV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1 = call i1 @llvm.vector.reduce.or.v1i1(<1 x i1> undef)
-; RISCV64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i1 @llvm.vector.reduce.or.v2i1(<2 x i1> undef)
-; RISCV64-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> undef)
-; RISCV64-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V8 = call i1 @llvm.vector.reduce.or.v8i1(<8 x i1> undef)
-; RISCV64-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %V16 = call i1 @llvm.vector.reduce.or.v16i1(<16 x i1> undef)
-; RISCV64-NEXT:  Cost Model: Found an estimated cost of 33 for instruction: %V32 = call i1 @llvm.vector.reduce.or.v32i1(<32 x i1> undef)
-; RISCV64-NEXT:  Cost Model: Found an estimated cost of 65 for instruction: %V64 = call i1 @llvm.vector.reduce.or.v64i1(<64 x i1> undef)
-; RISCV64-NEXT:  Cost Model: Found an estimated cost of 130 for instruction: %V128 = call i1 @llvm.vector.reduce.or.v128i1(<128 x i1> undef)
-; RISCV64-NEXT:  Cost Model: Found an estimated cost of 260 for instruction: %V256 = call i1 @llvm.vector.reduce.or.v256i1(<256 x i1> undef)
-; RISCV64-NEXT:  Cost Model: Found an estimated cost of 520 for instruction: %V512 = call i1 @llvm.vector.reduce.or.v512i1(<512 x i1> undef)
-; RISCV64-NEXT:  Cost Model: Found an estimated cost of 1040 for instruction: %V1024 = call i1 @llvm.vector.reduce.or.v1024i1(<1024 x i1> undef)
-; RISCV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+; CHECK-LABEL: 'reduce_i1'
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1 = call i1 @llvm.vector.reduce.or.v1i1(<1 x i1> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i1 @llvm.vector.reduce.or.v2i1(<2 x i1> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4 = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i1 @llvm.vector.reduce.or.v8i1(<8 x i1> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i1 @llvm.vector.reduce.or.v16i1(<16 x i1> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32 = call i1 @llvm.vector.reduce.or.v32i1(<32 x i1> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V64 = call i1 @llvm.vector.reduce.or.v64i1(<64 x i1> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V128 = call i1 @llvm.vector.reduce.or.v128i1(<128 x i1> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V256 = call i1 @llvm.vector.reduce.or.v256i1(<256 x i1> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V512 = call i1 @llvm.vector.reduce.or.v512i1(<512 x i1> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V1024 = call i1 @llvm.vector.reduce.or.v1024i1(<1024 x i1> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
  %V1   = call i1 @llvm.vector.reduce.or.v1i1(<1 x i1> undef)
  %V2   = call i1 @llvm.vector.reduce.or.v2i1(<2 x i1> undef)
--- a/llvm/test/Analysis/CostModel/RISCV/reduce-xor.ll
+++ b/llvm/test/Analysis/CostModel/RISCV/reduce-xor.ll
@ -4,14 +4,14 @@

 define i32 @reduce_i1(i32 %arg) {
 ; CHECK-LABEL: 'reduce_i1'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1 = call i1 @llvm.vector.reduce.xor.v1i1(<1 x i1> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V2 = call i1 @llvm.vector.reduce.xor.v2i1(<2 x i1> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 21 for instruction: %V4 = call i1 @llvm.vector.reduce.xor.v4i1(<4 x i1> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 55 for instruction: %V8 = call i1 @llvm.vector.reduce.xor.v8i1(<8 x i1> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 137 for instruction: %V16 = call i1 @llvm.vector.reduce.xor.v16i1(<16 x i1> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 331 for instruction: %V32 = call i1 @llvm.vector.reduce.xor.v32i1(<32 x i1> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 781 for instruction: %V64 = call i1 @llvm.vector.reduce.xor.v64i1(<64 x i1> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1807 for instruction: %V128 = call i1 @llvm.vector.reduce.xor.v128i1(<128 x i1> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1 = call i1 @llvm.vector.reduce.xor.v1i1(<1 x i1> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i1 @llvm.vector.reduce.xor.v2i1(<2 x i1> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4 = call i1 @llvm.vector.reduce.xor.v4i1(<4 x i1> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i1 @llvm.vector.reduce.xor.v8i1(<8 x i1> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i1 @llvm.vector.reduce.xor.v16i1(<16 x i1> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32 = call i1 @llvm.vector.reduce.xor.v32i1(<32 x i1> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V64 = call i1 @llvm.vector.reduce.xor.v64i1(<64 x i1> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V128 = call i1 @llvm.vector.reduce.xor.v128i1(<128 x i1> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
  %V1   = call i1 @llvm.vector.reduce.xor.v1i1(<1 x i1> undef)