diff --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h b/llvm/include/llvm/CodeGen/BasicTTIImpl.h index bfd77b497008..02477985e291 100644 --- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h +++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h @@ -1084,46 +1084,46 @@ public: return 0; } + /// Try to calculate arithmetic and shuffle op costs for reduction operations. + /// We're assuming that reduction operation are performing the following way: + /// 1. Non-pairwise reduction + /// %val1 = shufflevector %val, %undef, + /// + /// \----------------v-------------/ \----------v------------/ + /// n/2 elements n/2 elements + /// %red1 = op %val, val1 + /// After this operation we have a vector %red1 with only maningfull the + /// first n/2 elements, the second n/2 elements are undefined and can be + /// dropped. All other operations are actually working with the vector of + /// length n/2, not n. though the real vector length is still n. + /// %val2 = shufflevector %red1, %undef, + /// + /// \----------------v-------------/ \----------v------------/ + /// n/4 elements 3*n/4 elements + /// %red2 = op %red1, val2 - working with the vector of + /// length n/2, the resulting vector has length n/4 etc. + /// 2. Pairwise reduction: + /// Everything is the same except for an additional shuffle operation which + /// is used to produce operands for pairwise kind of reductions. + /// %val1 = shufflevector %val, %undef, + /// + /// \-------------v----------/ \----------v------------/ + /// n/2 elements n/2 elements + /// %val2 = shufflevector %val, %undef, + /// + /// \-------------v----------/ \----------v------------/ + /// n/2 elements n/2 elements + /// %red1 = op %val1, val2 + /// Again, the operation is performed on vector, but the resulting + /// vector %red1 is vector. + /// + /// The cost model should take into account that the actual length of the + /// vector is reduced on each iteration. unsigned getReductionCost(unsigned Opcode, Type *Ty, bool IsPairwise) { assert(Ty->isVectorTy() && "Expect a vector type"); Type *ScalarTy = Ty->getVectorElementType(); unsigned NumVecElts = Ty->getVectorNumElements(); unsigned NumReduxLevels = Log2_32(NumVecElts); - // Try to calculate arithmetic and shuffle op costs for reduction operations. - // We're assuming that reduction operation are performing the following way: - // 1. Non-pairwise reduction - // %val1 = shufflevector %val, %undef, - // - // \----------------v-------------/ \----------v------------/ - // n/2 elements n/2 elements - // %red1 = op %val, val1 - // After this operation we have a vector %red1 with only maningfull the - // first n/2 elements, the second n/2 elements are undefined and can be - // dropped. All other operations are actually working with the vector of - // length n/2, not n. though the real vector length is still n. - // %val2 = shufflevector %red1, %undef, - // - // \----------------v-------------/ \----------v------------/ - // n/4 elements 3*n/4 elements - // %red2 = op %red1, val2 - working with the vector of - // length n/2, the resulting vector has length n/4 etc. - // 2. Pairwise reduction: - // Everything is the same except for an additional shuffle operation which - // is used to produce operands for pairwise kind of reductions. - // %val1 = shufflevector %val, %undef, - // - // \-------------v----------/ \----------v------------/ - // n/2 elements n/2 elements - // %val2 = shufflevector %val, %undef, - // - // \-------------v----------/ \----------v------------/ - // n/2 elements n/2 elements - // %red1 = op %val1, val2 - // Again, the operation is performed on vector, but the resulting - // vector %red1 is vector. - // - // The cost model should take into account that the actual length of the - // vector is reduced on each iteration. unsigned ArithCost = 0; unsigned ShuffleCost = 0; auto *ConcreteTTI = static_cast(this);