commit
622803b69c
|
@ -2019,7 +2019,8 @@ size_t Somas::CalcLowerBound() const {
|
|||
lifetime_lb[time] = 0;
|
||||
}
|
||||
|
||||
size_t lower, upper;
|
||||
size_t lower;
|
||||
size_t upper;
|
||||
for (const auto &tensor : tensors_list_) {
|
||||
MS_EXCEPTION_IF_NULL(tensor);
|
||||
if (tensor->lifelong_value_ == kLifeLongGraphAll) {
|
||||
|
|
|
@ -41,6 +41,8 @@ using std::vector;
|
|||
|
||||
namespace mindspore {
|
||||
namespace somas {
|
||||
constexpr auto kDefaultAlignmentSize = 512;
|
||||
|
||||
class Interval {
|
||||
public:
|
||||
Interval() : m_a_(0), m_b_(0) {}
|
||||
|
@ -180,7 +182,7 @@ class FootPrint : public std::enable_shared_from_this<FootPrint> {
|
|||
|
||||
class FastHeuristic {
|
||||
public:
|
||||
FastHeuristic() : m_alignment_(512), m_tensors_allocated_(0) {}
|
||||
FastHeuristic() : m_alignment_(kDefaultAlignmentSize), m_tensors_allocated_(0) {}
|
||||
~FastHeuristic() = default;
|
||||
|
||||
void setAlignment(const size_t &a) { m_alignment_ = a; }
|
||||
|
|
|
@ -43,6 +43,7 @@ constexpr char const *sortingNames[6] = {"size(>), index(<)",
|
|||
constexpr char const *branchingNames[4] = {"bestfit", "smallest", "largest", "worstfit"};
|
||||
constexpr char const *algorithmTypeNames[2] = {"Shared Objects", "Single Object"};
|
||||
constexpr auto kParallelComputeSizeThreshold = 2000;
|
||||
constexpr auto kHalfByteSize = 4;
|
||||
enum Status { FAILED, SUCCESS };
|
||||
enum AlgorithmType { kManyObjects = 0, kSingleObject, kNumAlgorithmTypes };
|
||||
enum SortingType {
|
||||
|
@ -117,7 +118,7 @@ class DynamicBitSet {
|
|||
auto *char_value = reinterpret_cast<unsigned char *>(&value);
|
||||
for (size_t j = 0; j < bit_width_ / CHAR_BIT; j++) {
|
||||
ret += ones_num_in_hex[static_cast<int>(char_value[j] & 0xF)];
|
||||
char_value[j] >>= 4;
|
||||
char_value[j] >>= kHalfByteSize;
|
||||
ret += ones_num_in_hex[static_cast<int>(char_value[j] & 0xF)];
|
||||
}
|
||||
}
|
||||
|
|
|
@ -312,7 +312,9 @@ MSTensor *MSTensor::CharStringsToTensor(const std::vector<char> &name, const std
|
|||
}
|
||||
|
||||
std::vector<std::vector<char>> MSTensor::TensorToStringChars(const MSTensor &tensor) {
|
||||
if (tensor == nullptr || tensor.DataType() != DataType::kObjectTypeString || tensor.DataSize() < 4) {
|
||||
constexpr auto minimum_tensor_size = 4;
|
||||
if (tensor == nullptr || tensor.DataType() != DataType::kObjectTypeString ||
|
||||
tensor.DataSize() < minimum_tensor_size) {
|
||||
MS_LOG(ERROR) << "Invalid tensor.";
|
||||
return {};
|
||||
}
|
||||
|
|
|
@ -210,7 +210,8 @@ struct AsymmetricFunc {
|
|||
template <typename T>
|
||||
struct HalfPixelFunc {
|
||||
T operator()(const T &new_x, const int &old_length, const int &new_length) const {
|
||||
return new_length > 1 ? (new_x + 0.5) * old_length / new_length - 0.5 : 0;
|
||||
constexpr auto half_pixel = 0.5;
|
||||
return new_length > 1 ? (new_x + half_pixel) * old_length / new_length - half_pixel : 0;
|
||||
}
|
||||
};
|
||||
|
||||
|
|
|
@ -45,8 +45,9 @@
|
|||
namespace mindspore {
|
||||
namespace device {
|
||||
namespace ascend {
|
||||
const int FLOAT_LEN = sizeof(float);
|
||||
const int FLOAT16_LEN = 2;
|
||||
const auto kFloat16Bytes = 2;
|
||||
const auto kFloatBytes = sizeof(float);
|
||||
const auto kFloat64Bytes = 8;
|
||||
|
||||
bool IsUseTransDataTypeFormat(const std::pair<std::string, std::string> &type_format) {
|
||||
static const std::set<std::pair<std::string, std::string>> use_trans_data = {
|
||||
|
@ -107,8 +108,8 @@ void SyncMemory(void *dst, const void *src, uint64_t size, aclrtMemcpyKind kind)
|
|||
}
|
||||
|
||||
bool FloatToHalfAndSyncHostToDevice(void *dst, size_t dst_size, const void *src, size_t src_size) {
|
||||
auto elem_num = src_size / FLOAT_LEN;
|
||||
if (elem_num != (dst_size / FLOAT16_LEN)) {
|
||||
auto elem_num = src_size / kFloatBytes;
|
||||
if (elem_num != (dst_size / kFloat16Bytes)) {
|
||||
MS_EXCEPTION(ArgumentError) << "FloatToHalf failed. size not match src_size[" << src_size << "], dst_size["
|
||||
<< dst_size << "]";
|
||||
}
|
||||
|
@ -119,7 +120,7 @@ bool FloatToHalfAndSyncHostToDevice(void *dst, size_t dst_size, const void *src,
|
|||
}
|
||||
|
||||
bool Float64ToFloatAndSyncHostToDevice(void *dst, size_t dst_size, const void *src, size_t src_size) {
|
||||
if (src_size / 2 != dst_size) {
|
||||
if (src_size / kFloat64Bytes != dst_size / kFloatBytes) {
|
||||
MS_EXCEPTION(ArgumentError) << "src_size[" << src_size << "], dst_size[" << dst_size << "]";
|
||||
}
|
||||
size_t elem_num = dst_size / sizeof(float);
|
||||
|
@ -130,8 +131,8 @@ bool Float64ToFloatAndSyncHostToDevice(void *dst, size_t dst_size, const void *s
|
|||
}
|
||||
|
||||
bool SyncDeviceToHostAndHalfToFloat(void *dst, size_t dst_size, const void *src, size_t src_size) {
|
||||
auto elem_num = src_size / FLOAT16_LEN;
|
||||
if (elem_num != (dst_size / FLOAT_LEN)) {
|
||||
auto elem_num = src_size / kFloat16Bytes;
|
||||
if (elem_num != (dst_size / kFloatBytes)) {
|
||||
MS_EXCEPTION(ArgumentError) << "HalfToFloat failed. size not match src_size[" << src_size << "], dst_size["
|
||||
<< dst_size << "]";
|
||||
}
|
||||
|
@ -142,7 +143,7 @@ bool SyncDeviceToHostAndHalfToFloat(void *dst, size_t dst_size, const void *src,
|
|||
}
|
||||
|
||||
bool SyncDeviceToHostAndFloatToFloat64(void *dst, size_t dst_size, const void *src, size_t src_size) {
|
||||
if (src_size != dst_size / 2) {
|
||||
if (src_size / kFloatBytes != dst_size / kFloat64Bytes) {
|
||||
MS_EXCEPTION(ArgumentError) << "src_size[" << src_size << "], dst_size[" << dst_size << "]";
|
||||
}
|
||||
size_t elem_num = src_size / sizeof(float);
|
||||
|
|
|
@ -2411,7 +2411,8 @@ void AscendStreamAssign::CheckEventAssign(const NotNull<KernelGraphPtr> &graph_p
|
|||
<< ", max event id:" << max_event_id << ", event map is:" << event_map;
|
||||
}
|
||||
for (const auto &item : std::as_const(event_map)) {
|
||||
if (item.second.size() != 2) {
|
||||
constexpr auto pair_size = 2;
|
||||
if (item.second.size() != pair_size) {
|
||||
MS_LOG(EXCEPTION) << "Send/recv should be in pair and share one event id, invalid event id is:" << item.first
|
||||
<< ", event size is:" << item.second.size();
|
||||
}
|
||||
|
|
|
@ -859,7 +859,8 @@ std::tuple<KernelSelectStatus, std::string, ExceptionType> SelectKernelInfoWithM
|
|||
KernelType kernel_type) {
|
||||
std::vector<std::shared_ptr<kernel::KernelBuildInfo>> kernel_info_list;
|
||||
std::vector<std::shared_ptr<kernel::KernelBuildInfo>> aicpu_kernel_info_list;
|
||||
std::ostringstream aicore_in_out_info, aicpu_in_out_info;
|
||||
std::ostringstream aicore_in_out_info;
|
||||
std::ostringstream aicpu_in_out_info;
|
||||
std::tuple<KernelSelectStatus, std::string, ExceptionType> result =
|
||||
std::make_tuple(kStatusAllMatched, "", NoExceptionType);
|
||||
MS_EXCEPTION_IF_NULL(kernel_node);
|
||||
|
|
|
@ -93,24 +93,24 @@ class HcclKernelFactory {
|
|||
std::map<string, HcclKernelCreater> hccl_kernel_map_;
|
||||
};
|
||||
|
||||
class _HcclKernelRegister {
|
||||
class HcclKernelRegister {
|
||||
public:
|
||||
_HcclKernelRegister(const string &name, HcclKernelCreater &&fun) {
|
||||
HcclKernelRegister(const string &name, HcclKernelCreater &&fun) {
|
||||
HcclKernelFactory::Get().Register(name, std::move(fun));
|
||||
}
|
||||
~_HcclKernelRegister() = default;
|
||||
~HcclKernelRegister() = default;
|
||||
};
|
||||
|
||||
#define _MS_HCCL_REG_KERNEL_REG(KNAME, clazz) \
|
||||
#define MS_HCCL_REG_KERNEL_REG(KNAME, clazz) \
|
||||
static_assert(std::is_base_of<HcclKernel, clazz>::value, " must be base of HcclKernel"); \
|
||||
static const _HcclKernelRegister g_##KNAME##_##_kernel_reg(#KNAME, []() { \
|
||||
static const HcclKernelRegister g_##KNAME##_##_kernel_reg(#KNAME, []() { \
|
||||
std::shared_ptr<clazz> ptr = nullptr; \
|
||||
ptr = std::make_shared<clazz>(); \
|
||||
MS_EXCEPTION_IF_NULL(ptr); \
|
||||
return ptr; \
|
||||
});
|
||||
|
||||
#define MS_HCCL_REG_KERNEL(KNAME, clazz) _MS_HCCL_REG_KERNEL_REG(KNAME, clazz)
|
||||
#define MS_HCCL_REG_KERNEL(KNAME, clazz) MS_HCCL_REG_KERNEL_REG(KNAME, clazz)
|
||||
} // namespace kernel
|
||||
} // namespace mindspore
|
||||
#endif
|
||||
|
|
|
@ -58,17 +58,17 @@ class HostKernelFactory {
|
|||
std::map<string, HostKernelCreater> hostKernelMap_;
|
||||
};
|
||||
|
||||
class _HostKernelRegister {
|
||||
class HostKernelRegister {
|
||||
public:
|
||||
_HostKernelRegister(const string &name, HostKernelCreater &&fun) {
|
||||
HostKernelRegister(const string &name, HostKernelCreater &&fun) {
|
||||
HostKernelFactory::Get().Register(name, std::move(fun));
|
||||
}
|
||||
~_HostKernelRegister() = default;
|
||||
~HostKernelRegister() = default;
|
||||
};
|
||||
|
||||
#define MS_HOST_REG_KERNEL_REG(KNAME, clazz) \
|
||||
static_assert(std::is_base_of<HostKernelMod, clazz>::value, " must be base of HostKernelMod"); \
|
||||
static const _HostKernelRegister g_##KNAME##_##_kernel_reg(#KNAME, []() { \
|
||||
static const HostKernelRegister g_##KNAME##_##_kernel_reg(#KNAME, []() { \
|
||||
std::shared_ptr<clazz> ptr = nullptr; \
|
||||
ptr = std::make_shared<clazz>(); \
|
||||
MS_EXCEPTION_IF_NULL(ptr); \
|
||||
|
|
|
@ -24,13 +24,16 @@ using MemcpyAsyncTaskInfoPtr = std::shared_ptr<MemcpyAsyncTaskInfo>;
|
|||
|
||||
namespace mindspore {
|
||||
namespace kernel {
|
||||
namespace {
|
||||
constexpr auto kAssignInputSize = 2;
|
||||
}
|
||||
AssignKernel::AssignKernel() {}
|
||||
|
||||
AssignKernel::~AssignKernel() {}
|
||||
|
||||
bool AssignKernel::Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &,
|
||||
const std::vector<AddressPtr> &, void *stream_ptr) {
|
||||
if (inputs.size() != 2) {
|
||||
if (inputs.size() != kAssignInputSize) {
|
||||
MS_LOG(ERROR) << "inputs size is not two";
|
||||
return false;
|
||||
}
|
||||
|
@ -52,7 +55,7 @@ bool AssignKernel::Launch(const std::vector<AddressPtr> &inputs, const std::vect
|
|||
|
||||
std::vector<TaskInfoPtr> AssignKernel::GenTask(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &,
|
||||
const std::vector<AddressPtr> &, uint32_t stream_id) {
|
||||
if (inputs.size() != 2) {
|
||||
if (inputs.size() != kAssignInputSize) {
|
||||
MS_LOG(EXCEPTION) << "Inputs size should be 2, but got " << inputs.size();
|
||||
}
|
||||
stream_id_ = stream_id;
|
||||
|
|
|
@ -61,19 +61,19 @@ class RtKernelFactory {
|
|||
std::map<string, RtKernelCreater> fmap_;
|
||||
};
|
||||
|
||||
class _RtKernelRegister {
|
||||
class RtKernelRegister {
|
||||
public:
|
||||
_RtKernelRegister(const std::string &name, RtKernelCreater &&fun) {
|
||||
RtKernelRegister(const std::string &name, RtKernelCreater &&fun) {
|
||||
RtKernelFactory::Get().Register(name, std::move(fun));
|
||||
}
|
||||
~_RtKernelRegister() = default;
|
||||
~RtKernelRegister() = default;
|
||||
};
|
||||
|
||||
#define _MS_REG_RTKERNEL_REG(KNAME, clazz) \
|
||||
#define MS_REG_RTKERNEL_REG(KNAME, clazz) \
|
||||
static_assert(std::is_base_of<RtKernel, clazz>::value, " must be base of RtKernel"); \
|
||||
static const _RtKernelRegister g_##KNAME##_##_RtKernel_reg(#KNAME, []() { return std::make_shared<clazz>(); });
|
||||
static const RtKernelRegister g_##KNAME##_##_RtKernel_reg(#KNAME, []() { return std::make_shared<clazz>(); });
|
||||
|
||||
#define MS_REG_RTKERNEL(KNAME, clazz) _MS_REG_RTKERNEL_REG(KNAME, clazz)
|
||||
#define MS_REG_RTKERNEL(KNAME, clazz) MS_REG_RTKERNEL_REG(KNAME, clazz)
|
||||
} // namespace kernel
|
||||
} // namespace mindspore
|
||||
|
||||
|
|
|
@ -54,19 +54,19 @@ class RtKerDescFactory {
|
|||
std::map<std::string, RtKerDescCreater> fmap_;
|
||||
};
|
||||
|
||||
class _RtKerDescRegister {
|
||||
class RtKerDescRegister {
|
||||
public:
|
||||
_RtKerDescRegister(const std::string &name, RtKerDescCreater &&fun) {
|
||||
RtKerDescRegister(const std::string &name, RtKerDescCreater &&fun) {
|
||||
RtKerDescFactory::Get().Register(name, std::move(fun));
|
||||
}
|
||||
~_RtKerDescRegister() = default;
|
||||
~RtKerDescRegister() = default;
|
||||
};
|
||||
|
||||
#define _MS_REG_RTKERNEL_DESC_REG(KNAME, clazz) \
|
||||
#define MS_REG_RTKERNEL_DESC_REG(KNAME, clazz) \
|
||||
static_assert(std::is_base_of<RtKerDesc, clazz>::value, " must be base of RtKerDesc"); \
|
||||
static const _RtKerDescRegister g_##KNAME##_##_rtkernel_desc_reg(#KNAME, []() { return std::make_shared<clazz>(); });
|
||||
static const RtKerDescRegister g_##KNAME##_##_rtkernel_desc_reg(#KNAME, []() { return std::make_shared<clazz>(); });
|
||||
|
||||
#define MS_REG_RTKERNEL_DESC(KNAME, clazz) _MS_REG_RTKERNEL_DESC_REG(KNAME, clazz)
|
||||
#define MS_REG_RTKERNEL_DESC(KNAME, clazz) MS_REG_RTKERNEL_DESC_REG(KNAME, clazz)
|
||||
|
||||
void GetRtKelInfo(const CNodePtr &kernel_node, std::vector<std::shared_ptr<kernel::KernelBuildInfo>> *kernel_info_list);
|
||||
} // namespace kernel
|
||||
|
|
|
@ -38,7 +38,7 @@ TensorCopySlices::~TensorCopySlices() {}
|
|||
|
||||
bool TensorCopySlices::Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &,
|
||||
const std::vector<AddressPtr> &outputs, void *stream_ptr) {
|
||||
if (inputs.size() != 2) {
|
||||
if (inputs.size() != kTensorCopySlicesInputSize) {
|
||||
MS_LOG(ERROR) << "inputs size is not 2";
|
||||
return false;
|
||||
}
|
||||
|
@ -88,7 +88,7 @@ bool TensorCopySlices::Init(const mindspore::AnfNodePtr &anf_node) {
|
|||
void TensorCopySlices::GetInputOutputInfo(const AnfNodePtr &anf_node) {
|
||||
MS_EXCEPTION_IF_NULL(anf_node);
|
||||
size_t input_size = common::AnfAlgo::GetInputTensorNum(anf_node);
|
||||
if (input_size != 2) {
|
||||
if (input_size != kTensorCopySlicesInputSize) {
|
||||
MS_LOG(EXCEPTION) << "TensorCopySlices input size is not 2, got " << input_size;
|
||||
}
|
||||
input_type_id_ = AnfAlgo::GetPrevNodeOutputDeviceDataType(anf_node, 0);
|
||||
|
@ -134,7 +134,7 @@ void TensorCopySlices::GetInputOutputTotalCount(const AnfNodePtr &anf_node) {
|
|||
std::vector<TaskInfoPtr> TensorCopySlices::GenTask(const std::vector<AddressPtr> &inputs,
|
||||
const std::vector<AddressPtr> &,
|
||||
const std::vector<AddressPtr> &outputs, uint32_t stream_id) {
|
||||
if (inputs.size() != 2) {
|
||||
if (inputs.size() != kTensorCopySlicesInputSize) {
|
||||
MS_LOG(EXCEPTION) << "inputs size is not 2.";
|
||||
}
|
||||
if (outputs.size() != 1) {
|
||||
|
|
|
@ -781,7 +781,8 @@ void TbeKernelCompileManager::DistributeCompileTask(const std::vector<CNodePtr>
|
|||
void TbeKernelCompileManager::TbePreBuild(const KernelGraphPtr &kernel_graph) {
|
||||
MS_EXCEPTION_IF_NULL(kernel_graph);
|
||||
MS_LOG(INFO) << "Single op pre build start.";
|
||||
struct timeval start_time, end_time;
|
||||
struct timeval start_time;
|
||||
struct timeval end_time;
|
||||
(void)gettimeofday(&start_time, nullptr);
|
||||
std::vector<CNodePtr> node_list;
|
||||
GetAllTbeNodes(kernel_graph, &node_list);
|
||||
|
|
|
@ -418,7 +418,8 @@ AnfNodePtr DynamicRnnGradFissionV2::CreateSplitV(const FuncGraphPtr &func_graph,
|
|||
}
|
||||
auto split_v = NewCNode(splitv_input, func_graph);
|
||||
// Set infer data type and shape
|
||||
ShapeVector shape1, shape2;
|
||||
ShapeVector shape1;
|
||||
ShapeVector shape2;
|
||||
if (specs.batch_size % kCubeSize == 0 && !specs.shape_need_align) {
|
||||
shape1 = {(origin_input6_shape[kDim0] - 1) * origin_input6_shape[kDim1], origin_input6_shape[kDim2]};
|
||||
shape2 = {origin_input6_shape[kDim1], origin_input6_shape[kDim2]};
|
||||
|
|
|
@ -111,7 +111,10 @@ ValueNodePtr CreateMeanMatrixValueNode(const FuncGraphPtr &func_graph, const Anf
|
|||
<< x_shape << ", kernel_size:" << k_size << ", strides:" << stride
|
||||
<< trace::DumpSourceLines(node);
|
||||
}
|
||||
int64_t pad_top, pad_bottom, pad_left, pad_right;
|
||||
int64_t pad_top;
|
||||
int64_t pad_bottom;
|
||||
int64_t pad_left;
|
||||
int64_t pad_right;
|
||||
int64_t h_output =
|
||||
windowed_output_size(node, x_shape[kDim2], k_size[kDim2], stride[kDim2], pad_mode, &pad_top, &pad_bottom);
|
||||
int64_t w_output =
|
||||
|
|
|
@ -110,8 +110,9 @@ std::vector<int64_t> CalGenMaskOutputShape(const std::vector<int64_t> &shape) {
|
|||
|
||||
std::vector<int64_t> CalGenMaskV3OutputShape(const std::vector<int64_t> &shape, TypeId type) {
|
||||
// [*dim, M, N] -> [*dim, N/16, M/16, 16, 16] if M%16=0 and N%16=0
|
||||
if (shape.size() >= 2 && shape[shape.size() - 1] % static_cast<int64_t>(kCubeSize) == 0 &&
|
||||
shape[shape.size() - 2] % static_cast<int64_t>(kCubeSize) == 0) {
|
||||
constexpr auto cube_h_offset = 2;
|
||||
if (shape.size() >= cube_h_offset && shape[shape.size() - 1] % static_cast<int64_t>(kCubeSize) == 0 &&
|
||||
shape[shape.size() - cube_h_offset] % static_cast<int64_t>(kCubeSize) == 0) {
|
||||
auto fnz_shape = trans::TransShapeToDevice(shape, kOpFormat_FRAC_NZ, type);
|
||||
return fnz_shape;
|
||||
}
|
||||
|
|
|
@ -48,6 +48,7 @@ constexpr int64_t kRankIdFive = 5;
|
|||
constexpr int64_t kRankIdSix = 6;
|
||||
constexpr int64_t kRankIdSeven = 7;
|
||||
constexpr size_t kSizeFour = 4;
|
||||
constexpr size_t kSizeEight = 8;
|
||||
constexpr int64_t kInvalidId = -1;
|
||||
constexpr size_t kMinSplitOutputSize = 2;
|
||||
|
||||
|
@ -215,7 +216,7 @@ AnfNodePtr GetCenter(const FuncGraphPtr &graph, const CNodePtr &neighbor_exchang
|
|||
std::vector<AnfNodePtr> CreateAllToAllvInputForGrad(const std::vector<int64_t> &send_rank_ids,
|
||||
const std::vector<std::vector<AnfNodePtr>> &split_outputs,
|
||||
const std::vector<CNodePtr> &split_nodes) {
|
||||
if (send_rank_ids.size() != 8) {
|
||||
if (send_rank_ids.size() != kSizeEight) {
|
||||
MS_LOG(EXCEPTION) << "Wrong send_rank_ids size: " << send_rank_ids.size() << ", expect size: 8.";
|
||||
}
|
||||
if (split_outputs.size() != kSizeFour) {
|
||||
|
|
Loading…
Reference in New Issue