move enterprise code to opensource

buxue 2022-11-05 07:51:58 +04:30
parent 3008a3cee8
commit 8c6ee47470
21 changed files with 122 additions and 156 deletions

View File

@ -5,7 +5,7 @@ mindspore.train.OnRequestExit
Responds to a user exit request: stops the training or inference process and saves the checkpoint and mindir.
Register the OnRequestExit callback before training starts. When the user wants to exit the training process and save the training data, the registered exit signal "sig" can be sent to the training process.
Register the OnRequestExit callback before training starts. When the user wants to exit the training process and save the training data, the registered exit signal `sig` can be sent to the training process.
After the training process finishes the current step, it saves the current training state, including the checkpoint and mindir, and then exits the training process.
Parameters:
@ -17,7 +17,7 @@ mindspore.train.OnRequestExit
Raises:
- **ValueError** - `save_ckpt` is not a bool.
- **ValueError** - `save_mindir` is not a string.
- **ValueError** - `save_mindir` is not a bool.
- **ValueError** - `file_name` is not a string.
- **ValueError** - `directory` is not a string.
- **ValueError** - `sig` is not an int, or it is signal.SIGKILL.
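A minimal usage sketch based on the parameters documented above (save_ckpt, save_mindir, file_name, directory, sig). The keyword names follow this page; the exact defaults and the toy network/dataset here are illustrative assumptions, not part of the commit.

import signal
import numpy as np
from mindspore import Model, nn
from mindspore import dataset as ds
from mindspore.train.callback import OnRequestExit

# Toy network, loss, optimizer and dataset, for illustration only.
net = nn.Dense(4, 2)
loss = nn.MSELoss()
opt = nn.Momentum(net.trainable_params(), learning_rate=0.01, momentum=0.9)
data = ds.NumpySlicesDataset({"x": np.random.randn(32, 4).astype(np.float32),
                              "y": np.random.randn(32, 2).astype(np.float32)},
                             shuffle=False).batch(8)

# Register the callback; on receiving `sig` the process finishes the current
# step, saves the checkpoint and mindir, then exits.
on_exit = OnRequestExit(save_ckpt=True, save_mindir=True,
                        file_name='toy_net', directory='./on_exit_out',
                        sig=signal.SIGTERM)

model = Model(net, loss_fn=loss, optimizer=opt)
model.train(5, data, callbacks=[on_exit])

# From another shell, request a graceful exit:  kill -TERM <training_pid>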

View File

@ -26,7 +26,6 @@
#include "backend/common/pass/convert_tuple_input_to_dynamic_input.h"
#include "backend/common/pass/convert_const_scalar_to_tensor.h"
#include "backend/common/pass/convert_attr_to_unify_mindir.h"
#include "backend/common/pass/add_training_attr.h"
#include "backend/common/pass/optimize_updatestate.h"
#include "backend/common/pass/conv_transpose_to_conv_bp.h"
#include "backend/common/pass/reduce_sum_optimizer.h"
@ -71,7 +70,6 @@ void BackendCommonOptimization(const std::shared_ptr<session::KernelGraph> &kern
common_pm->AddPass(std::make_shared<ConvertUnusedTupleParaToMakeTuple>());
common_pm->AddPass(std::make_shared<ConvertConstScalarToTensor>());
common_pm->AddPass(std::make_shared<ConvertTupleInputToDynamicInput>());
common_pm->AddPass(std::make_shared<AddTrainingAttr>());
common_pm->AddPass(std::make_shared<FlattenConcatFission>());
common_pm->AddPass(std::make_shared<AddDropoutAttrs>());
optimizer->AddPassManager(common_pm);

View File

@ -25,32 +25,11 @@
#include "ir/graph_utils.h"
#include "backend/common/optimizer/helper.h"
#include "include/common/utils/anfalgo.h"
#include "utils/ms_context.h"
namespace mindspore {
namespace opt {
namespace {
mindspore::HashMap<std::string, mindspore::HashSet<std::string>> MarkOp{
{"LSTM", {"LSTMGradWeight", "LSTMGrad", "LSTMGradData"}}};
bool CheckOP(const FuncGraphManagerPtr &manager, const AnfNodePtr &cnode, const mindspore::HashSet<std::string> &set) {
MS_EXCEPTION_IF_NULL(manager);
MS_EXCEPTION_IF_NULL(cnode);
for (const auto &node_index : manager->node_users()[cnode]) {
auto output = node_index.first;
MS_EXCEPTION_IF_NULL(output);
if (common::AnfAlgo::CheckPrimitiveType(output, prim::kPrimTupleGetItem)) {
if (CheckOP(manager, output, set)) {
return true;
}
} else if (output->isa<CNode>()) {
auto name = common::AnfAlgo::GetCNodeName(output);
if (set.find(name) != set.end()) {
return true;
}
}
}
return false;
}
void AddAttrTraining(const FuncGraphPtr &func_graph, const CNodePtr &cnode) {
MS_EXCEPTION_IF_NULL(func_graph);
MS_EXCEPTION_IF_NULL(cnode);
@ -59,9 +38,9 @@ void AddAttrTraining(const FuncGraphPtr &func_graph, const CNodePtr &cnode) {
if (manager->node_users().find(cnode) == manager->node_users().end()) {
return;
}
auto set = MarkOp[common::AnfAlgo::GetCNodeName(cnode)];
if (CheckOP(manager, cnode, set)) {
cnode->AddAttr(kAttrIsTraining, MakeValue(true));
auto prim = GetCNodePrimitive(cnode);
MS_EXCEPTION_IF_NULL(prim);
if (prim->HasAttr(kAttrIsTraining)) {
cnode->AddAttr(kAttrIsTraining, prim->GetAttr(kAttrIsTraining));
} else {
cnode->AddAttr(kAttrIsTraining, MakeValue(false));
}
@ -78,8 +57,7 @@ const AnfNodePtr AddTrainingAttr::Process(const FuncGraphPtr &func_graph, const
return nullptr;
}
auto name = common::AnfAlgo::GetCNodeName(node);
auto iter = MarkOp.find(name);
if (iter == MarkOp.end()) {
if (name != prim::kPrimLstm->name()) {
return nullptr;
}
auto cnode = node->cast<CNodePtr>();

View File

@ -39,6 +39,7 @@
#include "backend/common/pass/communication_op_fusion.h"
#include "backend/common/pass/replace_node_by_proxy.h"
#include "backend/common/pass/erase_visit_attr.h"
#include "backend/common/pass/add_training_attr.h"
#include "backend/common/pass/insert_tensor_move_for_communication.h"
#include "common/graph_kernel/adapter/graph_kernel_optimization.h"
#include "common/graph_kernel/adapter/expander.h"
@ -180,6 +181,7 @@ void CPUKernelExecutor::OptimizeGraphImpl(const KernelGraphPtr &graph) const {
pm->AddPass(std::make_shared<opt::InsertCastCPU>("insert_cast"));
pm->AddPass(std::make_shared<opt::EraseVisitAttr>());
pm->AddPass(std::make_shared<opt::InsertTensorMoveForCommunication>());
pm->AddPass(std::make_shared<opt::AddTrainingAttr>());
optimizer->AddPassManager(pm);
(void)optimizer->Optimize(graph);
graph->SetExecOrderByDefault();

View File

@ -222,7 +222,7 @@ template <typename T>
template <typename Op>
void ArithLogicCpuTypeFunc<T>::BinaryOp(const T *input1, const T *input2, bool *out, Op op) {
int64_t input1_size = 1;
int64_t input2_size = 2;
int64_t input2_size = 1;
for (size_t i = 0; i < output_shape_.size(); i++) {
input1_size *= input_shape1_[i];
@ -270,7 +270,7 @@ template <typename T>
template <typename Op>
void ArithComplexLogicCpuTypeFunc<T>::BinaryOp(const T *input1, const T *input2, bool *out, Op op) {
int64_t input1_size = 1;
int64_t input2_size = 2;
int64_t input2_size = 1;
for (size_t i = 0; i < output_shape_.size(); i++) {
input1_size *= input_shape1_[i];

View File

@ -76,14 +76,17 @@ bool DropoutCpuKernelMod::LaunchKernel(const std::vector<kernel::AddressPtr> &in
const auto *input_addr = reinterpret_cast<T *>(inputs[0]->addr);
auto *output_addr = reinterpret_cast<T *>(outputs[0]->addr);
auto mask_addr = reinterpret_cast<T *>(outputs[1]->addr);
std::random_device rd;
std::mt19937 gen(rd());
std::bernoulli_distribution dis(keep_prob_);
T scale = static_cast<T>(1.f / keep_prob_);
for (uint64_t i = 0; i < tensor_size_; ++i) {
mask_addr[i] = static_cast<T>(dis(gen));
output_addr[i] = mask_addr[i] * input_addr[i] * scale;
}
std::random_device rd;
std::default_random_engine generator(rd());
std::uniform_real_distribution<float> uniform(0.f, 1.f);
auto task = [input_addr, output_addr, mask_addr, scale, &uniform, &generator, this](size_t start, size_t end) {
for (size_t i = start; i < end; i++) {
mask_addr[i] = static_cast<T>(uniform(generator) < keep_prob_);
output_addr[i] = mask_addr[i] * input_addr[i] * scale;
}
};
ParallelLaunchAutoSearch(task, tensor_size_, this, &parallel_search_info_);
return true;
}
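For reference, the parallelized kernel above implements standard inverted dropout: each element is kept with probability keep_prob, surviving values are scaled by 1/keep_prob so the expected output equals the input, and the mask is written to the second output. A NumPy sketch of the same math, as an illustration of the technique rather than the kernel's API:

import numpy as np

def inverted_dropout(x, keep_prob, rng=None):
    # mask[i] is 1 with probability keep_prob, else 0; scaling keeps E[out] == x.
    rng = rng or np.random.default_rng()
    mask = (rng.uniform(0.0, 1.0, size=x.shape) < keep_prob).astype(x.dtype)
    return x * mask / keep_prob, mask

out, mask = inverted_dropout(np.ones((4, 3), dtype=np.float32), keep_prob=0.8)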

View File

@ -53,7 +53,7 @@ class DropoutCpuKernelMod : public NativeCpuKernelMod, public MatchKernelHelper<
ShapeVector input_shape_;
float keep_prob_{0.0};
uint64_t tensor_size_{1};
size_t tensor_size_{1};
};
} // namespace kernel
} // namespace mindspore

View File

@ -27,6 +27,14 @@ constexpr auto kFlattenGrad = "FlattenGrad";
constexpr auto kExpandDims = "ExpandDims";
constexpr auto kSqueeze = "Squeeze";
} // namespace
bool MemcpyCpuKernelMod::Init(const BaseOperatorPtr &base_operator, const std::vector<KernelTensorPtr> &,
const std::vector<KernelTensorPtr> &) {
MS_EXCEPTION_IF_NULL(base_operator);
kernel_name_ = base_operator->name();
return true;
}
bool MemcpyCpuKernelMod::Launch(const std::vector<kernel::AddressPtr> &inputs, const std::vector<kernel::AddressPtr> &,
const std::vector<kernel::AddressPtr> &outputs) {
if (inputs.empty()) {
@ -40,11 +48,19 @@ bool MemcpyCpuKernelMod::Launch(const std::vector<kernel::AddressPtr> &inputs, c
if (inputs[0]->addr == outputs[0]->addr) {
return true;
}
size_t copy_size = outputs[0]->size;
auto ret = memcpy_s(outputs[0]->addr, copy_size, inputs[0]->addr, copy_size);
if (ret != 0) {
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', memcpy_s error. Error no: " << ret;
const auto *input_addr = reinterpret_cast<unsigned char *>(inputs[0]->addr);
auto *output_addr = reinterpret_cast<unsigned char *>(outputs[0]->addr);
int cp_ret = EOK;
auto task = [input_addr, output_addr, &cp_ret](size_t start, size_t end) {
auto ret = memcpy_s(output_addr + start, end - start, input_addr + start, end - start);
if (ret != EOK && cp_ret == EOK) {
cp_ret = ret;
}
};
ParallelLaunchAutoSearch(task, outputs[0]->size, this, &parallel_search_info_);
if (cp_ret != EOK) {
MS_LOG(EXCEPTION) << "For " << kernel_name_ << ", memcpy error, errorno: " << cp_ret;
}
return true;
}

View File

@ -33,11 +33,7 @@ class MemcpyCpuKernelMod : public NativeCpuKernelMod {
~MemcpyCpuKernelMod() override = default;
bool Init(const BaseOperatorPtr &base_operator, const std::vector<KernelTensorPtr> &,
const std::vector<KernelTensorPtr> &) override {
MS_EXCEPTION_IF_NULL(base_operator);
kernel_name_ = base_operator->name();
return true;
}
const std::vector<KernelTensorPtr> &) override;
bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
const std::vector<AddressPtr> &outputs) override;

View File

@ -36,7 +36,14 @@ constexpr size_t kAddNOutputsNum = 1;
void AddInt(const int *in_0, const int *in_1, int *out, int start, int end) {
int ret = ElementAddInt(in_0 + start, in_1 + start, out + start, end - start);
if (ret != NNACL_OK) {
MS_LOG(EXCEPTION) << "Add failed.";
MS_LOG(EXCEPTION) << "For 'AddN', AddInt failed.";
}
}
void AddFloat(const float *in_0, const float *in_1, float *out, int start, int end) {
int ret = ElementAdd(in_0 + start, in_1 + start, out + start, end - start);
if (ret != NNACL_OK) {
MS_LOG(EXCEPTION) << "For 'AddN', AddFloat failed.";
}
}
@ -66,72 +73,34 @@ bool AddNCpuKernelMod::Init(const BaseOperatorPtr &base_operator, const std::vec
return false;
}
kernel_func_ = func_list_[index].second;
dtype_ = inputs[kIndex0]->GetDtype();
return true;
}
int AddNCpuKernelMod::Resize(const BaseOperatorPtr &base_operator, const std::vector<KernelTensorPtr> &inputs,
const std::vector<KernelTensorPtr> &outputs,
const std::map<uint32_t, tensor::TensorPtr> &inputsOnHost) {
if (auto ret = KernelMod::Resize(base_operator, inputs, outputs, inputsOnHost); ret != KRET_OK) {
return ret;
}
auto src0_shape = inputs[kIndex0]->GetDeviceShapeAdaptively();
auto src1_shape = inputs[kIndex1]->GetDeviceShapeAdaptively();
auto dst_shape = outputs[kIndex0]->GetDeviceShapeAdaptively();
dnnl::memory::desc src0_mem_desc = GetDefaultMemDesc(src0_shape);
dnnl::memory::desc src1_mem_desc = GetDefaultMemDesc(src1_shape);
dnnl::memory::desc dst_mem_desc = GetDefaultMemDesc(dst_shape);
auto desc = CreateDesc<dnnl::binary::desc>(dnnl::algorithm::binary_add, src0_mem_desc, src1_mem_desc, dst_mem_desc);
auto prim_desc = CreateDesc<dnnl::binary::primitive_desc>(desc, engine_);
primitive_ = CreatePrimitive<dnnl::binary>(prim_desc);
AddArgument(DNNL_ARG_SRC_0, src0_mem_desc);
AddArgument(DNNL_ARG_SRC_1, src1_mem_desc);
AddArgument(DNNL_ARG_DST, dst_mem_desc);
return KRET_OK;
}
template <typename T>
bool AddNCpuKernelMod::LaunchKernel(const std::vector<kernel::AddressPtr> &inputs,
const std::vector<kernel::AddressPtr> &,
const std::vector<kernel::AddressPtr> &outputs) {
CHECK_KERNEL_INPUTS_NUM(inputs.size(), input_num_, kernel_name_);
CHECK_KERNEL_OUTPUTS_NUM(outputs.size(), kAddNOutputsNum, kernel_name_);
if (dtype_ == kNumberTypeFloat32) {
SetArgumentHandle(DNNL_ARG_SRC_0, inputs[0]->addr);
SetArgumentHandle(DNNL_ARG_SRC_1, inputs[1]->addr);
SetArgumentHandle(DNNL_ARG_DST, outputs[0]->addr);
ExecutePrimitive();
for (size_t index = 2; index < input_num_; ++index) {
SetArgumentHandle(DNNL_ARG_SRC_0, outputs[0]->addr);
SetArgumentHandle(DNNL_ARG_SRC_1, inputs[index]->addr);
SetArgumentHandle(DNNL_ARG_DST, outputs[0]->addr);
ExecutePrimitive();
}
} else if (dtype_ == kNumberTypeInt32) {
size_t elements_num = outputs[0]->size / sizeof(int);
const auto input_0 = reinterpret_cast<int *>(inputs[0]->addr);
const auto input_1 = reinterpret_cast<int *>(inputs[1]->addr);
auto output = reinterpret_cast<int *>(outputs[0]->addr);
auto task_0 = std::bind(AddInt, input_0, input_1, output, std::placeholders::_1, std::placeholders::_2);
ParallelLaunchAutoSearch(task_0, elements_num, this, &parallel_search_info_);
for (size_t index = 2; index < input_num_; ++index) {
const auto input = reinterpret_cast<int *>(inputs[index]->addr);
auto task = std::bind(AddInt, input, output, output, std::placeholders::_1, std::placeholders::_2);
ParallelLaunchAutoSearch(task, elements_num, this, &parallel_search_info_);
}
std::function<void(const T *, const T *, T *, int, int)> comput_func;
if constexpr (std::is_same<T, float>::value) {
comput_func = AddFloat;
} else if constexpr (std::is_same<T, int>::value) {
comput_func = AddInt;
} else {
size_t elements_num = outputs[0]->size / sizeof(T);
const auto input_0 = reinterpret_cast<T *>(inputs[0]->addr);
const auto input_1 = reinterpret_cast<T *>(inputs[1]->addr);
auto output = reinterpret_cast<T *>(outputs[0]->addr);
auto task_0 = std::bind(AddT<T>, input_0, input_1, output, std::placeholders::_1, std::placeholders::_2);
ParallelLaunchAutoSearch(task_0, elements_num, this, &parallel_search_info_);
for (size_t index = 2; index < input_num_; ++index) {
const auto input = reinterpret_cast<T *>(inputs[index]->addr);
auto task = std::bind(AddT<T>, input, output, output, std::placeholders::_1, std::placeholders::_2);
ParallelLaunchAutoSearch(task, elements_num, this, &parallel_search_info_);
}
comput_func = AddT<T>;
}
size_t elements_num = outputs[0]->size / sizeof(T);
const auto input_0 = reinterpret_cast<T *>(inputs[0]->addr);
const auto input_1 = reinterpret_cast<T *>(inputs[1]->addr);
auto output = reinterpret_cast<T *>(outputs[0]->addr);
auto task_0 = std::bind(comput_func, input_0, input_1, output, std::placeholders::_1, std::placeholders::_2);
ParallelLaunchAutoSearch(task_0, elements_num, this, &parallel_search_info_);
for (size_t index = 2; index < input_num_; ++index) {
const auto input = reinterpret_cast<T *>(inputs[index]->addr);
auto task = std::bind(comput_func, input, output, output, std::placeholders::_1, std::placeholders::_2);
ParallelLaunchAutoSearch(task, elements_num, this, &parallel_search_info_);
}
return true;
}

View File

@ -32,9 +32,6 @@ class AddNCpuKernelMod : public MKLCpuKernelMod {
bool Init(const BaseOperatorPtr &base_operator, const std::vector<KernelTensorPtr> &inputs,
const std::vector<KernelTensorPtr> &outputs) override;
int Resize(const BaseOperatorPtr &base_operator, const std::vector<KernelTensorPtr> &inputs,
const std::vector<KernelTensorPtr> &outputs, const std::map<uint32_t, tensor::TensorPtr> &) override;
bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
const std::vector<AddressPtr> &outputs) override {
return kernel_func_(this, inputs, workspace, outputs);
@ -54,7 +51,6 @@ class AddNCpuKernelMod : public MKLCpuKernelMod {
size_t input_num_{0};
std::vector<size_t> output_shape_;
TypeId dtype_{kNumberTypeFloat32};
};
} // namespace kernel
} // namespace mindspore

View File

@ -50,18 +50,17 @@ bool LstmCpuKernelMod::Init(const BaseOperatorPtr &base_operator, const std::vec
<< kLstmOutputsNum << ", but get " << inputs.size() << " and " << outputs.size();
return false;
}
auto kernel_ptr = std::dynamic_pointer_cast<ops::LSTM>(base_operator);
if (!kernel_ptr) {
MS_LOG(ERROR) << "Cast LSTM ops failed!";
return false;
}
bidirectional_ = kernel_ptr->get_bidirectional();
input_size_ = kernel_ptr->get_input_size();
hidden_size_ = kernel_ptr->get_hidden_size();
num_layers_ = kernel_ptr->get_num_layers();
has_bias_ = kernel_ptr->get_has_bias();
constexpr int kBidirectional = 2;
num_directions_ = 1;
if (bidirectional_) {
@ -74,22 +73,18 @@ bool LstmCpuKernelMod::Init(const BaseOperatorPtr &base_operator, const std::vec
if (num_layers_ > kMaxLSTMLayer) {
MS_LOG(EXCEPTION) << "Layers must be lower than 100!";
}
for (int i = 0; i < num_layers_; ++i) {
weight_size_ += gate_size * (i == 0 ? input_size_ : hidden_size_ * num_directions_);
weight_h_size_ += gate_size * hidden_size_;
}
weight_size_ = weight_size_ * num_directions_;
weight_h_size_ = weight_h_size_ * num_directions_;
weights_dims_ = {num_layers_, num_directions_, input_size_, kGateNum, hidden_size_};
weights_h_dims_ = {num_layers_, num_directions_, hidden_size_, kGateNum, hidden_size_};
bias_dims_ = {num_layers_, num_directions_, kGateNum, hidden_size_};
if (base_operator->HasAttr(kAttrIsTraining)) {
is_training_ = GetValue<bool>(base_operator->GetAttr(kAttrIsTraining));
} else {
is_training_ = true;
}
is_training_ =
base_operator->HasAttr(kAttrIsTraining) ? GetValue<bool>(base_operator->GetAttr(kAttrIsTraining)) : true;
return true;
}
@ -111,8 +106,6 @@ int LstmCpuKernelMod::Resize(const BaseOperatorPtr &base_operator, const std::ve
if (num_directions_ * num_layers_ != src_h_shape[0]) {
MS_LOG(EXCEPTION) << "Error iteration shape!";
}
auto eng = engine_;
dnnl::rnn_direction direction = dnnl::rnn_direction::unidirectional;
if (bidirectional_) {
direction = dnnl::rnn_direction::bidirectional_concat;
@ -131,27 +124,22 @@ int LstmCpuKernelMod::Resize(const BaseOperatorPtr &base_operator, const std::ve
dnnl::memory::desc dst_h_desc = formatted_md(dst_h_dims, tag::ldnc);
dnnl::memory::desc dst_c_desc = formatted_md(dst_c_dims, tag::ldnc);
auto prop_kind = dnnl::prop_kind::forward_training;
if (!is_training_) {
prop_kind = dnnl::prop_kind::forward_inference;
}
auto prop_kind = is_training_ ? dnnl::prop_kind::forward_training : dnnl::prop_kind::forward_inference;
auto weights_desc = formatted_md(weights_dims_, tag::any);
auto weights_h_desc = formatted_md(weights_h_dims_, tag::any);
auto desc =
CreatePrimitive<dnnl::lstm_forward::desc>(prop_kind, direction, src_desc, src_h_desc, src_c_desc, weights_desc,
weights_h_desc, bias_desc, dst_desc, dst_h_desc, dst_c_desc);
prim_desc_ = CreateDesc<dnnl::lstm_forward::primitive_desc>(*desc, eng);
prim_desc_ = CreateDesc<dnnl::lstm_forward::primitive_desc>(*desc, engine_);
primitive_ = CreatePrimitive<dnnl::lstm_forward>(prim_desc_);
auto weights_layer = GetWeightsLayerDesc(prim_desc_);
auto weights_iter = GetWeightsIterDesc(prim_desc_);
bias_desc_ = GetBiasDesc(prim_desc_);
if (is_training_) {
auto wksp_desc = GetWorkspaceDesc(prim_desc_);
reserve_size_ = GetSize(wksp_desc);
AddArgument(DNNL_ARG_WORKSPACE, wksp_desc);
} else {
reserve_size_ = 1;
}
auto weights_layer = GetWeightsLayerDesc(prim_desc_);
auto weights_iter = GetWeightsIterDesc(prim_desc_);
bias_desc_ = GetBiasDesc(prim_desc_);
AddArgument(DNNL_ARG_SRC_LAYER, src_desc);
AddArgument(DNNL_ARG_SRC_ITER, src_h_desc);
AddArgument(DNNL_ARG_SRC_ITER_C, src_c_desc);
@ -164,11 +152,11 @@ int LstmCpuKernelMod::Resize(const BaseOperatorPtr &base_operator, const std::ve
auto weights_dims_desc = CreateDesc<dnnl::memory::desc>(weights_dims_, dt::f32, tag::ldgoi);
auto weights_h_dims_desc = CreateDesc<dnnl::memory::desc>(weights_h_dims_, dt::f32, tag::ldgoi);
user_weights_memory_ = CreateDesc<dnnl::memory>(weights_dims_desc, eng);
user_weights_h_memory_ = CreateDesc<dnnl::memory>(weights_h_dims_desc, eng);
weights_memory_ = CreateDesc<dnnl::memory>(weights_layer, eng);
weights_h_memory_ = CreateDesc<dnnl::memory>(weights_iter, eng);
bias_memory_ = CreateDesc<dnnl::memory>(bias_desc_, eng);
user_weights_memory_ = CreateDesc<dnnl::memory>(weights_dims_desc, engine_);
user_weights_h_memory_ = CreateDesc<dnnl::memory>(weights_h_dims_desc, engine_);
weights_memory_ = CreateDesc<dnnl::memory>(weights_layer, engine_);
weights_h_memory_ = CreateDesc<dnnl::memory>(weights_iter, engine_);
bias_memory_ = CreateDesc<dnnl::memory>(bias_desc_, engine_);
InitOutputSize(outputs);
return KRET_OK;
@ -200,7 +188,7 @@ bool LstmCpuKernelMod::Launch(const std::vector<kernel::AddressPtr> &inputs, con
SetArgumentHandle(DNNL_ARG_DST_ITER, outputs[1]->addr);
SetArgumentHandle(DNNL_ARG_DST_ITER_C, outputs[2]->addr);
if (is_training_) {
SetArgumentHandle(DNNL_ARG_WORKSPACE, outputs[3]->addr);
SetArgumentHandle(DNNL_ARG_WORKSPACE, outputs[kOutputWorkSpaceIndex]->addr);
}
ExecutePrimitive();
return true;

View File

@ -69,7 +69,7 @@ class LstmCpuKernelMod : public MKLCpuKernelMod {
bool bidirectional_{false};
bool has_bias_{false};
bool is_training_{false};
size_t reserve_size_{0};
size_t reserve_size_{1};
dnnl::memory::dims weights_dims_;
dnnl::memory::dims weights_h_dims_;

View File

@ -40,7 +40,6 @@ constexpr int kDstIterCIdx = 6;
constexpr int kDiffDstLayerIdx = 7;
constexpr int kDiffDstIterIdx = 8;
constexpr int kDiffDstIterCIdx = 9;
constexpr int kWorkspaceIdx = 10;
constexpr int kNumberOne = 1;
constexpr int kNumberTwo = 2;
constexpr int kNumberFour = 4;
@ -154,8 +153,7 @@ void LSTMGradCpuKernelMod::InitDnnl() {
primitive_ = CreatePrimitive<dnnl::lstm_backward>(prim_backward_desc_);
auto wksp_desc = GetWorkspaceDesc(prim_forward_desc);
reserve_size_ = GetSize(wksp_desc);
AddArgument(DNNL_ARG_WORKSPACE, wksp_desc);
AddArgumentOp(src_desc, src_h_desc, src_c_desc, bias_desc, dst_desc, dst_h_desc, dst_c_desc);
AddArgumentOp(src_desc, src_h_desc, src_c_desc, bias_desc, dst_desc, dst_h_desc, dst_c_desc, wksp_desc);
// construct fw memory
weights_layer_desc_ = GetWeightsLayerDesc(prim_backward_desc_);
@ -183,7 +181,7 @@ void LSTMGradCpuKernelMod::InitDnnl() {
void LSTMGradCpuKernelMod::AddArgumentOp(const dnnl::memory::desc &src_desc, const dnnl::memory::desc &src_h_desc,
const dnnl::memory::desc &src_c_desc, const dnnl::memory::desc &bias_desc,
const dnnl::memory::desc &dst_desc, const dnnl::memory::desc &dst_h_desc,
const dnnl::memory::desc &dst_c_desc) {
const dnnl::memory::desc &dst_c_desc, const dnnl::memory::desc &wksp_desc) {
AddArgument(DNNL_ARG_SRC_LAYER, src_desc);
AddArgument(DNNL_ARG_SRC_ITER, src_h_desc);
AddArgument(DNNL_ARG_SRC_ITER_C, src_c_desc);
@ -202,6 +200,7 @@ void LSTMGradCpuKernelMod::AddArgumentOp(const dnnl::memory::desc &src_desc, con
AddArgument(DNNL_ARG_DIFF_DST_LAYER, dst_desc);
AddArgument(DNNL_ARG_DIFF_DST_ITER, dst_h_desc);
AddArgument(DNNL_ARG_DIFF_DST_ITER_C, dst_c_desc);
AddArgument(DNNL_ARG_WORKSPACE, wksp_desc);
}
void LSTMGradCpuKernelMod::SetArgumentHandleOp(const std::vector<kernel::AddressPtr> &inputs,
@ -215,7 +214,7 @@ void LSTMGradCpuKernelMod::SetArgumentHandleOp(const std::vector<kernel::Address
SetArgumentHandle(DNNL_ARG_DST_LAYER, inputs[kDstLayerIdx]->addr);
SetArgumentHandle(DNNL_ARG_DST_ITER, inputs[kDstIterIdx]->addr);
SetArgumentHandle(DNNL_ARG_DST_ITER_C, inputs[kDstIterCIdx]->addr);
SetArgumentHandle(DNNL_ARG_WORKSPACE, inputs[kWorkspaceIdx]->addr);
SetArgumentHandle(DNNL_ARG_WORKSPACE, inputs[kInputWorkSpaceIndex]->addr);
SetArgumentHandle(DNNL_ARG_DIFF_SRC_LAYER, outputs[kSrcLayerIdx]->addr);
SetArgumentHandle(DNNL_ARG_DIFF_SRC_ITER, outputs[kSrcIterIdx]->addr);
SetArgumentHandle(DNNL_ARG_DIFF_SRC_ITER_C, outputs[kSrcIterCIdx]->addr);

View File

@ -64,7 +64,7 @@ class LSTMGradCpuKernelMod : public MKLCpuKernelMod {
void AddArgumentOp(const dnnl::memory::desc &src_desc, const dnnl::memory::desc &src_h_desc,
const dnnl::memory::desc &src_c_desc, const dnnl::memory::desc &bias_desc,
const dnnl::memory::desc &dst_desc, const dnnl::memory::desc &dst_h_desc,
const dnnl::memory::desc &dst_c_desc);
const dnnl::memory::desc &dst_c_desc, const dnnl::memory::desc &wksp_desc);
void SetArgumentHandleOp(const std::vector<kernel::AddressPtr> &inputs,
const std::vector<kernel::AddressPtr> &outputs);
void ResetMemory(const dnnl::memory &mem, const string name) const;

View File

@ -123,10 +123,10 @@ template <typename T>
bool UnpackCpuKernelMod::LaunchKernel(const std::vector<AddressPtr> &inputs,
const std::vector<kernel::AddressPtr> &workspace,
const std::vector<AddressPtr> &outputs) {
const void *input = reinterpret_cast<void *>(inputs[0]->addr);
void **outputs_host = reinterpret_cast<void **>(workspace[0]->addr);
const auto *input = reinterpret_cast<unsigned char *>(inputs[0]->addr);
auto **outputs_host = reinterpret_cast<unsigned char **>(workspace[0]->addr);
for (size_t i = 0; i < outputs.size(); i++) {
outputs_host[i] = reinterpret_cast<T *>(outputs[i]->addr);
outputs_host[i] = reinterpret_cast<unsigned char *>(outputs[i]->addr);
}
size_t total_size = input_size_ * sizeof(T);
@ -135,7 +135,26 @@ bool UnpackCpuKernelMod::LaunchKernel(const std::vector<AddressPtr> &inputs,
<< total_size << " bytes";
}
int data_size = SizeToInt(sizeof(T));
Unstack(input, outputs_host, &unstack_param_, data_size);
int copy_size = unstack_param_.after_dims_ * data_size;
int cp_ret = EOK;
auto task = [this, input, outputs_host, data_size, copy_size, &cp_ret](size_t start, size_t end) {
for (size_t i = start; i < end; i++) {
int n = i / unstack_param_.axis_dim_;
int c = i % unstack_param_.axis_dim_;
int in_offset = n * unstack_param_.axis_dim_ * unstack_param_.after_dims_ + c * unstack_param_.after_dims_;
int out_offset = n * unstack_param_.after_dims_;
auto ret =
memcpy_s(outputs_host[c] + out_offset * data_size, copy_size, input + in_offset * data_size, copy_size);
if (ret != EOK && cp_ret == EOK) {
cp_ret = ret;
}
}
};
ParallelLaunchAutoSearch(task, IntToSize(unstack_param_.num_ * unstack_param_.pre_dims_), this,
&parallel_search_info_);
if (cp_ret != EOK) {
MS_LOG(EXCEPTION) << "For " << kernel_name_ << ", memcpy error, errorno: " << cp_ret;
}
return true;
}

View File

@ -36,7 +36,6 @@ bool BatchNormGradGradGpuKernelMod::Init(const BaseOperatorPtr &base_operator,
auto [is_match, index] = MatchKernelAttr(kernel_attr, GetOpSupport());
if (!is_match) {
MS_LOG(EXCEPTION) << kernel_name_ << " does not support this kernel data type: " << kernel_attr;
return false;
}
execute_func_ = func_list_[index].second;
is_training_ = op->get_is_training();

View File

@ -856,7 +856,8 @@ class Parser:
attr = 'source'
try:
source = inspect.getsourcelines(self.fn)
if context.get_context('support_binary') and '/mindspore/' not in self.filename and \
if context.get_context('support_binary') and \
'/mindspore/' not in self.filename and '\\mindspore\\' not in self.filename and \
(not hasattr(self.fn, attr) or getattr(self.fn, attr) != source):
if not os.access(self.filename, os.W_OK):
raise PermissionError(f"Don't have the write permission on the file {self.filename}.")

View File

@ -1213,6 +1213,7 @@ def get_bprop_lstm(self):
return dx, dhx, dcx, dw
if context.get_context('device_target') == "CPU":
self.add_prim_attr("is_training", True)
return bprop_cpu
return bprop

View File

@ -48,7 +48,7 @@ def test_cpu_profiling():
if os.path.isdir(data_path):
shutil.rmtree(data_path)
context.set_context(mode=context.GRAPH_MODE, device_target="CPU")
device_id = context.get_context("device_id")
rank_id = int(os.getenv('RANK_ID')) if os.getenv('RANK_ID') else 0
profiler = Profiler(output_path="data_cpu_profiler")
x = np.random.randn(1, 3, 3, 4).astype(np.float32)
y = np.random.randn(1, 3, 3, 4).astype(np.float32)
@ -60,9 +60,9 @@ def test_cpu_profiling():
assert len(os.listdir(data_path)) == 1
profiler_dir = os.path.join(data_path, f"{os.listdir(data_path)[0]}/")
op_detail_file = f"{profiler_dir}cpu_op_detail_info_{device_id}.csv"
op_type_file = f"{profiler_dir}cpu_op_type_info_{device_id}.csv"
timeline_file = f"{profiler_dir}cpu_op_execute_timestamp_{device_id}.txt"
op_detail_file = f"{profiler_dir}cpu_op_detail_info_{rank_id}.csv"
op_type_file = f"{profiler_dir}cpu_op_type_info_{rank_id}.csv"
timeline_file = f"{profiler_dir}cpu_op_execute_timestamp_{rank_id}.txt"
cpu_profiler_files = (op_detail_file, op_type_file, timeline_file)
for file in cpu_profiler_files:
assert os.path.isfile(file)

View File

@ -24,7 +24,7 @@ import numpy as np
import pytest
from mindspore import Model
from mindspore import nn
from mindspore import nn, context
from mindspore import dataset as ds
from mindspore.common.initializer import TruncatedNormal
from mindspore.train.callback import Callback, OnRequestExit, LossMonitor
@ -136,6 +136,7 @@ def test_on_request_exit_callback():
Expectation: When the signal is received,
the training process should be stopped and the ckpt and mindir should be saved.
"""
context.set_context(mode=context.GRAPH_MODE)
directory = "./data"
if os.path.exists(directory):
shutil.rmtree(directory)