diff --git a/config/e2e_dump_config.json b/config/e2e_dump_config.json index ad75c2f27f8..fdba941f971 100644 --- a/config/e2e_dump_config.json +++ b/config/e2e_dump_config.json @@ -6,17 +6,17 @@ "net_name": "ResNet50", "mode": 0, "iteration": 0, - "kernels": ["TensorAdd"] + "kernels": ["Default/Conv2D-op2", "Default/TensorAdd-op10"] }, "DumpSettingsSpec": { - "enable": "true: dump enable false: dump disable", - "trans_flag": "true: trans to host format,false: not trans format", + "enable": "true: dump enable, false: dump disable", + "trans_flag": "true: trans to host format, false: not trans format", "path": "the dump file folder", "net_name": "net name eg:ResNet50", - "mode": "0: dump all kernels 1: dump kernels in kernels list", - "iteration": "0: all iteration others: specified iteration ", - "kernels": "kernel name list need to be dump" + "mode": "0: dump all kernels, 1: dump kernels in kernels list", + "iteration": "0: all iterations, others: the specified iteration", + "kernels": "full scope names of the ops which need to be dumped" }, "other": {} } \ No newline at end of file diff --git a/config/e2e_dump_config_0.json b/config/e2e_dump_config_0.json index a67a4daba07..64b18b8b553 100644 --- a/config/e2e_dump_config_0.json +++ b/config/e2e_dump_config_0.json @@ -6,17 +6,17 @@ "net_name": "ResNet50", "mode": 0, "iteration": 0, - "kernels": ["AllReduce","BiasAddGrad","Conv2DBackpropFilter","SparseSoftmaxCrossEntropyWithLogits"] + "kernels": ["Default/Conv2D-op2", "Default/TensorAdd-op10"] }, "DumpSettingsSpec": { - "enable": "true: dump enable false: dump disable", - "trans_flag": "true: trans to host format,false: not trans format", + "enable": "true: dump enable, false: dump disable", + "trans_flag": "true: trans to host format, false: not trans format", "path": "the dump file folder", "net_name": "net name eg:ResNet50", - "mode": "0: dump all kernels 1: dump kernels in kernels list", - "iteration": "0: all iteration others: specified iteration ", - "kernels": "kernel 
name list need to be dump" + "mode": "0: dump all kernels, 1: dump kernels in kernels list", + "iteration": "0: all iterations, others: the specified iteration", + "kernels": "full scope names of the ops which need to be dumped" }, "other": {} -} +} \ No newline at end of file diff --git a/config/e2e_dump_config_1.json b/config/e2e_dump_config_1.json index 226b91ae097..14864877996 100644 --- a/config/e2e_dump_config_1.json +++ b/config/e2e_dump_config_1.json @@ -6,17 +6,17 @@ "net_name": "ResNet50", "mode": 0, "iteration": 0, - "kernels": ["AllReduce","BiasAddGrad","Conv2DBackpropFilter","SparseSoftmaxCrossEntropyWithLogits"] + "kernels": ["Default/Conv2D-op2", "Default/TensorAdd-op10"] }, "DumpSettingsSpec": { - "enable": "true: dump enable false: dump disable", - "trans_flag": "true: trans to host format,false: not trans format", + "enable": "true: dump enable, false: dump disable", + "trans_flag": "true: trans to host format, false: not trans format", "path": "the dump file folder", "net_name": "net name eg:ResNet50", - "mode": "0: dump all kernels 1: dump kernels in kernels list", - "iteration": "0: all iteration others: specified iteration ", - "kernels": "kernel name list need to be dump" + "mode": "0: dump all kernels, 1: dump kernels in kernels list", + "iteration": "0: all iterations, others: the specified iteration", + "kernels": "full scope names of the ops which need to be dumped" }, "other": {} -} +} \ No newline at end of file diff --git a/mindspore/ccsrc/common/trans.cc b/mindspore/ccsrc/common/trans.cc index ea84537c1a3..4748d592864 100644 --- a/mindspore/ccsrc/common/trans.cc +++ b/mindspore/ccsrc/common/trans.cc @@ -53,6 +53,7 @@ enum DataTypeTransMode { FROM_INT8_TO_FLOAT, FROM_INT8_TO_INT32, FROM_INT64_TO_INT32, + FROM_UINT16_TO_INT32, }; const std::map, DataTypeTransMode> mode_map{ @@ -68,7 +69,8 @@ const std::map, DataTypeTransMode> mode_map{ {std::pair(kNumberTypeUInt8, kNumberTypeInt32), FROM_UINT8_TO_INT32}, {std::pair(kNumberTypeInt8, kNumberTypeFloat32), 
FROM_INT8_TO_FLOAT}, {std::pair(kNumberTypeInt8, kNumberTypeInt32), FROM_INT8_TO_INT32}, - {std::pair(kNumberTypeInt64, kNumberTypeInt32), FROM_INT64_TO_INT32}}; + {std::pair(kNumberTypeInt64, kNumberTypeInt32), FROM_INT64_TO_INT32}, + {std::pair(kNumberTypeUInt16, kNumberTypeInt32), FROM_UINT16_TO_INT32}}; template void TransDataSrc2Dst(const TypeIdArgs &args, void *dst, const size_t data_size) { @@ -116,6 +118,9 @@ bool CastKernel(const TypeIdArgs &args, void *dst, const size_t data_size, const case FROM_INT64_TO_INT32: TransDataSrc2Dst(args, dst, data_size); break; + case FROM_UINT16_TO_INT32: + TransDataSrc2Dst(args, dst, data_size); + break; default: MS_LOG(ERROR) << "unsupported datatype trans"; return false; diff --git a/mindspore/ccsrc/device/ascend/ascend_device_address.cc b/mindspore/ccsrc/device/ascend/ascend_device_address.cc index a521f1516f9..b8b7f452e37 100644 --- a/mindspore/ccsrc/device/ascend/ascend_device_address.cc +++ b/mindspore/ccsrc/device/ascend/ascend_device_address.cc @@ -106,13 +106,13 @@ bool AscendDeviceAddress::SyncDeviceToHost(const std::vector &shape, size_t } else { auto shape_size = trans::ShapeSize(host_shape); auto host = std::vector(size_); - const trans::TypeIdArgs type_args{ptr_, shape_size, type_id_, type}; - sync_ok = trans::TransDataType(type_args, host.data()); + SyncMemory(host.data(), ptr_, size_, RT_MEMCPY_DEVICE_TO_HOST); + const trans::TypeIdArgs type_args{host.data(), shape_size, type_id_, type}; + sync_ok = trans::TransDataType(type_args, host_ptr); if (!sync_ok) { MS_LOG(ERROR) << "trans data type failed."; return false; } - SyncMemory(host_ptr, host.data(), size, RT_MEMCPY_DEVICE_TO_HOST); } } else if (format_ == kOpFormat_NC1HWC0 || format_ == kOpFormat_FRAC_Z || format_ == kOpFormat_FRAC_NZ) { sync_ok = SyncDeviceToHostAndConvertFormat(shape, size, type, host_ptr); diff --git a/mindspore/ccsrc/device/ascend/ascend_kernel_runtime.cc b/mindspore/ccsrc/device/ascend/ascend_kernel_runtime.cc index 
a7dfc96b2f8..dc7eb5449b4 100644 --- a/mindspore/ccsrc/device/ascend/ascend_kernel_runtime.cc +++ b/mindspore/ccsrc/device/ascend/ascend_kernel_runtime.cc @@ -150,9 +150,9 @@ void DumpOutput(mindspore::session::KernelGraph *graph, const string &dump_path, auto output_size = AnfAlgo::GetOutputTensorNum(node); for (size_t j = 0; j < output_size; ++j) { auto addr = AnfAlgo::GetOutputAddr(node, j); - auto shape = AnfAlgo::GetOutputDeviceShape(node, j); - auto type = AnfAlgo::GetOutputDeviceDataType(node, j); - auto format = AnfAlgo::GetOutputFormat(node, j); + auto shape = AnfAlgo::GetOutputInferShape(node, j); + auto type = AnfAlgo::GetOutputInferDataType(node, j); + auto format = kOpFormat_DEFAULT; string filepath = dump_path + '/' + kernel_name + '_' + "output_" + std::to_string(j); auto ascend_addr = dynamic_cast(addr); std::vector int_shapes; @@ -181,9 +181,9 @@ void DumpParameters(mindspore::session::KernelGraph *graph, const string &dump_p continue; } auto addr = AnfAlgo::GetOutputAddr(item, PRAMATER_OUTPUT_INDEX); - auto shape = AnfAlgo::GetOutputDeviceShape(item, PRAMATER_OUTPUT_INDEX); - auto type = AnfAlgo::GetOutputDeviceDataType(item, PRAMATER_OUTPUT_INDEX); - auto format = AnfAlgo::GetOutputFormat(item, PRAMATER_OUTPUT_INDEX); + auto shape = AnfAlgo::GetOutputInferShape(item, PRAMATER_OUTPUT_INDEX); + auto type = AnfAlgo::GetOutputInferDataType(item, PRAMATER_OUTPUT_INDEX); + auto format = kOpFormat_DEFAULT; string filepath = dump_path + '/' + parameter_name + '_' + "output_0"; auto ascend_addr = dynamic_cast(addr); std::vector int_shapes;