forked from mindspore-Ecosystem/mindspore
optimize the gpu context switch
commit 05e8d95e7f
parent 1fde96546e
@@ -85,7 +85,7 @@ class PoolingGradGpuFwdKernel : public GpuKernel {
                                   padded_descriptor_, padded, &beta, padded_descriptor_, padded_dx),
         "cudnnPoolingBackward failed");

-      CalPadGrad(padded_size_ / sizeof(T), padded_dx, n_, c_, old_height_, old_width_, old_height_ + pad_height_,
+      CalPadGrad(output_size_ / sizeof(T), padded_dx, n_, c_, old_height_, old_width_, old_height_ + pad_height_,
                  old_width_ + pad_width_, pad_top_, pad_left_, dx, reinterpret_cast<cudaStream_t>(stream_ptr));
     } else {
       CHECK_CUDNN_RET_WITH_EXCEPT(
@@ -139,16 +139,10 @@ PYBIND11_MODULE(_c_expression, m) {
     .def("set_save_ms_model_flag", &mindspore::MsContext::set_save_ms_model_flag, "Set whether to save ms model.")
     .def("get_save_ms_model_path", &mindspore::MsContext::save_ms_model_path, "Get path to save ms model.")
     .def("set_save_ms_model_path", &mindspore::MsContext::set_save_ms_model_path, "Set path to save ms model")
-    .def("get_enable_gpu_summary", &mindspore::MsContext::enable_gpu_summary, "Get whether to enable gpu summary.")
-    .def("set_enable_gpu_summary", &mindspore::MsContext::set_enable_gpu_summary, "Set whether to enable gpu summary.")
     .def("get_enable_dump", &mindspore::MsContext::enable_dump, "Get whether to enable dump.")
     .def("set_enable_dump", &mindspore::MsContext::set_enable_dump, "Set whether to enable dump.")
     .def("get_save_dump_path", &mindspore::MsContext::save_dump_path, "Get path to dump.")
     .def("set_save_dump_path", &mindspore::MsContext::set_save_dump_path, "Set path to dump.")
-    .def("get_enable_dynamic_mem_pool", &mindspore::MsContext::enable_dynamic_mem_pool,
-         "Get whether to enable dynamic mem pool.")
-    .def("set_enable_dynamic_mem_pool", &mindspore::MsContext::set_enable_dynamic_mem_pool,
-         "Set whether to enable dynamic mem pool.")
     .def("set_graph_memory_max_size", &mindspore::MsContext::set_graph_memory_max_size, "set graph memory max size.")
     .def("set_variable_memory_max_size", &mindspore::MsContext::set_variable_memory_max_size,
          "set variable memory max size");
@@ -265,14 +265,6 @@ class _Context:
     def save_ms_model_path(self, save_ms_model_path):
         self._context_handle.set_save_ms_model_path(save_ms_model_path)

-    @property
-    def enable_gpu_summary(self):
-        return self._context_handle.get_enable_gpu_summary()
-
-    @enable_gpu_summary.setter
-    def enable_gpu_summary(self, enable_gpu_summary):
-        self._context_handle.set_enable_gpu_summary(enable_gpu_summary)
-
     @property
     def enable_auto_mixed_precision(self):
         return self._context_handle.get_auto_mixed_precision_flag()
@@ -315,14 +307,6 @@ class _Context:
         """Sets whether to save the network class name in the scope."""
         self._thread_local_info.reserve_class_name_in_scope = reserve_class_name_in_scope

-    @property
-    def enable_dynamic_memory(self):
-        return self._context_handle.get_enable_dynamic_mem_pool()
-
-    @enable_dynamic_memory.setter
-    def enable_dynamic_memory(self, enable_dynamic_memory):
-        self._context_handle.set_enable_dynamic_mem_pool(enable_dynamic_memory)
-
     @property
     def graph_memory_max_size(self):
         return None
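For context, every flag in _Context follows the pattern visible in the two hunks above: a Python property/setter pair that delegates to the pybind11 handle bound earlier. Below is a minimal self-contained sketch of that pattern using the surviving enable_dump flag; _StubHandle is a stand-in for the real _c_expression binding, not MindSpore code:

    # Sketch of the property/handle delegation pattern (stand-in handle).
    class _StubHandle:
        """Stand-in for the pybind11 MsContext handle."""
        def __init__(self):
            self._enable_dump = False

        def get_enable_dump(self):
            return self._enable_dump

        def set_enable_dump(self, flag):
            self._enable_dump = flag

    class _Context:
        def __init__(self):
            self._context_handle = _StubHandle()

        @property
        def enable_dump(self):
            # Reads go through the native handle's getter.
            return self._context_handle.get_enable_dump()

        @enable_dump.setter
        def enable_dump(self, enable_dump):
            # Writes go through the native handle's setter.
            self._context_handle.set_enable_dump(enable_dump)

    ctx = _Context()
    ctx.enable_dump = True
    assert ctx.enable_dump

Removing a flag therefore means deleting both the binding (.def pair) and the property pair, which is exactly what this commit does for enable_gpu_summary and enable_dynamic_memory.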
@@ -485,9 +469,9 @@ def reset_auto_parallel_context():
 @args_type_check(mode=int, precompile_only=bool, device_target=str,
                  device_id=int, enable_ir_fusion=bool, save_graphs=bool,
                  enable_task_sink=bool, save_graphs_path=str, enable_loop_sink=bool,
-                 enable_mem_reuse=bool, save_ms_model=bool, save_ms_model_path=str, enable_gpu_summary=bool,
+                 enable_mem_reuse=bool, save_ms_model=bool, save_ms_model_path=str,
                  enable_auto_mixed_precision=bool, enable_dump=bool, save_dump_path=str,
-                 enable_reduce_precision=bool, enable_dynamic_memory=bool, graph_memory_max_size=str,
+                 enable_reduce_precision=bool, graph_memory_max_size=str,
                  variable_memory_max_size=str)
 def set_context(**kwargs):
     """
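args_type_check is the type gate for set_context's keyword arguments, so dropping the two flags from the decorator removes them from the accepted argument set. A rough sketch of how such a decorator can work; this is an illustration under assumed semantics, not MindSpore's actual implementation:

    import functools

    def args_type_check(**type_spec):
        """Reject keyword arguments whose value does not match the declared type."""
        def decorator(func):
            @functools.wraps(func)
            def wrapper(*args, **kwargs):
                for name, value in kwargs.items():
                    expected = type_spec.get(name)
                    if expected is not None and not isinstance(value, expected):
                        raise TypeError(
                            f"{name} should be {expected.__name__}, "
                            f"got {type(value).__name__}")
                return func(*args, **kwargs)
            return wrapper
        return decorator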
@@ -521,7 +505,6 @@ def set_context(**kwargs):
         enable_mem_reuse (bool): Whether to enable memory reuse. Default: True.
         save_ms_model (bool): Whether to save lite model converted by graph. Default: False.
         save_ms_model_path (str): Path to save converted lite model. Default: "."
-        enable_gpu_summary (bool): Whether to enable gpu summary. Default: True.
         save_graphs_path (str): Path to save graphs. Default: "."
         enable_auto_mixed_precision (bool): Whether to enable auto mixed precision. Default: True.
         reserve_class_name_in_scope (bool) : Whether to save the network class name in the scope. Default: True.
@@ -530,7 +513,6 @@ def set_context(**kwargs):
         save_dump_path (str): When the program is executed on Ascend, operators can dump data here.
             The root dump path is configured in /home/HwHiAiUser/ide_daemon/ide_daemon.cfg.
             So the real dump path is "{configured root dump path}/{`save_dump_path`}". Default: ".".
-        enable_dynamic_memory (bool): Whether to enable dynamic memory. Default: False.
         graph_memory_max_size (str): Sets graph memory max size. Default: "26GB".
         variable_memory_max_size (str): Sets variable memory max size. Default: "5GB".

@@ -547,10 +529,8 @@ def set_context(**kwargs):
         >>> context.set_context(enable_mem_reuse=True)
         >>> context.set_context(enable_reduce_precision=True)
         >>> context.set_context(save_ms_model=True, save_ms_model_path=".")
-        >>> context.set_context(enable_gpu_summary=False)
         >>> context.set_context(enable_dump=True, save_dump_path=".")
         >>> context.set_context(reserve_class_name_in_scope=True)
-        >>> context.set_context(enable_dynamic_memory=True)
         >>> context.set_context(graph_memory_max_size="25GB")
         >>> context.set_context(variable_memory_max_size="6GB")
         >>> context.set_context(mode=context.GRAPH_MODE,
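After this change, callers that still pass the removed keywords should be rejected by set_context's argument validation. A hedged usage sketch; the exact exception type depends on how set_context handles unknown keys:

    from mindspore import context

    # Still valid after this commit:
    context.set_context(mode=context.GRAPH_MODE, device_target="GPU")

    # Removed by this commit; expected to be rejected as an unknown key:
    try:
        context.set_context(enable_gpu_summary=False)
    except (TypeError, ValueError) as err:
        print("rejected:", err)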
@@ -34,6 +34,8 @@ from mindspore.nn import TrainOneStepCell, WithLossCell
 from mindspore.nn import Dense
 from mindspore import amp

+context.set_context(mode=context.GRAPH_MODE, device_target="GPU")
+

 def random_normal_init(shape, mean=0.0, stddev=0.01, seed=None):
     init_value = np.ones(shape).astype(np.float32) * 0.01
@@ -324,7 +326,6 @@ def resnet50(num_classes):
 @pytest.mark.platform_x86_gpu_training
 @pytest.mark.env_onecard
 def test_trainTensor(num_classes=10, epoch=8, batch_size=1):
-    context.set_context(mode=context.GRAPH_MODE, device_target="GPU")
     net = resnet50(num_classes)
     lr = 0.1
     momentum = 0.9
@@ -345,8 +346,6 @@ def test_trainTensor(num_classes=10, epoch=8, batch_size=1):
 @pytest.mark.platform_x86_gpu_training
 @pytest.mark.env_onecard
 def test_trainTensor_amp(num_classes=10, epoch=18, batch_size=16):
-    context.set_context(mode=context.GRAPH_MODE, device_target="GPU", enable_mem_reuse=False,
-                        enable_dynamic_memory=False)
     net = resnet50(num_classes)
     lr = 0.1
     momentum = 0.9
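The test hunks move the device setup to module import time, so each test body relies on the shared context instead of configuring its own. A minimal sketch of the resulting pattern; the test name here is illustrative, and get_context is MindSpore's public accessor for the current settings:

    from mindspore import context

    # Set once when the test module is imported (mirrors the @@ -34,6 +34,8 @@ hunk).
    context.set_context(mode=context.GRAPH_MODE, device_target="GPU")

    def test_context_is_shared():
        # Individual tests no longer call set_context themselves.
        assert context.get_context("device_target") == "GPU"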