Change IR save path to `save_graphs_path/` in standalone mode and `save_graphs_path/rank_${rank_id}/` in distributed mode

2021-08-28 10:02:12 +08:00 · 2021-08-28 10:02:12 +08:00 · 88601da733
parent dd0cec9f7f
commit 88601da733
7 changed files with 25 additions and 14 deletions
--- a/mindspore/ccsrc/debug/common.h
+++ b/mindspore/ccsrc/debug/common.h
@ -59,7 +59,10 @@ inline std::string GetSaveGraphsPathName(const std::string &file_name, const std
  } else {
    save_graphs_path = save_path;
  }
-  return save_graphs_path + "/rank_" + std::to_string(GetRank()) + "/ir_dump/" + file_name;
+  if (IsStandAlone()) {
+    return save_graphs_path + "/" + file_name;
+  }
+  return save_graphs_path + "/rank_" + std::to_string(GetRank()) + "/" + file_name;
 }
 }  // namespace mindspore
 #endif  // MINDSPORE_CCSRC_DEBUG_COMMON_H_
--- a/mindspore/ccsrc/utils/comm_manager.cc
+++ b/mindspore/ccsrc/utils/comm_manager.cc
@ -233,4 +233,10 @@ uint32_t GetRank() {
  }
  return rank_id;
 }
+
+bool IsStandAlone() {  // True iff the parallel context's parallel_mode() equals parallel::STAND_ALONE.
+  auto parallel_context = parallel::ParallelContext::GetInstance();
+  MS_EXCEPTION_IF_NULL(parallel_context);  // NOTE(review): presumably raises on a null instance — confirm macro.
+  return parallel_context->parallel_mode() == parallel::STAND_ALONE;
+}
 }  // namespace mindspore
--- a/mindspore/ccsrc/utils/comm_manager.h
+++ b/mindspore/ccsrc/utils/comm_manager.h
@ -45,5 +45,7 @@ class CommManager {
 };

 uint32_t GetRank();
+
+bool IsStandAlone();
 }  // namespace mindspore
 #endif  // MINDSPORE_CCSRC_UTILS_COMMUNICATION_MANAGER_H
--- a/mindspore/context.py
+++ b/mindspore/context.py
@ -447,6 +447,7 @@ def set_auto_parallel_context(**kwargs):
    """
    _set_auto_parallel_context(**kwargs)

+
 def get_auto_parallel_context(attr_key):
    """
    Get auto parallel context attribute value according to the key.
@ -566,9 +567,8 @@ def set_context(**kwargs):
        save_graphs (bool): Whether to save graphs. Default: False.
        save_graphs_path (str): Path to save graphs. Default: ".".

-             Graphs will be saved to the directory of `save_graphs_path/rank_${rank_id}/ir_dump`.
-             When distributed training, `rank_id` is the ID of the current device in the cluster.
-             Otherwise, `rank_id` is `0`.
+             Standalone: graphs are saved to `save_graphs_path/`. Distributed training: graphs are saved to
+             `save_graphs_path/rank_${rank_id}/`, where `rank_id` is the ID of the current device in the cluster.
        enable_graph_kernel (bool): Whether to enable graph kernel fusion to optimize network execution performance.
             Default: False.
        graph_kernel_flags (str): Optimization options of graph kernel fusion. Experienced user only.
--- a/tests/st/auto_monad/test_auto_monad.py
+++ b/tests/st/auto_monad/test_auto_monad.py
@ -1214,7 +1214,7 @@ def find_newest_validateir_file(folder_path):


 def read_file():
-    filename = find_newest_validateir_file('./rank_0/ir_dump')
+    filename = find_newest_validateir_file('./')
    with open((os.path.join(filename)), 'r') as f:
        content = f.read()
    return content
--- a/tests/st/auto_monad/test_auto_monad_gpu.py
+++ b/tests/st/auto_monad/test_auto_monad_gpu.py
@ -136,7 +136,7 @@ def test_side_effect_castall():
    inputs1 = np.random.randn(5, 5)
    inputs2 = np.random.randn(5, 5)
    net(Tensor(inputs1, ms.float32), Tensor(inputs2, ms.float32))
-    result = find_files('./rank_0/ir_dump/hwopt*cast_all*.ir', 'CastAll')
+    result = find_files('./hwopt*cast_all*.ir', 'CastAll')
    assert result == '2'


@ -348,9 +348,9 @@ def test_ir_fusion_inplace_bn_conv_conv():
                                  keep_batchnorm_fp32=False)
    net.set_train()
    net(Tensor(input_np), Tensor(label))
-    find_accum = find_files("./rank_0/ir_dump/hwopt*cudnn_inplace*ir",
+    find_accum = find_files("./hwopt*cudnn_inplace*ir",
                            "inplace_algo: accumulation")
-    find_cover = find_files("./rank_0/ir_dump/hwopt*cudnn_inplace*ir",
+    find_cover = find_files("./hwopt*cudnn_inplace*ir",
                            "inplace_algo: cover")
    assert find_accum == '1'
    assert find_cover == '1'
@ -372,7 +372,7 @@ def find_newest_validateir_file(folder_path):


 def read_file():
-    filename = find_newest_validateir_file('./rank_0/ir_dump/')
+    filename = find_newest_validateir_file('./')
    with open((os.path.join(filename)), 'r') as f:
        content = f.read()
    clean_all_ir_files('./')
--- a/tests/st/mix_precision/test_mix_precision.py
+++ b/tests/st/mix_precision/test_mix_precision.py
@ -126,15 +126,15 @@ def test_sit_auto_mix_precision_model_o0():
    loss = nn.SoftmaxCrossEntropyWithLogits(sparse=False)
    model = Model(net, loss, opt, amp_level="O0")
    model.train(1, dataset1, dataset_sink_mode=False)
-    contend = read_validateir_file('./test_amp_o0/rank_0/ir_dump')
+    contend = read_validateir_file('./test_amp_o0/')
    castnum = re.findall(r"Cast\(", contend)
    assert len(castnum) == 5
    clean_all_ir_files('./test_amp_o0')
    model.predict(Tensor(input_data))
-    contend = read_validateir_file('./test_amp_o0/rank_0/ir_dump')
+    contend = read_validateir_file('./test_amp_o0/')
    castnum = re.findall(r"Cast\(", contend)
    assert len(castnum) == 11
-    clean_all_ir_files('./test_amp_o0/rank_0/ir_dump')
+    clean_all_ir_files('./test_amp_o0/')


@pytest.mark.level0
@ -162,10 +162,10 @@ def test_sit_auto_mix_precision_model_o2():
    loss = nn.SoftmaxCrossEntropyWithLogits(sparse=False)
    model = Model(net, loss, opt, amp_level="O2")
    model.train(1, dataset1, dataset_sink_mode=False)
-    contend = read_validateir_file('./test_amp_o2/rank_0/ir_dump')
+    contend = read_validateir_file('./test_amp_o2/')
    castnum = re.findall(r"Cast\(", contend)
    assert len(castnum) == 14
-    clean_all_ir_files('./test_amp_o2/rank_0/ir_dump')
+    clean_all_ir_files('./test_amp_o2/')
    out_graph = model.predict(Tensor(input_data))

    # pynative mode