delete macro of profiler
Signed-off-by: zhoufeng <zhoufeng54@huawei.com>
This commit is contained in:
parent
32d5f1e1e4
commit
c80c779f55
|
@ -183,9 +183,8 @@ ms_protobuf_generate(DISTRIBUTED_CLUSTER_TOPOLOGY_SRCS DISTRIBUTED_CLUSTER_TOPOL
|
|||
list(APPEND MINDSPORE_PROTO_LIST ${DISTRIBUTED_CLUSTER_TOPOLOGY_SRCS})
|
||||
|
||||
if(NOT ENABLE_SECURITY)
|
||||
include_directories("${CMAKE_BINARY_DIR}/profiler/device/ascend")
|
||||
file(GLOB_RECURSE PROFILER_PROTO_LIST RELATIVE ${CMAKE_CURRENT_SOURCE_DIR}
|
||||
"profiler/device/ascend/memory_profiling.proto")
|
||||
"plugin/device/ascend/hal/profiler/memory_profiling.proto")
|
||||
ms_protobuf_generate_py(PROFILER_MEM_PROTO_PY PROFILER_MEM_PROTO_HDRS_PY PROFILER_MEM_PROTO_PYS
|
||||
${PROFILER_PROTO_LIST})
|
||||
list(APPEND MINDSPORE_PROTO_LIST ${PROFILER_MEM_PROTO_PY})
|
||||
|
@ -333,14 +332,17 @@ set(BACKEND_SUB_COMP
|
|||
plugin/device/ascend/hal/device
|
||||
plugin/device/ascend/hal/hardware
|
||||
plugin/device/ascend/hal/hccl_adapter
|
||||
plugin/device/ascend/hal/profiler
|
||||
plugin/device/ascend/kernel
|
||||
plugin/device/ascend/optimizer
|
||||
plugin/device/cpu/hal/device
|
||||
plugin/device/cpu/hal/hardware
|
||||
plugin/device/cpu/hal/profiler
|
||||
plugin/device/cpu/kernel
|
||||
plugin/device/cpu/optimizer
|
||||
plugin/device/gpu/hal/device
|
||||
plugin/device/gpu/hal/hardware
|
||||
plugin/device/gpu/hal/profiler
|
||||
plugin/device/gpu/kernel
|
||||
plugin/device/gpu/optimizer
|
||||
)
|
||||
|
|
|
@ -75,7 +75,7 @@
|
|||
#endif
|
||||
#include "plugin/device/ascend/hal/device/ascend_bucket.h"
|
||||
#ifndef ENABLE_SECURITY
|
||||
#include "profiler/device/ascend/memory_profiling.h"
|
||||
#include "plugin/device/ascend/hal/profiler/memory_profiling.h"
|
||||
|
||||
using Adx::AdxRegDumpProcessCallBack;
|
||||
using mindspore::device::ascend::ProfilingManager;
|
||||
|
|
|
@ -38,7 +38,7 @@
|
|||
#endif
|
||||
#include "include/common/thread_pool.h"
|
||||
#ifndef ENABLE_SECURITY
|
||||
#include "profiler/device/ascend/memory_profiling.h"
|
||||
#include "plugin/device/ascend/hal/profiler/memory_profiling.h"
|
||||
|
||||
using mindspore::profiler::ascend::MemoryProfiling;
|
||||
using mindspore::profiler::ascend::NodeMemory;
|
||||
|
|
|
@ -68,7 +68,7 @@
|
|||
|
||||
#ifndef ENABLE_SECURITY
|
||||
#ifdef ENABLE_D
|
||||
#include "mindspore/ccsrc/profiler/device/ascend/parallel_strategy_profiling.h"
|
||||
#include "plugin/device/ascend/hal/profiler/parallel_strategy_profiling.h"
|
||||
#endif
|
||||
#endif
|
||||
|
||||
|
|
|
@ -21,7 +21,7 @@
|
|||
#include "acl/acl_rt.h"
|
||||
#ifndef ENABLE_SECURITY
|
||||
#include "plugin/device/ascend/hal/device/profiling/profiling_manager.h"
|
||||
#include "profiler/device/ascend/memory_profiling.h"
|
||||
#include "plugin/device/ascend/hal/profiler/memory_profiling.h"
|
||||
|
||||
using mindspore::device::ascend::ProfilingManager;
|
||||
using mindspore::profiler::ascend::MemoryProfiling;
|
||||
|
|
|
@ -24,7 +24,7 @@
|
|||
#include "plugin/device/ascend/hal/device/ascend_stream_assign.h"
|
||||
#include "plugin/device/ascend/hal/device/ascend_memory_adapter.h"
|
||||
#ifndef ENABLE_SECURITY
|
||||
#include "profiler/device/ascend/memory_profiling.h"
|
||||
#include "plugin/device/ascend/hal/profiler/memory_profiling.h"
|
||||
using mindspore::profiler::ascend::MemoryProfiling;
|
||||
#endif
|
||||
|
||||
|
|
|
@ -36,11 +36,11 @@
|
|||
#include "include/common/debug/dump_proto.h"
|
||||
#include "debug/data_dump/e2e_dump.h"
|
||||
#include "debug/debugger/debugger_utils.h"
|
||||
#include "profiler/device/ascend/memory_profiling.h"
|
||||
#include "plugin/device/ascend/hal/profiler/memory_profiling.h"
|
||||
#include "plugin/device/ascend/hal/device/profiling/profiling_manager.h"
|
||||
#include "utils/anf_utils.h"
|
||||
#include "profiler/device/ascend/pynative_profiling.h"
|
||||
#include "profiler/device/ascend/ascend_profiling.h"
|
||||
#include "plugin/device/ascend/hal/profiler/pynative_profiling.h"
|
||||
#include "plugin/device/ascend/hal/profiler/ascend_profiling.h"
|
||||
|
||||
using Adx::AdxRegDumpProcessCallBack;
|
||||
using mindspore::device::ascend::ProfilingManager;
|
||||
|
@ -432,7 +432,7 @@ bool AscendKernelExecutor::LaunchKernel(const CNodePtr &kernel, const vector<Add
|
|||
MS_EXCEPTION_IF_NULL(profiler_manage_instance);
|
||||
if ((profiler_manage_instance->GetNetDynamicShapeStatus() ||
|
||||
ms_context->get_param<int>(MS_CTX_EXECUTION_MODE) == kGraphMode) &&
|
||||
ascend_instance->GetProfilingEnableFlag()) {
|
||||
ascend_instance->GetEnableFlag()) {
|
||||
ascend_instance->GetNodeTaskIdStreamId(kernel, graph_id, device_id, kernel_type);
|
||||
}
|
||||
|
||||
|
|
|
@ -0,0 +1,7 @@
|
|||
if(NOT ENABLE_SECURITY AND ENABLE_D)
|
||||
file(GLOB_RECURSE PROFILER_SRC_LIST RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "*.cc")
|
||||
|
||||
set_property(SOURCE ${PROFILER_SRC_LIST} PROPERTY COMPILE_DEFINITIONS
|
||||
SUBMODULE_ID=mindspore::SubModuleId::SM_PROFILER)
|
||||
add_library(_mindspore_plugin_device_ascend_hal_profiler_obj OBJECT ${PROFILER_SRC_LIST})
|
||||
endif()
|
|
@ -13,21 +13,19 @@
|
|||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "profiler/device/ascend/ascend_profiling.h"
|
||||
#include "plugin/device/ascend/hal/profiler/ascend_profiling.h"
|
||||
#include <map>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include "common/util/error_manager/error_manager.h"
|
||||
#include "include/common/pybind_api/api_register.h"
|
||||
#include "utils/log_adapter.h"
|
||||
#include "include/common/utils/utils.h"
|
||||
#include "profiler/device/ascend/memory_profiling.h"
|
||||
#include "plugin/device/ascend/hal/profiler/memory_profiling.h"
|
||||
#include "plugin/device/ascend/hal/device/profiling/profiling_manager.h"
|
||||
#include "profiler/device/ascend/parallel_strategy_profiling.h"
|
||||
#include "plugin/device/ascend/hal/profiler/parallel_strategy_profiling.h"
|
||||
#include <nlohmann/json.hpp>
|
||||
#include "plugin/device/ascend/hal/device/profiling/profiling_reporter.h"
|
||||
#include "kernel/kernel.h"
|
||||
#include "backend/common/session/kernel_graph.h"
|
||||
#include "acl/acl_rt.h"
|
||||
|
||||
using mindspore::device::ascend::ProfilingManager;
|
||||
|
@ -37,8 +35,12 @@ using mindspore::profiler::ascend::MemoryProfiling;
|
|||
namespace mindspore {
|
||||
namespace profiler {
|
||||
namespace ascend {
|
||||
namespace {
|
||||
constexpr auto kUnknownErrorString = "Unknown error occurred";
|
||||
|
||||
PROFILER_REG(kAscendDevice, AscendProfiler);
|
||||
} // namespace
|
||||
|
||||
std::map<std::string, aclprofAicoreMetrics> kAicMetrics{
|
||||
{"ArithmeticUtilization", ACL_AICORE_ARITHMETIC_UTILIZATION},
|
||||
{"PipeUtilization", ACL_AICORE_PIPE_UTILIZATION},
|
||||
|
@ -47,9 +49,11 @@ std::map<std::string, aclprofAicoreMetrics> kAicMetrics{
|
|||
{"ResourceConflictRatio", ACL_AICORE_RESOURCE_CONFLICT_RATIO},
|
||||
};
|
||||
|
||||
std::shared_ptr<AscendProfiler> AscendProfiler::ascend_profiler_ = std::make_shared<AscendProfiler>();
|
||||
|
||||
std::shared_ptr<AscendProfiler> &AscendProfiler::GetInstance() { return ascend_profiler_; }
|
||||
std::shared_ptr<AscendProfiler> AscendProfiler::GetInstance() {
|
||||
auto instance = Profiler::GetInstance(kAscendDevice);
|
||||
MS_EXCEPTION_IF_NULL(instance);
|
||||
return std::dynamic_pointer_cast<AscendProfiler>(instance);
|
||||
}
|
||||
|
||||
void AscendProfiler::ReportErrorMessage() const {
|
||||
const std::string &error_message = ErrorManager::GetInstance().GetErrorMessage();
|
||||
|
@ -63,8 +67,7 @@ void AscendProfiler::StepProfilingEnable(const bool enable_flag) {
|
|||
enable_flag_ = enable_flag;
|
||||
}
|
||||
|
||||
void AscendProfiler::InitProfiling(const std::string &profiling_path, uint32_t device_id,
|
||||
const std::string &profiling_options) {
|
||||
void AscendProfiler::Init(const std::string &profiling_path, uint32_t device_id, const std::string &profiling_options) {
|
||||
MS_LOG(INFO) << "Begin to init profiling and call aclprofInit function.";
|
||||
profiling_options_ = profiling_options;
|
||||
profile_data_path_ = profiling_path;
|
||||
|
@ -183,7 +186,7 @@ void AscendProfiler::Stop() {
|
|||
StepProfilingEnable(false);
|
||||
}
|
||||
|
||||
void AscendProfiler::Finalize() const {
|
||||
void AscendProfiler::Finalize() {
|
||||
MS_LOG(INFO) << "Begin to finalize profiling";
|
||||
aclError aclRet = aclprofFinalize();
|
||||
if (aclRet != ACL_SUCCESS) {
|
||||
|
@ -220,16 +223,6 @@ void AscendProfiler::GetNodeTaskIdStreamId(const CNodePtr &kernel, uint32_t grap
|
|||
last_tid[t_id] = task_id;
|
||||
last_streamid[t_id] = stream_id;
|
||||
}
|
||||
|
||||
REGISTER_PYBIND_DEFINE(AscendProfiler_, ([](const py::module *m) {
|
||||
(void)py::class_<AscendProfiler, std::shared_ptr<AscendProfiler>>(*m, "AscendProfiler")
|
||||
.def_static("get_instance", &AscendProfiler::GetInstance, "AscendProfiler get_instance.")
|
||||
.def("init", &AscendProfiler::InitProfiling, py::arg("profiling_path"), py::arg("device_id"),
|
||||
py::arg("profiling_options"), "init")
|
||||
.def("start", &AscendProfiler::Start, "start")
|
||||
.def("stop", &AscendProfiler::Stop, "stop")
|
||||
.def("finalize", &AscendProfiler::Finalize, "finalize");
|
||||
}));
|
||||
} // namespace ascend
|
||||
} // namespace profiler
|
||||
} // namespace mindspore
|
|
@ -13,8 +13,8 @@
|
|||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef MINDSPORE_CCSRC_PROFILER_DEVICE_ASCEND_PROFILING_H
|
||||
#define MINDSPORE_CCSRC_PROFILER_DEVICE_ASCEND_PROFILING_H
|
||||
#ifndef MINDSPORE_CCSRC_PLUGIN_DEVICE_ASCEND_HAL_PROFILER_ASCEND_PROFILING_H
|
||||
#define MINDSPORE_CCSRC_PLUGIN_DEVICE_ASCEND_HAL_PROFILER_ASCEND_PROFILING_H
|
||||
#include <string>
|
||||
#include <memory>
|
||||
#include <map>
|
||||
|
@ -28,35 +28,30 @@ namespace profiler {
|
|||
namespace ascend {
|
||||
class AscendProfiler : public Profiler {
|
||||
public:
|
||||
static std::shared_ptr<AscendProfiler> &GetInstance();
|
||||
AscendProfiler() : profiling_options_("") {}
|
||||
static std::shared_ptr<AscendProfiler> GetInstance();
|
||||
|
||||
AscendProfiler() {}
|
||||
~AscendProfiler() = default;
|
||||
AscendProfiler(const AscendProfiler &) = delete;
|
||||
AscendProfiler &operator=(const AscendProfiler &) = delete;
|
||||
void Init(const std::string &profileDataPath) { return; }
|
||||
void InitProfiling(const std::string &profiling_path, uint32_t device_id, const std::string &profiling_options);
|
||||
void Stop();
|
||||
void Init(const std::string &profiling_path, uint32_t device_id, const std::string &profiling_options) override;
|
||||
void Finalize() override;
|
||||
void Start() override;
|
||||
void Stop() override;
|
||||
void StepProfilingEnable(const bool enable_flag) override;
|
||||
void OpDataProducerEnd() { return; }
|
||||
void Start();
|
||||
bool GetProfilingEnableFlag() const { return enable_flag_; }
|
||||
std::string GetProfilingOptions() const { return profiling_options_; }
|
||||
void OpDataProducerEnd() override { return; }
|
||||
uint64_t GetOptionsMask() const;
|
||||
aclprofAicoreMetrics GetAicMetrics() const;
|
||||
void Finalize() const;
|
||||
bool IsInitialized() const { return init_flag_; }
|
||||
void ReportErrorMessage() const;
|
||||
void GetNodeTaskIdStreamId(const CNodePtr &kernel, uint32_t graph_id, int device_id, const KernelType kernel_type);
|
||||
std::map<std::thread::id, uint32_t> last_tid;
|
||||
std::map<std::thread::id, uint32_t> last_streamid;
|
||||
|
||||
protected:
|
||||
void SaveProfileData() { return; }
|
||||
void ClearInst() { return; }
|
||||
void SaveProfileData() override { return; }
|
||||
void ClearInst() override { return; }
|
||||
|
||||
private:
|
||||
static std::shared_ptr<AscendProfiler> ascend_profiler_;
|
||||
std::string profiling_options_;
|
||||
uint32_t device_id_ = 0;
|
||||
uint32_t aicpu_kernel_type_ = 2;
|
||||
uint32_t max_op_taskid_limit_ = 65536;
|
||||
|
@ -65,4 +60,4 @@ class AscendProfiler : public Profiler {
|
|||
} // namespace ascend
|
||||
} // namespace profiler
|
||||
} // namespace mindspore
|
||||
#endif
|
||||
#endif // MINDSPORE_CCSRC_PLUGIN_DEVICE_ASCEND_HAL_PROFILER_ASCEND_PROFILING_H
|
|
@ -14,15 +14,15 @@
|
|||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "profiler/device/ascend/memory_profiling.h"
|
||||
#include "plugin/device/ascend/hal/profiler/memory_profiling.h"
|
||||
#include <fstream>
|
||||
#include <memory>
|
||||
#include "utils/log_adapter.h"
|
||||
#include "utils/ms_context.h"
|
||||
#include "utils/ms_utils.h"
|
||||
#include "nlohmann/json.hpp"
|
||||
#include "profiler/device/ascend/ascend_profiling.h"
|
||||
#include "profiler/device/ascend/options.h"
|
||||
#include "plugin/device/ascend/hal/profiler/ascend_profiling.h"
|
||||
#include "plugin/device/ascend/hal/profiler/options.h"
|
||||
|
||||
namespace mindspore {
|
||||
namespace profiler {
|
|
@ -14,8 +14,8 @@
|
|||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef MINDSPORE_PROFILER_DEVICE_COMMON_PROFILING_MEMORY_H
|
||||
#define MINDSPORE_PROFILER_DEVICE_COMMON_PROFILING_MEMORY_H
|
||||
#ifndef MINDSPORE_CCSRC_PLUGIN_DEVICE_ASCEND_HAL_PROFILER_PROFILING_MEMORY_H
|
||||
#define MINDSPORE_CCSRC_PLUGIN_DEVICE_ASCEND_HAL_PROFILER_PROFILING_MEMORY_H
|
||||
|
||||
#include "proto/memory_profiling.pb.h"
|
||||
#include <string>
|
||||
|
@ -130,4 +130,4 @@ class MemoryProfiling {
|
|||
} // namespace ascend
|
||||
} // namespace profiler
|
||||
} // namespace mindspore
|
||||
#endif
|
||||
#endif // MINDSPORE_CCSRC_PLUGIN_DEVICE_ASCEND_HAL_PROFILER_PROFILING_MEMORY_H
|
|
@ -15,10 +15,10 @@
|
|||
|
||||
#include <string>
|
||||
#include <climits>
|
||||
#include "profiler/device/ascend/options.h"
|
||||
#include "plugin/device/ascend/hal/profiler/options.h"
|
||||
#include "utils/ms_context.h"
|
||||
#include "include/common/debug/common.h"
|
||||
#include "profiler/device/ascend/ascend_profiling.h"
|
||||
#include "plugin/device/ascend/hal/profiler/ascend_profiling.h"
|
||||
|
||||
constexpr char kOutputPath[] = "output";
|
||||
|
||||
|
@ -26,7 +26,7 @@ namespace mindspore {
|
|||
namespace profiler {
|
||||
namespace ascend {
|
||||
std::string GetOutputPath() {
|
||||
auto ascend_profiler = AscendProfiler::GetInstance();
|
||||
auto ascend_profiler = Profiler::GetInstance(kAscendDevice);
|
||||
MS_EXCEPTION_IF_NULL(ascend_profiler);
|
||||
const std::string options_str = ascend_profiler->GetProfilingOptions();
|
||||
nlohmann::json options_json;
|
|
@ -13,8 +13,8 @@
|
|||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef MINDSPORE_OPTIONS_H
|
||||
#define MINDSPORE_OPTIONS_H
|
||||
#ifndef MINDSPORE_CCSRC_PLUGIN_DEVICE_ASCEND_HAL_PROFILER_OPTIONS_H
|
||||
#define MINDSPORE_CCSRC_PLUGIN_DEVICE_ASCEND_HAL_PROFILER_OPTIONS_H
|
||||
|
||||
#include <string>
|
||||
|
||||
|
@ -28,4 +28,4 @@ nlohmann::json GetContextProfilingOption();
|
|||
} // namespace ascend
|
||||
} // namespace profiler
|
||||
} // namespace mindspore
|
||||
#endif // MINDSPORE_OPTIONS_H
|
||||
#endif // MINDSPORE_CCSRC_PLUGIN_DEVICE_ASCEND_HAL_PROFILER_OPTIONS_H
|
|
@ -13,15 +13,15 @@
|
|||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "profiler/device/ascend/parallel_strategy_profiling.h"
|
||||
#include "plugin/device/ascend/hal/profiler/parallel_strategy_profiling.h"
|
||||
|
||||
#include <vector>
|
||||
#include "sys/stat.h"
|
||||
|
||||
#include "include/common/debug/dump_proto.h"
|
||||
#include "include/common/utils/parallel_context.h"
|
||||
#include "profiler/device/ascend/options.h"
|
||||
#include "profiler/device/ascend/ascend_profiling.h"
|
||||
#include "plugin/device/ascend/hal/profiler/options.h"
|
||||
#include "plugin/device/ascend/hal/profiler/ascend_profiling.h"
|
||||
#include "proto/profiling_parallel.pb.h"
|
||||
#include "utils/ms_context.h"
|
||||
#include "include/common/utils/utils.h"
|
||||
|
@ -29,7 +29,7 @@
|
|||
|
||||
#include "google/protobuf/util/json_util.h"
|
||||
|
||||
#if ((defined ENABLE_CPU) && (!defined _WIN32))
|
||||
#ifdef WITH_BACKEND
|
||||
#include "ps/ps_context.h"
|
||||
#include "ps/util.h"
|
||||
#endif
|
||||
|
@ -43,14 +43,14 @@ bool profiling_parallel_strategy_enabled = true;
|
|||
irpb::ProfilingParallel cache_profiling_parallel_pb;
|
||||
|
||||
bool IsProfilingParallelStrategyEnabled() {
|
||||
auto ascend_profiler = AscendProfiler::GetInstance();
|
||||
auto ascend_profiler = Profiler::GetInstance(kAscendDevice);
|
||||
MS_EXCEPTION_IF_NULL(ascend_profiler);
|
||||
if (!profiling_parallel_strategy_enabled || !ascend_profiler->IsInitialized()) {
|
||||
MS_LOG(INFO) << "Profiling parallel strategy is disabled.";
|
||||
return false;
|
||||
}
|
||||
|
||||
#if ((defined ENABLE_CPU) && (!defined _WIN32))
|
||||
#ifdef WITH_BACKEND
|
||||
if (ps::PSContext::instance()->is_server() || ps::PSContext::instance()->is_scheduler()) {
|
||||
MS_LOG(INFO) << "Current is ps server or ps scheduler, profiling parallel "
|
||||
"strategy is disabled.";
|
||||
|
@ -146,9 +146,9 @@ void DumpProfileParallelStrategy(const FuncGraphPtr &func_graph) {
|
|||
|
||||
cache_profiling_parallel_pb = GetProfilingParallel(func_graph);
|
||||
|
||||
auto ascend_profiler = AscendProfiler::GetInstance();
|
||||
auto ascend_profiler = Profiler::GetInstance(kAscendDevice);
|
||||
MS_EXCEPTION_IF_NULL(ascend_profiler);
|
||||
if (!ascend_profiler->GetProfilingEnableFlag()) {
|
||||
if (!ascend_profiler->GetEnableFlag()) {
|
||||
MS_LOG(INFO) << "Profiling parallel strategy has not started.";
|
||||
return;
|
||||
}
|
|
@ -13,8 +13,8 @@
|
|||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef MINDSPORE_PARALLEL_STRATEGY_PROFILING_H
|
||||
#define MINDSPORE_PARALLEL_STRATEGY_PROFILING_H
|
||||
#ifndef MINDSPORE_CCSRC_PLUGIN_DEVICE_ASCEND_HAL_PROFILER_PARALLEL_STRATEGY_PROFILING_H
|
||||
#define MINDSPORE_CCSRC_PLUGIN_DEVICE_ASCEND_HAL_PROFILER_PARALLEL_STRATEGY_PROFILING_H
|
||||
|
||||
#include <string>
|
||||
|
||||
|
@ -31,4 +31,4 @@ void SaveParallelStrategyToFile();
|
|||
} // namespace profiler
|
||||
} // namespace mindspore
|
||||
|
||||
#endif // MINDSPORE_PARALLEL_STRATEGY_PROFILING_H
|
||||
#endif // MINDSPORE_CCSRC_PLUGIN_DEVICE_ASCEND_HAL_PROFILER_PARALLEL_STRATEGY_PROFILING_H
|
|
@ -21,23 +21,28 @@
|
|||
|
||||
#include "include/common/utils/utils.h"
|
||||
#include "profiler/device/profiling.h"
|
||||
#include "profiler/device/ascend/pynative_profiling.h"
|
||||
#include "plugin/device/ascend/hal/profiler/pynative_profiling.h"
|
||||
#include "include/common/pybind_api/api_register.h"
|
||||
#include "mindspore/core/utils/file_utils.h"
|
||||
|
||||
namespace mindspore {
|
||||
namespace profiler {
|
||||
namespace ascend {
|
||||
std::shared_ptr<PynativeProfiler> PynativeProfiler::profiler_inst_ = std::make_shared<PynativeProfiler>();
|
||||
namespace {
|
||||
constexpr auto kPyNativeName = "PyNative";
|
||||
|
||||
std::shared_ptr<PynativeProfiler> &PynativeProfiler::GetInstance() {
|
||||
MS_EXCEPTION_IF_NULL(profiler_inst_);
|
||||
return profiler_inst_;
|
||||
PROFILER_REG(kPyNativeName, PynativeProfiler);
|
||||
} // namespace
|
||||
|
||||
std::shared_ptr<PynativeProfiler> PynativeProfiler::GetInstance() {
|
||||
auto instance = Profiler::GetInstance(kPyNativeName);
|
||||
MS_EXCEPTION_IF_NULL(instance);
|
||||
return std::dynamic_pointer_cast<PynativeProfiler>(instance);
|
||||
}
|
||||
|
||||
void PynativeProfiler::Init(const std::string &profileDataPath) {
|
||||
void PynativeProfiler::Init(const std::string &profiling_path, uint32_t, const std::string &) {
|
||||
MS_LOG(INFO) << "Initialize pynatiave Ascend Profiling";
|
||||
profile_data_path_ = profileDataPath;
|
||||
profile_data_path_ = profiling_path;
|
||||
enable_flag_ = true;
|
||||
std::string device_id = common::GetEnv("RANK_ID");
|
||||
if (device_id.empty()) {
|
||||
|
@ -137,7 +142,7 @@ void PynativeProfiler::OpDataProducerEnd(std::thread::id thread_id, bool is_dyna
|
|||
op_info.start->ElapsedTime(&cost_time, op_info.end.get());
|
||||
|
||||
op_info.duration = cost_time;
|
||||
int64_t milli_second_ratio = 1000;
|
||||
constexpr int64_t milli_second_ratio = 1000;
|
||||
int64_t end_timestamp = GetRealTimeStamp();
|
||||
int64_t start_timestamp = end_timestamp - static_cast<int64_t>(cost_time * milli_second_ratio);
|
||||
double_t start_t = static_cast<double_t>(start_timestamp) / milli_second_ratio;
|
||||
|
@ -177,13 +182,6 @@ void PynativeProfiler::WriteOpDetail(const std::string &out_path_dir) {
|
|||
}
|
||||
|
||||
int PynativeProfiler::NewThreadIndex() { return thread_op_info_map_.size() + 1; }
|
||||
|
||||
REGISTER_PYBIND_DEFINE(PynativeProfiler_, ([](const py::module *m) {
|
||||
(void)py::class_<PynativeProfiler, std::shared_ptr<PynativeProfiler>>(*m, "PynativeProfiler")
|
||||
.def_static("get_instance", &PynativeProfiler::GetInstance, "PynativeProfiler get_instance.")
|
||||
.def("init", &PynativeProfiler::Init, py::arg("profile_data_path"), "init")
|
||||
.def("stop", &PynativeProfiler::Stop, "stop");
|
||||
}));
|
||||
} // namespace ascend
|
||||
} // namespace profiler
|
||||
} // namespace mindspore
|
|
@ -13,8 +13,8 @@
|
|||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef MINDSPORE_CCSRC_PROFILER_DEVICE_ASCEND_PYNATIVE_PROFILING_H_
|
||||
#define MINDSPORE_CCSRC_PROFILER_DEVICE_ASCEND_PYNATIVE_PROFILING_H_
|
||||
#ifndef MINDSPORE_CCSRC_PLUGIN_DEVICE_ASCEND_HAL_PROFILER_PYNATIVE_PROFILING_H_
|
||||
#define MINDSPORE_CCSRC_PLUGIN_DEVICE_ASCEND_HAL_PROFILER_PYNATIVE_PROFILING_H_
|
||||
|
||||
#include <cstdio>
|
||||
#include <map>
|
||||
|
@ -45,10 +45,12 @@ struct PynativeOpInfo {
|
|||
|
||||
class MS_CORE_API PynativeProfiler : public Profiler {
|
||||
public:
|
||||
static std::shared_ptr<PynativeProfiler> &GetInstance();
|
||||
static std::shared_ptr<PynativeProfiler> GetInstance();
|
||||
PynativeProfiler() = default;
|
||||
~PynativeProfiler() {}
|
||||
void Init(const std::string &profileDataPath) override;
|
||||
void Init(const std::string &profiling_path, uint32_t device_id, const std::string &profiling_options) override;
|
||||
void Finalize() override {}
|
||||
void Start() override {}
|
||||
void Stop() override;
|
||||
void OpDataProducerBegin(AscendKernelRuntime *runtime_instance_, void *stream, std::thread::id thread_id,
|
||||
const std::string &op_name, bool is_dynamic_shape);
|
||||
|
@ -63,16 +65,14 @@ class MS_CORE_API PynativeProfiler : public Profiler {
|
|||
void ClearInst() override;
|
||||
int NewThreadIndex();
|
||||
|
||||
static std::shared_ptr<PynativeProfiler> profiler_inst_;
|
||||
std::int32_t rank_id_;
|
||||
std::vector<PynativeOpInfo> pynative_op_info_;
|
||||
bool enable_flag_ = false;
|
||||
const uint64_t kUSecondInSecond = 1000000;
|
||||
const uint64_t milli_second_ratio = 1000;
|
||||
std::map<std::thread::id, PynativeOpInfo> thread_op_info_map_;
|
||||
std::shared_mutex op_map_mutex_;
|
||||
};
|
||||
} // namespace ascend
|
||||
} // namespace profiler
|
||||
} // namespace mindspore
|
||||
#endif // MINDSPORE_CCSRC_PROFILER_DEVICE_ASCEND_PYNATIVE_PROFILING_H_
|
||||
#endif // MINDSPORE_CCSRC_PLUGIN_DEVICE_ASCEND_HAL_PROFILER_PYNATIVE_PROFILING_H_
|
|
@ -32,7 +32,7 @@
|
|||
#include "include/common/utils/anfalgo.h"
|
||||
#include "backend/common/session/session_basic.h"
|
||||
#include "frontend/operator/ops.h"
|
||||
#include "profiler/device/cpu/cpu_profiling.h"
|
||||
#include "plugin/device/cpu/hal/profiler/cpu_profiling.h"
|
||||
#include "utils/shape_utils.h"
|
||||
#include "utils/profile.h"
|
||||
#include "utils/trace_base.h"
|
||||
|
|
|
@ -27,7 +27,7 @@
|
|||
#include "backend/common/session/anf_runtime_algorithm.h"
|
||||
#include "include/common/utils/anfalgo.h"
|
||||
#include "utils/any.h"
|
||||
#include "profiler/device/cpu/cpu_profiling.h"
|
||||
#include "plugin/device/cpu/hal/profiler/cpu_profiling.h"
|
||||
namespace mindspore {
|
||||
namespace device {
|
||||
namespace cpu {
|
||||
|
|
|
@ -41,7 +41,7 @@
|
|||
#include "common/graph_kernel/value_graph_binder.h"
|
||||
#include "backend/common/session/anf_runtime_algorithm.h"
|
||||
#include "include/common/utils/anfalgo.h"
|
||||
#include "profiler/device/cpu/cpu_profiling.h"
|
||||
#include "plugin/device/cpu/hal/profiler/cpu_profiling.h"
|
||||
#ifdef WITH_BACKEND
|
||||
#include "plugin/device/cpu/hal/hardware/ms_collective_comm_lib.h"
|
||||
#endif
|
||||
|
|
|
@ -0,0 +1,7 @@
|
|||
if(NOT ENABLE_SECURITY AND ENABLE_CPU)
|
||||
file(GLOB_RECURSE PROFILER_SRC_LIST RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "*.cc")
|
||||
|
||||
set_property(SOURCE ${PROFILER_SRC_LIST} PROPERTY COMPILE_DEFINITIONS
|
||||
SUBMODULE_ID=mindspore::SubModuleId::SM_PROFILER)
|
||||
add_library(_mindspore_plugin_device_cpu_hal_profiler_obj OBJECT ${PROFILER_SRC_LIST})
|
||||
endif()
|
|
@ -13,7 +13,7 @@
|
|||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
#include "profiler/device/cpu/cpu_data_saver.h"
|
||||
#include "plugin/device/cpu/hal/profiler/cpu_data_saver.h"
|
||||
#include <fstream>
|
||||
#include <numeric>
|
||||
#include "sys/stat.h"
|
||||
|
@ -32,27 +32,29 @@ void CpuDataSaver::WriteFile(const std::string out_path_dir) {
|
|||
MS_LOG(INFO) << "No cpu operation detail infos to write.";
|
||||
return;
|
||||
}
|
||||
#if ENABLE_GPU
|
||||
|
||||
auto context_ptr = MsContext::GetInstance();
|
||||
MS_EXCEPTION_IF_NULL(context_ptr);
|
||||
auto device_id = context_ptr->get_param<uint32_t>(MS_CTX_DEVICE_ID);
|
||||
device_id_ = std::to_string(device_id);
|
||||
#else
|
||||
auto rank_id = common::GetEnv("RANK_ID");
|
||||
// If RANK_ID is not set, default value is 0.
|
||||
if (rank_id.empty()) {
|
||||
rank_id = "0";
|
||||
}
|
||||
rank_id = std::string(rank_id);
|
||||
// When the value of RANK_ID is not a number, set its value to 0.
|
||||
for (int i = 0; i < static_cast<int>(rank_id.size()); i++) {
|
||||
if (std::isdigit(rank_id[i]) == 0) {
|
||||
if (context_ptr->get_param<std::string>(MS_CTX_DEVICE_TARGET) == kGPUDevice) {
|
||||
auto device_id = context_ptr->get_param<uint32_t>(MS_CTX_DEVICE_ID);
|
||||
device_id_ = std::to_string(device_id);
|
||||
} else {
|
||||
auto rank_id = common::GetEnv("RANK_ID");
|
||||
// If RANK_ID is not set, default value is 0.
|
||||
if (rank_id.empty()) {
|
||||
rank_id = "0";
|
||||
break;
|
||||
}
|
||||
rank_id = std::string(rank_id);
|
||||
// When the value of RANK_ID is not a number, set its value to 0.
|
||||
for (int i = 0; i < static_cast<int>(rank_id.size()); i++) {
|
||||
if (std::isdigit(rank_id[i]) == 0) {
|
||||
rank_id = "0";
|
||||
break;
|
||||
}
|
||||
}
|
||||
device_id_ = rank_id;
|
||||
}
|
||||
device_id_ = rank_id;
|
||||
#endif
|
||||
|
||||
op_side_ = "cpu";
|
||||
WriteOpDetail(out_path_dir);
|
||||
WriteOpType(out_path_dir);
|
|
@ -14,15 +14,15 @@
|
|||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef MINDSPORE_CCSRC_PROFILER_DEVICE_CPU_CPU_DATA_SAVER_H
|
||||
#define MINDSPORE_CCSRC_PROFILER_DEVICE_CPU_CPU_DATA_SAVER_H
|
||||
#ifndef MINDSPORE_CCSRC_PLUGIN_DEVICE_CPU_HAL_PROFILER_CPU_DATA_SAVER_H
|
||||
#define MINDSPORE_CCSRC_PLUGIN_DEVICE_CPU_HAL_PROFILER_CPU_DATA_SAVER_H
|
||||
#include <iostream>
|
||||
#include <algorithm>
|
||||
#include <unordered_map>
|
||||
#include <vector>
|
||||
#include <string>
|
||||
#include <memory>
|
||||
#include "profiler/device/cpu/cpu_profiling.h"
|
||||
#include "plugin/device/cpu/hal/profiler/cpu_profiling.h"
|
||||
#include "profiler/device/data_saver.h"
|
||||
namespace mindspore {
|
||||
namespace profiler {
|
||||
|
@ -52,4 +52,4 @@ class CpuDataSaver : public DataSaver {
|
|||
} // namespace profiler
|
||||
} // namespace mindspore
|
||||
|
||||
#endif // MINDSPORE_CCSRC_PROFILER_DEVICE_CPU_CPU_DATA_SAVER_H
|
||||
#endif // MINDSPORE_CCSRC_PLUGIN_DEVICE_CPU_HAL_PROFILER_CPU_DATA_SAVER_H
|
|
@ -14,13 +14,8 @@
|
|||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "profiler/device/cpu/cpu_profiling.h"
|
||||
|
||||
#include <cxxabi.h>
|
||||
#include <cmath>
|
||||
#include <ctime>
|
||||
#include "profiler/device/cpu/cpu_data_saver.h"
|
||||
#include "include/common/pybind_api/api_register.h"
|
||||
#include "plugin/device/cpu/hal/profiler/cpu_profiling.h"
|
||||
#include "plugin/device/cpu/hal/profiler/cpu_data_saver.h"
|
||||
#include "utils/log_adapter.h"
|
||||
#include "include/common/utils/utils.h"
|
||||
#include "utils/ms_context.h"
|
||||
|
@ -28,13 +23,19 @@
|
|||
namespace mindspore {
|
||||
namespace profiler {
|
||||
namespace cpu {
|
||||
std::shared_ptr<CPUProfiler> CPUProfiler::profiler_inst_ = std::make_shared<CPUProfiler>();
|
||||
std::shared_ptr<CPUProfiler> &CPUProfiler::GetInstance() { return profiler_inst_; }
|
||||
namespace {
|
||||
PROFILER_REG(kCPUDevice, CPUProfiler);
|
||||
} // namespace
|
||||
std::shared_ptr<CPUProfiler> CPUProfiler::GetInstance() {
|
||||
auto instance = Profiler::GetInstance(kCPUDevice);
|
||||
MS_EXCEPTION_IF_NULL(instance);
|
||||
return std::dynamic_pointer_cast<CPUProfiler>(instance);
|
||||
}
|
||||
|
||||
void CPUProfiler::Init(const std::string &profileDataPath = "") {
|
||||
void CPUProfiler::Init(const std::string &profiling_path, uint32_t, const std::string &) {
|
||||
MS_LOG(INFO) << "Initialize CPU Profiling";
|
||||
base_time_ = GetHostMonoTimeStamp();
|
||||
profile_data_path_ = profileDataPath;
|
||||
profile_data_path_ = profiling_path;
|
||||
MS_LOG(INFO) << " Host start time(ns): " << base_time_ << " profile data path: " << profile_data_path_;
|
||||
}
|
||||
|
||||
|
@ -108,16 +109,11 @@ void CPUProfiler::OpDataProducerBeginParallel(const std::string op_name, const u
|
|||
SetRunTimeData(op_name, pid, true);
|
||||
SetRuntimeStart(op_name, start_timestamp);
|
||||
|
||||
#if ENABLE_GPU
|
||||
if (MsContext::GetInstance()->get_param<bool>(MS_CTX_ENABLE_MINDRT)) {
|
||||
// For heterogeneous scene, record op name to gpu_profiler_inst.
|
||||
auto gpu_profiler_inst = profiler::gpu::GPUProfiler::GetInstance();
|
||||
// For cpu network, no gpu profiler, do not to raise exception.
|
||||
if (gpu_profiler_inst && gpu_profiler_inst->GetEnableFlag()) {
|
||||
gpu_profiler_inst->RecordOneStepStartEndInfo(op_name);
|
||||
}
|
||||
if (auto gpu_instance = Profiler::GetInstance(kGPUDevice);
|
||||
gpu_instance != nullptr && MsContext::GetInstance()->get_param<bool>(MS_CTX_ENABLE_MINDRT) &&
|
||||
gpu_instance->GetEnableFlag()) {
|
||||
gpu_instance->RecordOneStepStartEndInfo();
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
void CPUProfiler::RecordFrameWorkInfo(const CNodePtr &kernel) {
|
||||
|
@ -153,16 +149,11 @@ void CPUProfiler::OpDataProducerBegin(const std::string op_name, const uint32_t
|
|||
op_time_mono_start_ = GetHostMonoTimeStamp();
|
||||
SetRunTimeData(op_name, pid);
|
||||
|
||||
#if ENABLE_GPU
|
||||
if (MsContext::GetInstance()->get_param<bool>(MS_CTX_ENABLE_MINDRT)) {
|
||||
// For heterogeneous scene, record op name to gpu_profiler_inst.
|
||||
auto gpu_profiler_inst = profiler::gpu::GPUProfiler::GetInstance();
|
||||
// For cpu network, no gpu profiler, do not to raise exception.
|
||||
if (gpu_profiler_inst && gpu_profiler_inst->GetEnableFlag()) {
|
||||
gpu_profiler_inst->RecordOneStepStartEndInfo(op_name);
|
||||
}
|
||||
if (auto gpu_instance = Profiler::GetInstance(kGPUDevice);
|
||||
gpu_instance != nullptr && MsContext::GetInstance()->get_param<bool>(MS_CTX_ENABLE_MINDRT) &&
|
||||
gpu_instance->GetEnableFlag()) {
|
||||
gpu_instance->RecordOneStepStartEndInfo();
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
void CPUProfiler::OpDataProducerEnd() {
|
||||
|
@ -203,15 +194,6 @@ void CPUProfiler::ClearInst() {
|
|||
enable_flag_ = false;
|
||||
has_find = false;
|
||||
}
|
||||
|
||||
REGISTER_PYBIND_DEFINE(CPUProfiler_, ([](const py::module *m) {
|
||||
(void)py::class_<CPUProfiler, std::shared_ptr<CPUProfiler>>(*m, "CPUProfiler")
|
||||
.def_static("get_instance", &CPUProfiler::GetInstance, "CPUProfiler get_instance.")
|
||||
.def("init", &CPUProfiler::Init, py::arg("profile_data_path"), "init")
|
||||
.def("stop", &CPUProfiler::Stop, "stop")
|
||||
.def("step_profiling_enable", &CPUProfiler::StepProfilingEnable, py::arg("enable_flag"),
|
||||
"enable or disable step profiling");
|
||||
}));
|
||||
} // namespace cpu
|
||||
} // namespace profiler
|
||||
} // namespace mindspore
|
|
@ -14,8 +14,8 @@
|
|||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef MINDSPORE_CCSRC_PROFILER_DEVICE_CPU_PROFILING_H
|
||||
#define MINDSPORE_CCSRC_PROFILER_DEVICE_CPU_PROFILING_H
|
||||
#ifndef MINDSPORE_CCSRC_PLUGIN_DEVICE_CPU_HAL_PROFILER_CPU_PROFILING_H
|
||||
#define MINDSPORE_CCSRC_PLUGIN_DEVICE_CPU_HAL_PROFILER_CPU_PROFILING_H
|
||||
#include <algorithm>
|
||||
#include <cstdio>
|
||||
#include <map>
|
||||
|
@ -25,16 +25,13 @@
|
|||
#include <utility>
|
||||
#include <vector>
|
||||
#include "profiler/device/profiling.h"
|
||||
#if ENABLE_GPU
|
||||
#include "profiler/device/gpu/gpu_profiling.h"
|
||||
#endif
|
||||
#include "actor/actormgr.h"
|
||||
#include "backend/common/session/kernel_graph.h"
|
||||
|
||||
namespace mindspore {
|
||||
namespace profiler {
|
||||
namespace cpu {
|
||||
const float kNanosecondToMillisecond = 1000000;
|
||||
constexpr float kNanosecondToMillisecond = 1000000;
|
||||
struct CurKernelInputInfo {
|
||||
uint32_t input_id;
|
||||
std::string shape;
|
||||
|
@ -46,13 +43,16 @@ struct CurKernelInfo {
|
|||
};
|
||||
class CPUProfiler : public Profiler {
|
||||
public:
|
||||
static std::shared_ptr<CPUProfiler> &GetInstance();
|
||||
static std::shared_ptr<CPUProfiler> GetInstance();
|
||||
|
||||
CPUProfiler() = default;
|
||||
~CPUProfiler() = default;
|
||||
CPUProfiler(const CPUProfiler &) = delete;
|
||||
CPUProfiler &operator=(const CPUProfiler &) = delete;
|
||||
|
||||
void Init(const std::string &profileDataPath) override;
|
||||
void Init(const std::string &profiling_path, uint32_t device_id, const std::string &profiling_options) override;
|
||||
void Finalize() override {}
|
||||
void Start() override {}
|
||||
void Stop() override;
|
||||
void StepProfilingEnable(const bool enable_flag) override;
|
||||
void OpDataProducerBegin(const std::string op_name, const uint32_t pid);
|
||||
|
@ -70,7 +70,6 @@ class CPUProfiler : public Profiler {
|
|||
void SaveProfileData() override;
|
||||
void ClearInst() override;
|
||||
|
||||
static std::shared_ptr<CPUProfiler> profiler_inst_;
|
||||
uint64_t base_time_;
|
||||
std::string op_name_;
|
||||
uint32_t pid_;
|
||||
|
@ -83,4 +82,4 @@ class CPUProfiler : public Profiler {
|
|||
} // namespace profiler
|
||||
} // namespace mindspore
|
||||
|
||||
#endif // MINDSPORE_CCSRC_PROFILER_DEVICE_CPU_PROFILING_H
|
||||
#endif // MINDSPORE_CCSRC_PLUGIN_DEVICE_CPU_HAL_PROFILER_CPU_PROFILING_H
|
|
@ -37,8 +37,8 @@
|
|||
#include "ir/dtype.h"
|
||||
#include "backend/common/optimizer/dynamic_shape/dynamic_shape_helper.h"
|
||||
#ifndef ENABLE_SECURITY
|
||||
#include "profiler/device/gpu/gpu_profiling.h"
|
||||
#include "profiler/device/gpu/gpu_profiling_utils.h"
|
||||
#include "plugin/device/gpu/hal/profiler/gpu_profiling.h"
|
||||
#include "plugin/device/gpu/hal/profiler/gpu_profiling_utils.h"
|
||||
#endif
|
||||
#include "utils/shape_utils.h"
|
||||
#ifndef ENABLE_SECURITY
|
||||
|
|
|
@ -33,8 +33,8 @@
|
|||
#include "runtime/device/ms_device_shape_transfer.h"
|
||||
#include "common/graph_kernel/graph_kernel_flags.h"
|
||||
#include "plugin/device/gpu/hal/device/gpu_bucket.h"
|
||||
#include "profiler/device/gpu/gpu_profiling.h"
|
||||
#include "profiler/device/gpu/gpu_profiling_utils.h"
|
||||
#include "plugin/device/gpu/hal/profiler/gpu_profiling.h"
|
||||
#include "plugin/device/gpu/hal/profiler/gpu_profiling_utils.h"
|
||||
#include "backend/common/session/kernel_graph.h"
|
||||
#include "plugin/device/gpu/kernel/gpu_kernel.h"
|
||||
#include "plugin/device/gpu/kernel/gpu_kernel_factory.h"
|
||||
|
|
|
@ -0,0 +1,7 @@
|
|||
# Builds every .cc file found recursively under this directory into the object library
# _mindspore_plugin_device_gpu_hal_profiler_obj, compiling each source with SUBMODULE_ID set to
# the profiler submodule. Nothing is defined when ENABLE_SECURITY is set or ENABLE_GPU is not set.
if(NOT ENABLE_SECURITY AND ENABLE_GPU)
|
||||
file(GLOB_RECURSE PROFILER_SRC_LIST RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "*.cc")
|
||||
|
||||
set_property(SOURCE ${PROFILER_SRC_LIST} PROPERTY COMPILE_DEFINITIONS
|
||||
SUBMODULE_ID=mindspore::SubModuleId::SM_PROFILER)
|
||||
add_library(_mindspore_plugin_device_gpu_hal_profiler_obj OBJECT ${PROFILER_SRC_LIST})
|
||||
endif()
|
|
@ -16,7 +16,7 @@
|
|||
#include <cupti.h>
|
||||
#include <dlfcn.h>
|
||||
#include "utils/log_adapter.h"
|
||||
#include "profiler/device/gpu/cupti_interface.h"
|
||||
#include "plugin/device/gpu/hal/profiler/cupti_interface.h"
|
||||
|
||||
namespace mindspore {
|
||||
namespace profiler {
|
|
@ -13,8 +13,8 @@
|
|||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
#ifndef MINDSPORE_CUPTI_INTERFACE_H
|
||||
#define MINDSPORE_CUPTI_INTERFACE_H
|
||||
#ifndef MINDSPORE_CCSRC_PLUGIN_DEVICE_GPU_HAL_PROFILER_CUPTI_INTERFACE_H
|
||||
#define MINDSPORE_CCSRC_PLUGIN_DEVICE_GPU_HAL_PROFILER_CUPTI_INTERFACE_H
|
||||
#ifndef FUNC_EXPORT
|
||||
#define FUNC_EXPORT __attribute__((visibility("default")))
|
||||
#endif
|
||||
|
@ -41,4 +41,4 @@ CUptiResult CuptiFinalize();
|
|||
} // namespace profiler
|
||||
} // namespace mindspore
|
||||
|
||||
#endif // MINDSPORE_CUPTI_INTERFACE_H
|
||||
#endif // MINDSPORE_CCSRC_PLUGIN_DEVICE_GPU_HAL_PROFILER_CUPTI_INTERFACE_H
|
|
@ -13,7 +13,7 @@
|
|||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
#include "profiler/device/gpu/gpu_data_saver.h"
|
||||
#include "plugin/device/gpu/hal/profiler/gpu_data_saver.h"
|
||||
#include <fstream>
|
||||
#include <numeric>
|
||||
#include "sys/stat.h"
|
|
@ -14,16 +14,16 @@
|
|||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef MINDSPORE_CCSRC_PROFILER_DEVICE_GPU_GPU_DATA_SAVER_H
|
||||
#define MINDSPORE_CCSRC_PROFILER_DEVICE_GPU_GPU_DATA_SAVER_H
|
||||
#ifndef MINDSPORE_CCSRC_PLUGIN_DEVICE_GPU_HAL_PROFILER_GPU_DATA_SAVER_H
|
||||
#define MINDSPORE_CCSRC_PLUGIN_DEVICE_GPU_HAL_PROFILER_GPU_DATA_SAVER_H
|
||||
#include <iostream>
|
||||
#include <algorithm>
|
||||
#include <unordered_map>
|
||||
#include <vector>
|
||||
#include <string>
|
||||
#include <memory>
|
||||
#include "profiler/device/gpu/gpu_profiling.h"
|
||||
#include "profiler/device/cpu/cpu_data_saver.h"
|
||||
#include "plugin/device/gpu/hal/profiler/gpu_profiling.h"
|
||||
#include "plugin/device/cpu/hal/profiler/cpu_data_saver.h"
|
||||
#include "profiler/device/data_saver.h"
|
||||
namespace mindspore {
|
||||
namespace profiler {
|
||||
|
@ -105,4 +105,4 @@ class GpuDataSaver : public DataSaver {
|
|||
} // namespace profiler
|
||||
} // namespace mindspore
|
||||
|
||||
#endif // MINDSPORE_CCSRC_PROFILER_DEVICE_GPU_GPU_DATA_SAVER_H
|
||||
#endif // MINDSPORE_CCSRC_PLUGIN_DEVICE_GPU_HAL_PROFILER_GPU_DATA_SAVER_H
|
|
@ -14,14 +14,14 @@
|
|||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "profiler/device/gpu/gpu_profiling.h"
|
||||
#include "plugin/device/gpu/hal/profiler/gpu_profiling.h"
|
||||
|
||||
#include <cxxabi.h>
|
||||
#include <chrono>
|
||||
#include <cmath>
|
||||
#include <ctime>
|
||||
#include "profiler/device/gpu/cupti_interface.h"
|
||||
#include "profiler/device/gpu/gpu_data_saver.h"
|
||||
#include "plugin/device/gpu/hal/profiler/cupti_interface.h"
|
||||
#include "plugin/device/gpu/hal/profiler/gpu_data_saver.h"
|
||||
#include "include/common/pybind_api/api_register.h"
|
||||
#include "utils/log_adapter.h"
|
||||
#include "include/common/utils/utils.h"
|
||||
|
@ -31,6 +31,9 @@
|
|||
namespace mindspore {
|
||||
namespace profiler {
|
||||
namespace gpu {
|
||||
namespace {
|
||||
PROFILER_REG(kGPUDevice, GPUProfiler);
|
||||
} // namespace
|
||||
const size_t BUF_SIZE = 32 * 1024;
|
||||
const size_t ALIGN_SIZE = 8;
|
||||
#define CHECK_CUPTI_RET_WITH_ERROR(expression, message) \
|
||||
|
@ -66,8 +69,6 @@ const size_t ALIGN_SIZE = 8;
|
|||
} \
|
||||
} while (0)
|
||||
|
||||
std::shared_ptr<GPUProfiler> GPUProfiler::profiler_inst_ = std::make_shared<GPUProfiler>();
|
||||
|
||||
int32_t GetThreadID() {
|
||||
uint32_t thread_id = static_cast<uint32_t>(pthread_self());
|
||||
return thread_id;
|
||||
|
@ -216,9 +217,10 @@ std::string GetKernelFuncName(std::string kernel_name) {
|
|||
return kernel_name.substr(func_name_begin_iter);
|
||||
}
|
||||
|
||||
std::shared_ptr<GPUProfiler> &GPUProfiler::GetInstance() {
|
||||
MS_EXCEPTION_IF_NULL(profiler_inst_);
|
||||
return profiler_inst_;
|
||||
std::shared_ptr<GPUProfiler> GPUProfiler::GetInstance() {
|
||||
auto instance = Profiler::GetInstance(kGPUDevice);
|
||||
MS_EXCEPTION_IF_NULL(instance);
|
||||
return std::dynamic_pointer_cast<GPUProfiler>(instance);
|
||||
}
|
||||
|
||||
void GPUProfiler::SyncEnable(const bool enable_flag) {
|
||||
|
@ -385,7 +387,7 @@ void CUPTIAPI ActivityAllocBuffer(uint8_t **buffer, size_t *size, size_t *maxNum
|
|||
|
||||
void CUPTIAPI ActivityProcessBuffer(CUcontext ctx, uint32_t streamId, uint8_t *buffer, size_t size, size_t validSize);
|
||||
|
||||
void GPUProfiler::Init(const std::string &profileDataPath = "") {
|
||||
void GPUProfiler::Init(const std::string &profiling_path, uint32_t device_id, const std::string &profiling_options) {
|
||||
MS_LOG(INFO) << "Initialize GPU Profiling";
|
||||
if (subscriber_ != nullptr) {
|
||||
StopCUPTI();
|
||||
|
@ -412,7 +414,7 @@ void GPUProfiler::Init(const std::string &profileDataPath = "") {
|
|||
base_time_.host_start_time = GetHostTimeStamp();
|
||||
base_time_.host_start_monotonic_raw_time = GetHostMonoTimeStamp();
|
||||
|
||||
profile_data_path_ = profileDataPath;
|
||||
profile_data_path_ = profiling_path;
|
||||
MS_LOG(INFO) << "GPU start time(ns):" << base_time_.gpu_start_time
|
||||
<< " Host start time(ns):" << base_time_.host_start_time << " profile data path: " << profile_data_path_;
|
||||
is_init_ = true;
|
||||
|
@ -789,17 +791,6 @@ void CUPTIAPI GPUProfiler::ProcessBuffer(CUcontext ctx, uint32_t streamId, uint8
|
|||
|
||||
free(buffer);
|
||||
}
|
||||
|
||||
REGISTER_PYBIND_DEFINE(GPUProfiler_, ([](const py::module *m) {
|
||||
(void)py::class_<GPUProfiler, std::shared_ptr<GPUProfiler>>(*m, "GPUProfiler")
|
||||
.def_static("get_instance", &GPUProfiler::GetInstance, "GPUProfiler get_instance.")
|
||||
.def("init", &GPUProfiler::Init, py::arg("profile_data_path"), "init")
|
||||
.def("stop", &GPUProfiler::Stop, "stop")
|
||||
.def("step_profiling_enable", &GPUProfiler::StepProfilingEnable, py::arg("enable_flag"),
|
||||
"enable or disable step profiling")
|
||||
.def("sync_enable", &GPUProfiler::SyncEnable, py::arg("enable_flag"),
|
||||
"enable or disable synchronization profiling");
|
||||
}));
|
||||
} // namespace gpu
|
||||
} // namespace profiler
|
||||
} // namespace mindspore
|
|
@ -14,8 +14,8 @@
|
|||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef MINDSPORE_CCSRC_PROFILER_DEVICE_GPU_GPU_PROFILING_H
|
||||
#define MINDSPORE_CCSRC_PROFILER_DEVICE_GPU_GPU_PROFILING_H
|
||||
#ifndef MINDSPORE_CCSRC_PLUGIN_DEVICE_GPU_HAL_PROFILER_GPU_PROFILING_H
|
||||
#define MINDSPORE_CCSRC_PLUGIN_DEVICE_GPU_HAL_PROFILER_GPU_PROFILING_H
|
||||
#include <cuda.h>
|
||||
#include <cupti.h>
|
||||
#include <algorithm>
|
||||
|
@ -28,7 +28,7 @@
|
|||
#include <utility>
|
||||
#include <vector>
|
||||
#include "profiler/device/profiling.h"
|
||||
#include "profiler/device/gpu/gpu_profiling_utils.h"
|
||||
#include "plugin/device/gpu/hal/profiler/gpu_profiling_utils.h"
|
||||
|
||||
namespace mindspore {
|
||||
namespace profiler {
|
||||
|
@ -118,18 +118,20 @@ class ProfilingOp {
|
|||
|
||||
class MS_CORE_API GPUProfiler : public Profiler {
|
||||
public:
|
||||
static std::shared_ptr<GPUProfiler> &GetInstance();
|
||||
static std::shared_ptr<GPUProfiler> GetInstance();
|
||||
|
||||
GPUProfiler() = default;
|
||||
~GPUProfiler() { StopCUPTI(); }
|
||||
GPUProfiler(const GPUProfiler &) = delete;
|
||||
GPUProfiler &operator=(const GPUProfiler &) = delete;
|
||||
|
||||
void Init(const std::string &profileDataPath) override;
|
||||
void Init(const std::string &profiling_path, uint32_t device_id, const std::string &profiling_options) override;
|
||||
void Finalize() override {}
|
||||
void Start() override {}
|
||||
void Stop() override;
|
||||
void StopCUPTI();
|
||||
void StepProfilingEnable(const bool enable_flag) override;
|
||||
void SyncEnable(const bool enable_flag);
|
||||
bool GetEnableFlag() const { return enable_flag_; }
|
||||
bool GetSyncEnableFlag() const { return sync_enable_flag_; }
|
||||
void EventHandleProcess(CUpti_CallbackId cbid, const CUpti_CallbackData *cbdata, const std::string &typestring,
|
||||
uint64_t startTimestamp, uint64_t endTimestamp);
|
||||
|
@ -157,8 +159,6 @@ class MS_CORE_API GPUProfiler : public Profiler {
|
|||
void SetRunTimeData(const std::string &op_name, void *stream);
|
||||
void FixOpNameByCorrelationId(Event *event);
|
||||
|
||||
static std::shared_ptr<GPUProfiler> profiler_inst_;
|
||||
bool enable_flag_ = false;
|
||||
bool sync_enable_flag_ = true;
|
||||
std::unordered_map<uint32_t, std::string> op_name_map_;
|
||||
std::vector<Event> events_;
|
||||
|
@ -194,4 +194,4 @@ class MS_CORE_API GPUProfiler : public Profiler {
|
|||
} // namespace profiler
|
||||
} // namespace mindspore
|
||||
|
||||
#endif // MINDSPORE_CCSRC_PROFILER_DEVICE_GPU_PROFILING_H
|
||||
#endif // MINDSPORE_CCSRC_PLUGIN_DEVICE_GPU_HAL_PROFILER_GPU_PROFILING_H
|
|
@ -14,7 +14,7 @@
|
|||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "profiler/device/gpu/gpu_profiling_utils.h"
|
||||
#include "plugin/device/gpu/hal/profiler/gpu_profiling_utils.h"
|
||||
#include "kernel/kernel.h"
|
||||
#include "backend/common/session/anf_runtime_algorithm.h"
|
||||
#include "include/common/utils/anfalgo.h"
|
|
@ -13,8 +13,8 @@
|
|||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
#ifndef MINDSPORE_CCSRC_PROFILER_DEVICE_GPU_GPU_PROFILING_UTILS_H_
|
||||
#define MINDSPORE_CCSRC_PROFILER_DEVICE_GPU_GPU_PROFILING_UTILS_H_
|
||||
#ifndef MINDSPORE_CCSRC_PLUGIN_DEVICE_GPU_HAL_PROFILER_GPU_PROFILING_UTILS_H_
|
||||
#define MINDSPORE_CCSRC_PLUGIN_DEVICE_GPU_HAL_PROFILER_GPU_PROFILING_UTILS_H_
|
||||
|
||||
#include <map>
|
||||
#include <memory>
|
||||
|
@ -70,4 +70,4 @@ class ProfilingUtils {
|
|||
} // namespace gpu
|
||||
} // namespace profiler
|
||||
} // namespace mindspore
|
||||
#endif // MINDSPORE_CCSRC_PROFILER_DEVICE_GPU_GPU_PROFILING_UTILS_H_
|
||||
#endif // MINDSPORE_CCSRC_PLUGIN_DEVICE_GPU_HAL_PROFILER_GPU_PROFILING_UTILS_H_
|
|
@ -25,7 +25,7 @@
|
|||
#include "kernel/common_utils.h"
|
||||
|
||||
#ifndef ENABLE_SECURITY
|
||||
#include "profiler/device/gpu/gpu_profiling.h"
|
||||
#include "plugin/device/gpu/hal/profiler/gpu_profiling.h"
|
||||
#endif
|
||||
#include "runtime/data_queue/data_queue_mgr.h"
|
||||
#include "plugin/device/gpu/hal/device/gpu_common.h"
|
||||
|
|
|
@ -20,7 +20,7 @@
|
|||
#include <string>
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
#include "profiler/device/gpu/gpu_profiling.h"
|
||||
#include "plugin/device/gpu/hal/profiler/gpu_profiling.h"
|
||||
|
||||
using mindspore::profiler::gpu::ProfilingOp;
|
||||
|
||||
|
|
|
@ -1,27 +1,8 @@
|
|||
if(NOT ENABLE_SECURITY)
|
||||
if(ENABLE_GPU)
|
||||
file(GLOB_RECURSE PROFILER_SRC_LIST RELATIVE ${CMAKE_CURRENT_SOURCE_DIR}
|
||||
"device/gpu/*.cc" "device/cpu/*.cc")
|
||||
endif()
|
||||
|
||||
if(ENABLE_D)
|
||||
file(GLOB_RECURSE PROFILER_SRC_LIST RELATIVE ${CMAKE_CURRENT_SOURCE_DIR}
|
||||
"device/cpu/*.cc" "device/ascend/*.cc")
|
||||
endif()
|
||||
|
||||
if(ENABLE_CPU AND NOT (ENABLE_D OR ENABLE_GPU))
|
||||
file(GLOB_RECURSE PROFILER_SRC_LIST RELATIVE ${CMAKE_CURRENT_SOURCE_DIR}
|
||||
"device/cpu/*.cc")
|
||||
endif()
|
||||
|
||||
list(APPEND PROFILER_SRC_LIST ${CMAKE_CURRENT_SOURCE_DIR}/device/profiling.cc
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/device/data_saver.cc)
|
||||
|
||||
set_property(SOURCE ${PROFILER_SRC_LIST} PROPERTY COMPILE_DEFINITIONS
|
||||
SUBMODULE_ID=mindspore::SubModuleId::SM_PROFILER)
|
||||
add_library(_mindspore_profiler_obj OBJECT ${PROFILER_SRC_LIST})
|
||||
|
||||
if(ENABLE_D)
|
||||
add_dependencies(_mindspore_profiler_obj mindspore::protobuf)
|
||||
endif()
|
||||
endif()
|
||||
endif()
|
||||
|
|
|
@ -22,16 +22,28 @@
|
|||
#include "include/common/pybind_api/api_register.h"
|
||||
#include "utils/log_adapter.h"
|
||||
#include "include/common/utils/utils.h"
|
||||
#if ENABLE_GPU
|
||||
#include "profiler/device/gpu/gpu_profiling.h"
|
||||
#endif
|
||||
#if ENABLE_D
|
||||
#include "profiler/device/ascend/ascend_profiling.h"
|
||||
#endif
|
||||
#include "utils/ms_context.h"
|
||||
|
||||
namespace mindspore {
|
||||
namespace profiler {
|
||||
std::shared_ptr<ProfilerManager> ProfilerManager::profiler_manager_inst_ = std::make_shared<ProfilerManager>();
|
||||
std::shared_ptr<Profiler> Profiler::GetInstance(const std::string &name) noexcept {
|
||||
if (auto iter = instance_map_.find(name); iter != instance_map_.end()) {
|
||||
return iter->second;
|
||||
}
|
||||
|
||||
MS_LOG(WARNING) << "Profiler instance " << name << " not found.";
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
bool Profiler::Register(const std::string &name, const std::shared_ptr<Profiler> &instance) {
|
||||
if (instance_map_.find(name) != instance_map_.end()) {
|
||||
MS_LOG(WARNING) << name << " has been registered.";
|
||||
} else {
|
||||
(void)instance_map_.emplace(name, instance);
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
uint64_t Profiler::GetHostMonoTimeStamp() const {
|
||||
struct timespec ts;
|
||||
|
@ -163,32 +175,28 @@ std::shared_ptr<ProfilerManager> &ProfilerManager::GetInstance() {
|
|||
}
|
||||
|
||||
bool ProfilerManager::GetProfilingEnableFlag() const {
|
||||
#if ENABLE_GPU
|
||||
return profiler::gpu::GPUProfiler::GetInstance()->GetEnableFlag();
|
||||
#endif
|
||||
#if ENABLE_D
|
||||
auto ascend_instance = profiler::ascend::AscendProfiler::GetInstance();
|
||||
MS_EXCEPTION_IF_NULL(ascend_instance);
|
||||
return ascend_instance->GetProfilingEnableFlag();
|
||||
#endif
|
||||
if (auto gpu_instance = Profiler::GetInstance(kGPUDevice); gpu_instance != nullptr) {
|
||||
return gpu_instance->GetEnableFlag();
|
||||
}
|
||||
|
||||
if (auto ascend_instance = Profiler::GetInstance(kAscendDevice); ascend_instance != nullptr) {
|
||||
return ascend_instance->GetEnableFlag();
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
void ProfilerManager::RecordOneStepStartEndInfo() const {
|
||||
#if ENABLE_GPU
|
||||
auto gpu_profiler_inst = profiler::gpu::GPUProfiler::GetInstance();
|
||||
if (gpu_profiler_inst->GetEnableFlag()) {
|
||||
gpu_profiler_inst->RecordOneStepStartEndInfo();
|
||||
if (auto gpu_instance = Profiler::GetInstance(kGPUDevice); gpu_instance != nullptr && gpu_instance->GetEnableFlag()) {
|
||||
gpu_instance->RecordOneStepStartEndInfo();
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
std::string ProfilerManager::GetProfilingOptions() const {
|
||||
#if ENABLE_D
|
||||
auto ascend_instance = profiler::ascend::AscendProfiler::GetInstance();
|
||||
MS_EXCEPTION_IF_NULL(ascend_instance);
|
||||
return ascend_instance->GetProfilingOptions();
|
||||
#endif
|
||||
if (auto ascend_instance = Profiler::GetInstance(kAscendDevice); ascend_instance != nullptr) {
|
||||
return ascend_instance->GetProfilingOptions();
|
||||
}
|
||||
|
||||
return "";
|
||||
}
|
||||
|
||||
|
@ -197,5 +205,18 @@ REGISTER_PYBIND_DEFINE(ProfilerManager_, ([](const py::module *m) {
|
|||
.def_static("get_instance", &ProfilerManager::GetInstance, "ProfilerManager get_instance.")
|
||||
.def("dynamic_status", &ProfilerManager::GetNetDynamicShapeStatus, "dynamic_status");
|
||||
}));
|
||||
|
||||
// Python binding for the Profiler base class: exposes the name-based static lookup
// plus the init/start/stop/finalize/step_profiling_enable methods.
REGISTER_PYBIND_DEFINE(Profiler_, ([](const py::module *m) {
                         py::class_<Profiler, std::shared_ptr<Profiler>> profiler_class(*m, "Profiler");
                         // Static accessor: get_instance(device_name).
                         (void)profiler_class.def_static("get_instance", &Profiler::GetInstance,
                                                         py::arg("device_name"), "Profiler get_instance.");
                         // init(profiling_path, device_id=0, profiling_options="").
                         (void)profiler_class.def("init", &Profiler::Init, py::arg("profiling_path"),
                                                  py::arg("device_id") = py::int_(0),
                                                  py::arg("profiling_options") = py::str(""), "init");
                         (void)profiler_class.def("start", &Profiler::Start, "start");
                         (void)profiler_class.def("stop", &Profiler::Stop, "stop");
                         (void)profiler_class.def("finalize", &Profiler::Finalize, "finalize");
                         (void)profiler_class.def("step_profiling_enable", &Profiler::StepProfilingEnable,
                                                  py::arg("enable_flag"), "enable or disable step profiling");
                       }));
|
||||
} // namespace profiler
|
||||
} // namespace mindspore
|
||||
|
|
|
@ -70,21 +70,28 @@ class ProfilerManager {
|
|||
void SetNetDynamicShapeStatus() { is_dynamic_shape_net_ = true; }
|
||||
|
||||
private:
|
||||
static std::shared_ptr<ProfilerManager> profiler_manager_inst_;
|
||||
inline static std::shared_ptr<ProfilerManager> profiler_manager_inst_ = std::make_shared<ProfilerManager>();
|
||||
bool is_dynamic_shape_net_ = 0;
|
||||
};
|
||||
|
||||
class Profiler {
|
||||
public:
|
||||
static std::shared_ptr<Profiler> GetInstance(const std::string &name) noexcept;
|
||||
static bool Register(const std::string &name, const std::shared_ptr<Profiler> &instance);
|
||||
|
||||
Profiler() = default;
|
||||
virtual ~Profiler() = default;
|
||||
|
||||
virtual void Init(const std::string &profileDataPath) = 0;
|
||||
virtual void Init(const std::string &profiling_path, uint32_t device_id, const std::string &profiling_options) = 0;
|
||||
virtual void Finalize() = 0;
|
||||
bool IsInitialized() const { return init_flag_; }
|
||||
virtual void Start() = 0;
|
||||
virtual void Stop() = 0;
|
||||
virtual void StepProfilingEnable(const bool enable_flag) = 0;
|
||||
virtual void OpDataProducerEnd() = 0;
|
||||
void RecordOneStepStartEndInfo();
|
||||
bool GetEnableFlag() const { return enable_flag_; }
|
||||
std::string GetProfilingOptions() const { return profiling_options_; }
|
||||
std::string ProfileDataPath() const { return profile_data_path_; }
|
||||
void RecordOneStepStartEndInfo(std::string op_name);
|
||||
std::pair<double, double> GetSingleOpLaunchTime() { return single_op_launch_start_time_end_time_; }
|
||||
|
@ -112,8 +119,15 @@ class Profiler {
|
|||
std::shared_mutex op_map_mutex_;
|
||||
std::mutex record_mutex_;
|
||||
bool init_flag_ = false;
|
||||
std::string profiling_options_;
|
||||
|
||||
private:
|
||||
inline static std::map<std::string, std::shared_ptr<Profiler>> instance_map_ = {};
|
||||
};
|
||||
} // namespace profiler
|
||||
} // namespace mindspore
|
||||
|
||||
// Registers a default-constructed CLAZZ (held in a std::shared_ptr) with Profiler::Register under NAME.
// The call runs as the initializer of a file-local static bool, whose name is built by pasting NAME
// between "g_Profiler_" and "_reg_result"; NAME must therefore be an identifier (for example a
// constant such as kGPUDevice), not a string literal.
#define PROFILER_REG(NAME, CLAZZ) \
|
||||
static bool g_Profiler_##NAME##_reg_result = mindspore::profiler::Profiler::Register(NAME, std::make_shared<CLAZZ>())
|
||||
|
||||
#endif // MINDSPORE_CCSRC_PROFILER_DEVICE_PROFILING_H
|
||||
|
|
|
@ -232,8 +232,8 @@ class Profiler:
|
|||
profiler_manager = c_expression.ProfilerManager
|
||||
self._profiler_manager = profiler_manager.get_instance()
|
||||
if self._device_target:
|
||||
cpu_profiler = c_expression.CPUProfiler
|
||||
self._cpu_profiler = cpu_profiler.get_instance()
|
||||
cpu_profiler = c_expression.Profiler
|
||||
self._cpu_profiler = cpu_profiler.get_instance("CPU")
|
||||
self._cpu_profiler.init(self._output_path)
|
||||
|
||||
if self._device_target and self._device_target == DeviceTarget.CPU.value:
|
||||
|
@ -264,8 +264,8 @@ class Profiler:
|
|||
raise RuntimeError("Pynative model is not supported on GPU currently.")
|
||||
self._parse_parameter_for_gpu(kwargs)
|
||||
|
||||
gpu_profiler = c_expression.GPUProfiler
|
||||
self._gpu_profiler = gpu_profiler.get_instance()
|
||||
gpu_profiler = c_expression.Profiler
|
||||
self._gpu_profiler = gpu_profiler.get_instance("GPU")
|
||||
self._gpu_profiler.init(self._output_path)
|
||||
if GlobalComm.WORLD_COMM_GROUP == "nccl_world_group":
|
||||
self._dev_id = str(get_rank())
|
||||
|
@ -289,7 +289,7 @@ class Profiler:
|
|||
logger.critical(msg)
|
||||
raise ValueError(msg)
|
||||
# use context interface to open profiling, for the new mindspore version(after 2020.5.21)
|
||||
self._ascend_profiler = c_expression.AscendProfiler.get_instance()
|
||||
self._ascend_profiler = c_expression.Profiler.get_instance("Ascend")
|
||||
self._ascend_profiler.init(self._output_path, int(self._dev_id), self._ascend_profiling_options)
|
||||
base_profiling_container_path = os.path.join(self._output_path, "container")
|
||||
container_path = os.path.join(base_profiling_container_path, self._dev_id)
|
||||
|
@ -415,6 +415,7 @@ class Profiler:
|
|||
|
||||
def _ascend_pynative_analyse(self):
|
||||
"""Collect and analyse ascend pynative model performance data."""
|
||||
self._ascend_profiler.finalize()
|
||||
op_intermediate_parser = OPIntermediateParser(self._output_path, self._rank_id)
|
||||
op_intermediate_parser.parser_pynative_op_type()
|
||||
op_intermediate_parser.parser_pynative_op_intermediate_detail()
|
||||
|
@ -697,8 +698,8 @@ class Profiler:
|
|||
|
||||
def _ascend_pynative_start(self):
|
||||
"""Ascend pynative mode start profiling."""
|
||||
pynative_profiler = c_expression.PynativeProfiler
|
||||
self._pynative_profiler = pynative_profiler.get_instance()
|
||||
pynative_profiler = c_expression.Profiler
|
||||
self._pynative_profiler = pynative_profiler.get_instance("PyNative")
|
||||
self._pynative_profiler.init(self._output_path)
|
||||
self._ascend_profiler.start()
|
||||
|
||||
|
|
|
@ -208,7 +208,7 @@ file(GLOB_RECURSE MINDSPORE_SRC_LIST RELATIVE ${CMAKE_CURRENT_SOURCE_DIR}
|
|||
"../../../mindspore/ccsrc/distributed/rpc/tcp/*.cc"
|
||||
"../../../mindspore/ccsrc/distributed/cluster/topology/*.cc"
|
||||
"../../../mindspore/ccsrc/distributed/embedding_cache/*.cc"
|
||||
"../../../mindspore/ccsrc/profiler/device/ascend/*.cc"
|
||||
"../../../mindspore/ccsrc/plugin/device/ascend/hal/profiler/*.cc"
|
||||
"../../../mindspore/ccsrc/profiler/device/profiling.cc"
|
||||
"../../../mindspore/ccsrc/plugin/device/cpu/kernel/nnacl/fp32/adam_fp32.c"
|
||||
"../../../mindspore/ccsrc/kernel/kernel.cc"
|
||||
|
@ -252,12 +252,15 @@ list(REMOVE_ITEM MINDSPORE_SRC_LIST
|
|||
|
||||
if(ENABLE_SECURITY)
|
||||
list(REMOVE_ITEM MINDSPORE_SRC_LIST "../../../mindspore/ccsrc/profiler/device/profiling.cc")
|
||||
list(REMOVE_ITEM MINDSPORE_SRC_LIST "../../../mindspore/ccsrc/profiler/device/ascend/memory_profiling.cc")
|
||||
list(REMOVE_ITEM MINDSPORE_SRC_LIST "../../../mindspore/ccsrc/profiler/device/ascend/ascend_profiling.cc")
|
||||
list(REMOVE_ITEM MINDSPORE_SRC_LIST "../../../mindspore/ccsrc/profiler/device/ascend/options.cc")
|
||||
list(REMOVE_ITEM MINDSPORE_SRC_LIST
|
||||
"../../../mindspore/ccsrc/plugin/device/ascend/hal/profiler/memory_profiling.cc")
|
||||
list(REMOVE_ITEM MINDSPORE_SRC_LIST
|
||||
"../../../mindspore/ccsrc/plugin/device/ascend/hal/profiler/ascend_profiling.cc")
|
||||
list(REMOVE_ITEM MINDSPORE_SRC_LIST "../../../mindspore/ccsrc/plugin/device/ascend/hal/profiler/options.cc")
|
||||
list(REMOVE_ITEM MINDSPORE_SRC_LIST "../../../mindspore/ccsrc/debug/data_dump/dump_json_parser.cc")
|
||||
endif()
|
||||
list(REMOVE_ITEM MINDSPORE_SRC_LIST "../../../mindspore/ccsrc/profiler/device/ascend/parallel_strategy_profiling.cc")
|
||||
list(REMOVE_ITEM MINDSPORE_SRC_LIST
|
||||
"../../../mindspore/ccsrc/plugin/device/ascend/hal/profiler/parallel_strategy_profiling.cc")
|
||||
|
||||
add_library(_ut_mindspore_obj OBJECT ${MINDSPORE_SRC_LIST})
|
||||
add_library(_ut_ut_obj OBJECT ${UT_SRCS})
|
||||
|
|
|
@ -14,7 +14,7 @@
|
|||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "profiler/device/ascend/parallel_strategy_profiling.h"
|
||||
#include "plugin/device/ascend/hal/profiler/parallel_strategy_profiling.h"
|
||||
|
||||
namespace mindspore {
|
||||
namespace profiler {
|
||||
|
|
Loading…
Reference in New Issue