library size optimization for 310

Signed-off-by: zhoufeng <zhoufeng54@huawei.com>
2021-03-07 17:06:18 +08:00 · 2021-03-07 17:06:18 +08:00 · e4c59a1c8f
parent 57f7bf8480
commit e4c59a1c8f
20 changed files with 90 additions and 20 deletions
--- a/cmake/external_libs/glog.cmake
+++ b/cmake/external_libs/glog.cmake
@ -1,8 +1,15 @@
-set(glog_CXXFLAGS "-D_FORTIFY_SOURCE=2 -O2 ${SECURE_CXX_FLAGS}")
+set(glog_CXXFLAGS "-D_FORTIFY_SOURCE=2 -O2 ${SECURE_CXX_FLAGS} -Dgoogle=mindspore_private")
+set(glog_CFLAGS "-D_FORTIFY_SOURCE=2 -O2")
 if(NOT ENABLE_GLIBCXX)
    set(glog_CXXFLAGS "${glog_CXXFLAGS} -D_GLIBCXX_USE_CXX11_ABI=0")
 endif()
-set(glog_CFLAGS "-D_FORTIFY_SOURCE=2 -O2")
+if(BUILD_LITE)
+    set(glog_patch "")  # lite build: no patch is applied, so the library keeps the name "glog"
+    set(glog_lib glog)
+else()
+    set(glog_patch ${CMAKE_SOURCE_DIR}/third_party/patch/glog/glog.patch001)  # sets OUTPUT_NAME to mindspore_glog
+    set(glog_lib mindspore_glog)  # must match the OUTPUT_NAME set by glog.patch001
+endif()
 if(ENABLE_GITEE)
    set(REQ_URL "https://gitee.com/mirrors/glog/repository/archive/v0.4.0.tar.gz")
    set(MD5 "22fe340ddc231e6c8e46bc295320f8ee")
@ -10,11 +17,13 @@ else()
    set(REQ_URL "https://github.com/google/glog/archive/v0.4.0.tar.gz")
    set(MD5 "0daea8785e6df922d7887755c3d100d0")
 endif()
+
 mindspore_add_pkg(glog
        VER 0.4.0
-        LIBS glog
+        LIBS ${glog_lib}
        URL ${REQ_URL}
        MD5 ${MD5}
+        PATCHES ${glog_patch}
        CMAKE_OPTION -DBUILD_TESTING=OFF -DCMAKE_POSITION_INDEPENDENT_CODE=ON -DBUILD_SHARED_LIBS=ON -DWITH_GFLAGS=OFF)
 include_directories(${glog_INC})
-add_library(mindspore::glog ALIAS glog::glog)
+add_library(mindspore::glog ALIAS glog::${glog_lib})
--- a/cmake/package.cmake
+++ b/cmake/package.cmake
@ -76,7 +76,7 @@ install(
 )

 if(USE_GLOG)
-    file(GLOB_RECURSE GLOG_LIB_LIST ${glog_LIBPATH}/libglog*)
+    file(GLOB_RECURSE GLOG_LIB_LIST ${glog_LIBPATH}/libmindspore_glog*)
    install(
        FILES ${GLOG_LIB_LIST}
        DESTINATION ${INSTALL_LIB_DIR}
--- a/include/api/context.h
+++ b/include/api/context.h
@ -136,12 +136,15 @@ class MS_API Ascend310DeviceInfo : public DeviceInfoContext {
  inline void SetDumpConfigPath(const std::string &cfg_path);
  inline std::string GetDumpConfigPath() const;

+  // aipp config file
  inline void SetInsertOpConfigPath(const std::string &cfg_path);
  inline std::string GetInsertOpConfigPath() const;

+  // nchw or nhwc
  inline void SetInputFormat(const std::string &format);
  inline std::string GetInputFormat() const;

+  // Mandatory while dynamic batch: e.g. "input_op_name1: 1,2,3,4;input_op_name2: 4,3,2,1"
  inline void SetInputShape(const std::string &shape);
  inline std::string GetInputShape() const;

@ -151,18 +154,25 @@ class MS_API Ascend310DeviceInfo : public DeviceInfoContext {
  void SetDynamicBatchSize(const std::vector<size_t> &dynamic_batch_size);
  inline std::string GetDynamicBatchSize() const;

+  // FP32, UINT8 or FP16, default as FP32
  void SetOutputType(enum DataType output_type);
  enum DataType GetOutputType() const;

+  // "force_fp16", "allow_fp32_to_fp16", "must_keep_origin_dtype" or "allow_mix_precision", default as "force_fp16"
  inline void SetPrecisionMode(const std::string &precision_mode);
  inline std::string GetPrecisionMode() const;

+  // "high_performance" or "high_precision", default as "high_performance"
  inline void SetOpSelectImplMode(const std::string &op_select_impl_mode);
  inline std::string GetOpSelectImplMode() const;

  inline void SetFusionSwitchConfigPath(const std::string &cfg_path);
  inline std::string GetFusionSwitchConfigPath() const;

+  // Optional "l1_optimize", "l2_optimize", "off_optimize" or "l1_and_l2_optimize", default as "l2_optimize"
+  inline void SetBufferOptimizeMode(const std::string &buffer_optimize_mode);
+  inline std::string GetBufferOptimizeMode() const;
+
 private:
  void SetDumpConfigPath(const std::vector<char> &cfg_path);
  std::vector<char> GetDumpConfigPathChar() const;
@ -186,6 +196,9 @@ class MS_API Ascend310DeviceInfo : public DeviceInfoContext {

  void SetFusionSwitchConfigPath(const std::vector<char> &cfg_path);
  std::vector<char> GetFusionSwitchConfigPathChar() const;
+
+  void SetBufferOptimizeMode(const std::vector<char> &buffer_optimize_mode);
+  std::vector<char> GetBufferOptimizeModeChar() const;
 };

 void Ascend310DeviceInfo::SetDumpConfigPath(const std::string &cfg_path) { SetDumpConfigPath(StringToChar(cfg_path)); }
@ -220,5 +233,10 @@ void Ascend310DeviceInfo::SetFusionSwitchConfigPath(const std::string &cfg_path)
 std::string Ascend310DeviceInfo::GetFusionSwitchConfigPath() const {
  return CharToString(GetFusionSwitchConfigPathChar());
 }
+
+void Ascend310DeviceInfo::SetBufferOptimizeMode(const std::string &buffer_optimize_mode) {
+  SetBufferOptimizeMode(StringToChar(buffer_optimize_mode));
+}
+std::string Ascend310DeviceInfo::GetBufferOptimizeMode() const { return CharToString(GetBufferOptimizeModeChar()); }
 }  // namespace mindspore
 #endif  // MINDSPORE_INCLUDE_API_CONTEXT_H
--- a/mindspore/ccsrc/backend/kernel_compiler/gpu/trt/trt_utils.h
+++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/trt/trt_utils.h
@ -106,6 +106,7 @@ class TrtLogger : public nvinfer1::ILogger {
  // Redirect Tensor-RT inner log to GLOG
  void log(Severity severity, const char *msg) override {
 #ifdef USE_GLOG
+#define google mindspore_private
    static std::map<Severity, std::tuple<MsLogLevel, int, std::string>> logger_map = {
      {Severity::kVERBOSE, {MsLogLevel::DEBUG, google::GLOG_INFO, "VERBOSE"}},
      {Severity::kINFO, {MsLogLevel::INFO, google::GLOG_INFO, "INFO"}},
@ -127,6 +128,7 @@ class TrtLogger : public nvinfer1::ILogger {

    google::LogMessage("", 0, std::get<1>(level)).stream()
      << "[TensorRT " << std::get<2>(level) << "] " << msg << std::endl;
+#undef google
 #endif  // USE_GLOG
  }

--- a/mindspore/ccsrc/cxx_api/CMakeLists.txt
+++ b/mindspore/ccsrc/cxx_api/CMakeLists.txt
@ -15,6 +15,7 @@ if(ENABLE_ACL)
            "model/model_converter_utils/*.cc"
            "graph/acl/*.cc"
            )
+    list(APPEND API_ACL_SRC $<TARGET_OBJECTS:_mindspore_transform_graph_ir_obj>)
 endif()

 if(ENABLE_D)
@ -46,7 +47,7 @@ if(CMAKE_SYSTEM_NAME MATCHES "Darwin")
    target_link_libraries(mindspore_shared_lib PRIVATE ${PYTHON_LIBRARIES} ${SECUREC_LIBRARY}
            -Wl,-force_load mindspore -Wl,-noall_load proto_input mindspore_gvar mindspore::protobuf)
 else()
-    if(ENABLE_D OR ENABLE_ACL OR ENABLE_GPU)
+    if(ENABLE_D OR ENABLE_GPU)
        target_link_libraries(mindspore_shared_lib PRIVATE ${PYTHON_LIBRARIES} ${SECUREC_LIBRARY}
            -Wl,--whole-archive mindspore -Wl,--no-whole-archive proto_input mindspore_gvar mindspore::protobuf)
    else()
--- a/mindspore/ccsrc/cxx_api/context.cc
+++ b/mindspore/ccsrc/cxx_api/context.cc
@ -30,20 +30,16 @@ constexpr auto kModelOptionNvidiaGpuTrtInferMode = "mindspore.option.nvidia_gpu.
 constexpr auto kModelOptionAscend910DeviceID = kModelOptionDeviceID;
 constexpr auto kModelOptionAscend310DeviceID = kModelOptionDeviceID;
 constexpr auto kModelOptionAscend310DumpCfgPath = "mindspore.option.ascend310.dump_config_file_path";
-constexpr auto kModelOptionAscend310InsertOpCfgPath =
-  "mindspore.option.ascend310.insert_op_config_file_path";                                    // aipp config file
-constexpr auto kModelOptionAscend310InputFormat = "mindspore.option.ascend310.input_format";  // nchw or nhwc
+constexpr auto kModelOptionAscend310InsertOpCfgPath = "mindspore.option.ascend310.insert_op_config_file_path";
+constexpr auto kModelOptionAscend310InputFormat = "mindspore.option.ascend310.input_format";
 constexpr auto kModelOptionAscend310InputShapeMap = "mindspore.option.ascend310.input_shape_map";
 constexpr auto kModelOptionAscend310InputShape = "mindspore.option.ascend310.input_shape";
-// Mandatory while dynamic batch: e.g. "input_op_name1: n1,c2,h3,w4;input_op_name2: n4,c3,h2,w1"
-constexpr auto kModelOptionAscend310OutputType =
-  "mindspore.option.ascend310.output_type";  // "FP32", "UINT8" or "FP16", default as "FP32"
+constexpr auto kModelOptionAscend310OutputType = "mindspore.option.ascend310.output_type";
 constexpr auto kModelOptionAscend310PrecisionMode = "mindspore.option.ascend310.precision_mode";
-// "force_fp16", "allow_fp32_to_fp16", "must_keep_origin_dtype" or "allow_mix_precision", default as "force_fp16"
 constexpr auto kModelOptionAscend310OpSelectImplMode = "mindspore.option.ascend310.op_select_impl_mode";
 constexpr auto KModelOptionAscend310FusionSwitchCfgPath = "mindspore.option.ascend310.fusion_switch_config_file_path";
-// "False": Inference with native backend, "True": Inference with Tensor-RT engine, default as "False"
 constexpr auto kModelOptionAscend310DynamicBatchSize = "mindspore.option.ascend310.dynamic_batch_size";
+constexpr auto kModelOptionAscend310BufferOptimize = "mindspore.option.ascend310.buffer_optimize";

 namespace mindspore {
 class Allocator {};
@ -279,4 +275,14 @@ enum DataType Ascend310DeviceInfo::GetOutputType() const {
  MS_EXCEPTION_IF_NULL(data_);
  return GetValue<enum DataType>(data_, kModelOptionAscend310OutputType);
 }
+
+void Ascend310DeviceInfo::SetBufferOptimizeMode(const std::vector<char> &buffer_optimize_mode) {
+  MS_EXCEPTION_IF_NULL(data_);
+  data_->params[kModelOptionAscend310BufferOptimize] = CharToString(buffer_optimize_mode);
+}
+std::vector<char> Ascend310DeviceInfo::GetBufferOptimizeModeChar() const {
+  MS_EXCEPTION_IF_NULL(data_);
+  const std::string &ref = GetValue<std::string>(data_, kModelOptionAscend310BufferOptimize);
+  return StringToChar(ref);
+}
 }  // namespace mindspore
--- a/mindspore/ccsrc/cxx_api/graph/acl/acl_env_guard.cc
+++ b/mindspore/ccsrc/cxx_api/graph/acl/acl_env_guard.cc
@ -51,7 +51,7 @@ std::shared_ptr<AclEnvGuard> AclEnvGuard::GetAclEnv(std::string_view cfg_file) {
  } else {
    acl_env = std::make_shared<AclEnvGuard>(cfg_file);
    aclError ret = acl_env->GetErrno();
-    if (ret != ACL_ERROR_NONE) {
+    if (ret != ACL_ERROR_NONE && ret != ACL_ERROR_REPEAT_INITIALIZE) {
      MS_LOG(ERROR) << "Execute aclInit Failed";
      return nullptr;
    }
--- a/mindspore/ccsrc/cxx_api/model/acl/acl_model_options.cc
+++ b/mindspore/ccsrc/cxx_api/model/acl/acl_model_options.cc
@ -54,6 +54,7 @@ AclModelOptions::AclModelOptions(const std::shared_ptr<Context> &context) {
  fusion_switch_cfg_path_ = ascend310_info->GetFusionSwitchConfigPath();
  device_id_ = ascend310_info->GetDeviceID();
  dump_cfg_path_ = ascend310_info->GetDumpConfigPath();
+  buffer_optimize_mode_ = ascend310_info->GetBufferOptimizeMode();
 }

 void AclModelOptions::RenameInput(const std::vector<std::string> &input_names) {
@ -78,7 +79,8 @@ std::tuple<std::map<std::string, std::string>, std::map<std::string, std::string
  const std::map<std::string const *, std::string> init_options_map = {
    {&op_select_impl_mode_, ge::ir_option::OP_SELECT_IMPL_MODE},
    {&soc_version_, ge::ir_option::SOC_VERSION},
-    {&fusion_switch_cfg_path_, ge::ir_option::FUSION_SWITCH_FILE}};
+    {&fusion_switch_cfg_path_, ge::ir_option::FUSION_SWITCH_FILE},
+    {&buffer_optimize_mode_, ge::ir_option::BUFFER_OPTIMIZE}};

  const std::map<std::string const *, std::string> build_options_map = {
    {&insert_op_cfg_path_, ge::ir_option::INSERT_OP_FILE},
--- a/mindspore/ccsrc/cxx_api/model/acl/acl_model_options.h
+++ b/mindspore/ccsrc/cxx_api/model/acl/acl_model_options.h
@ -51,6 +51,7 @@ class AclModelOptions {
  std::string soc_version_ = "Ascend310";
  std::string dynamic_batch_size_;
  std::string dynamic_image_size_;
+  std::string buffer_optimize_mode_;
  std::map<int, std::vector<int>> input_shape_map_;
  // other options
  uint32_t device_id_;
--- a/mindspore/ccsrc/cxx_api/model/model.cc
+++ b/mindspore/ccsrc/cxx_api/model/model.cc
@ -112,7 +112,7 @@ MSTensor Model::GetInputByTensorName(const std::vector<char> &tensor_name) {
    }
  }

-  return MSTensor(std::shared_ptr<MSTensor::Impl>(nullptr));
+  return MSTensor(nullptr);
 }

 std::vector<std::vector<char>> Model::GetOutputTensorNamesChar() {
@ -132,7 +132,7 @@ MSTensor Model::GetOutputByTensorName(const std::vector<char> &tensor_name) {
    }
  }

-  return MSTensor(std::shared_ptr<MSTensor::Impl>(nullptr));
+  return MSTensor(nullptr);
 }

 Model::Model() : impl_(nullptr) {}
--- a/mindspore/ccsrc/minddata/dataset/engine/cache/cache_admin.cc
+++ b/mindspore/ccsrc/minddata/dataset/engine/cache/cache_admin.cc
@ -29,6 +29,7 @@ int main(int argc, char **argv) {
  std::stringstream arg_stream;

 #ifdef USE_GLOG
+#define google mindspore_private
  FLAGS_logtostderr = false;
  FLAGS_log_dir = ds::DefaultLogDir();
  // Create default log dir
@ -39,6 +40,7 @@ int main(int argc, char **argv) {
    return 1;
  }
  google::InitGoogleLogging(argv[0]);
+#undef google
 #endif

  if (argc == 1) {
--- a/mindspore/ccsrc/minddata/dataset/engine/cache/cache_main.cc
+++ b/mindspore/ccsrc/minddata/dataset/engine/cache/cache_main.cc
@ -59,6 +59,7 @@ ms::Status StartServer(int argc, char **argv) {
  ds::SharedMessage msg;
  if (daemonize) {
 #ifdef USE_GLOG
+#define google mindspore_private
    FLAGS_logtostderr = false;
    FLAGS_log_dir = ds::DefaultLogDir();
    // Create cache server default log dir
@ -69,6 +70,7 @@ ms::Status StartServer(int argc, char **argv) {
    }
    ms::g_ms_submodule_log_levels[SUBMODULE_ID] = strtol(argv[5], nullptr, 10);
    google::InitGoogleLogging(argv[0]);
+#undef google
 #endif
    rc = msg.Create();
    if (rc.IsError()) {
--- a/mindspore/ccsrc/minddata/dataset/engine/cache/perf/cache_perf.cc
+++ b/mindspore/ccsrc/minddata/dataset/engine/cache/perf/cache_perf.cc
@ -22,9 +22,11 @@ namespace ds = mindspore::dataset;

 int main(int argc, char **argv) {
 #ifdef USE_GLOG
+#define google mindspore_private
  FLAGS_logtostderr = false;
  FLAGS_log_dir = "/tmp";
  google::InitGoogleLogging(argv[0]);
+#undef google
 #endif
  ds::CachePerfRun cachePerfRun;
  if (cachePerfRun.ProcessArgs(argc, argv) == 0) {
--- a/mindspore/ccsrc/minddata/dataset/engine/cache/perf/cache_pipeline.cc
+++ b/mindspore/ccsrc/minddata/dataset/engine/cache/perf/cache_pipeline.cc
@ -23,9 +23,11 @@ namespace ds = mindspore::dataset;

 int main(int argc, char **argv) {
 #ifdef USE_GLOG
+#define google mindspore_private
  FLAGS_logtostderr = false;
  FLAGS_log_dir = "/tmp";
  google::InitGoogleLogging(argv[0]);
+#undef google
 #endif
  ds::CachePipelineRun cachePipelineRun;
  if (cachePipelineRun.ProcessArgs(argc, argv) == 0) {
--- a/mindspore/ccsrc/minddata/dataset/kernels/image/soft_dvpp/CMakeLists.txt
+++ b/mindspore/ccsrc/minddata/dataset/kernels/image/soft_dvpp/CMakeLists.txt
@ -1,5 +1,6 @@
 file(GLOB_RECURSE _CURRENT_SRC_FILES RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "*.cc")
 set_property(SOURCE ${_CURRENT_SRC_FILES} PROPERTY COMPILE_DEFINITIONS SUBMODULE_ID=mindspore::SubModuleId::SM_MD)
+set_property(SOURCE ${_CURRENT_SRC_FILES} APPEND PROPERTY COMPILE_DEFINITIONS google=mindspore_private)  # APPEND: a plain set would replace SUBMODULE_ID
 add_subdirectory(utils)
 add_library(kernels-soft-dvpp-image OBJECT
            soft_dvpp_decode_resize_jpeg_op.cc
--- a/mindspore/ccsrc/minddata/dataset/kernels/image/soft_dvpp/utils/CMakeLists.txt
+++ b/mindspore/ccsrc/minddata/dataset/kernels/image/soft_dvpp/utils/CMakeLists.txt
@ -1,5 +1,6 @@
-file(GLOB_RECURSE _CURRENT_SRC_FILES RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "*.cpp")
+file(GLOB_RECURSE _CURRENT_SRC_FILES RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "*.cc")
 set_property(SOURCE ${_CURRENT_SRC_FILES} PROPERTY COMPILE_DEFINITIONS SUBMODULE_ID=mindspore::SubModuleId::SM_MD)
+set_property(SOURCE ${_CURRENT_SRC_FILES} APPEND PROPERTY COMPILE_DEFINITIONS google=mindspore_private)  # APPEND: a plain set would replace SUBMODULE_ID
 add_library(soft-dvpp-utils OBJECT
            soft_dp.cc
            soft_dp_tools.cc
--- a/mindspore/ccsrc/ps/optimizer_info.cc
+++ b/mindspore/ccsrc/ps/optimizer_info.cc
@ -85,8 +85,9 @@ void DenseOptimInfo::Accumulate(const Values &values, const Lengths &lengths) {
    grad_offset += lengths[i];
  }
  float *grad_data = const_cast<float *>(values.data()) + grad_offset;
+#define google mindspore_private
  CHECK_EQ(size, static_cast<size_t>(lengths[grad_index]));
-
+#undef google
  for (size_t i = 0; i < size; i++) {
    accum_grad_data[i] += grad_data[i];
  }
--- a/mindspore/core/utils/log_adapter.cc
+++ b/mindspore/core/utils/log_adapter.cc
@ -23,6 +23,7 @@
 // namespace to support utils module definition
 namespace mindspore {
 #ifdef USE_GLOG
+#define google mindspore_private
 static std::string GetProcName() {
 #if defined(__APPLE__) || defined(__FreeBSD__)
  const char *appname = getprogname();
@ -71,6 +72,7 @@ static int GetGlogLevel(MsLogLevel level) {
      return google::GLOG_ERROR;
  }
 }
+
 // get threshold level
 static int GetThresholdLevel(std::string threshold) {
  if (threshold.empty()) {
@ -85,6 +87,7 @@ static int GetThresholdLevel(std::string threshold) {
    return google::GLOG_WARNING;
  }
 }
+#undef google
 #else

 #undef Dlog
@ -143,11 +146,13 @@ static const char *GetSubModuleName(SubModuleId module_id) {
 }
 void LogWriter::OutputLog(const std::ostringstream &msg) const {
 #ifdef USE_GLOG
+#define google mindspore_private
  auto submodule_name = GetSubModuleName(submodule_);
  google::LogMessage("", 0, GetGlogLevel(log_level_)).stream()
    << "[" << GetLogLevel(log_level_) << "] " << submodule_name << "(" << getpid() << "," << GetProcName()
    << "):" << GetTimeString() << " "
    << "[" << location_.file_ << ":" << location_.line_ << "] " << location_.func_ << "] " << msg.str() << std::endl;
+#undef google
 #else
  auto str_msg = msg.str();
  auto slog_module_id = (submodule_ == SM_MD ? MD : ME);
@ -479,6 +484,7 @@ __attribute__((constructor)) void mindspore_log_init(void) {
 void mindspore_log_init(void) {
 #endif
 #ifdef USE_GLOG
+#define google mindspore_private
  static bool is_glog_initialzed = false;
  if (!is_glog_initialzed) {
 #if !defined(_WIN32) && !defined(_WIN64)
@ -486,6 +492,7 @@ void mindspore_log_init(void) {
 #endif
    is_glog_initialzed = true;
  }
+#undef google
 #endif
  common_log_init();
 }
--- a/mindspore/core/utils/log_adapter.h
+++ b/mindspore/core/utils/log_adapter.h
@ -26,7 +26,9 @@
 #include "utils/overload.h"
 #include "./securec.h"
 #ifdef USE_GLOG
+#define google mindspore_private
 #include "glog/logging.h"
+#undef google
 #else
 #include "toolchain/slog.h"
 #endif
--- a/third_party/patch/glog/glog.patch001
+++ b/third_party/patch/glog/glog.patch001
@ -0,0 +1,11 @@
+diff -Npur glog/CMakeLists.txt glog_modify/CMakeLists.txt
+--- glog/CMakeLists.txt	2019-03-22 10:51:46.000000000 +0800
++++ glog_modify/CMakeLists.txt	2021-03-07 16:58:38.386879400 +0800
+@@ -470,6 +470,7 @@ add_library (glog
+ add_library(glog::glog ALIAS glog)
+ 
+ set_target_properties (glog PROPERTIES POSITION_INDEPENDENT_CODE ON)
++set_target_properties (glog PROPERTIES OUTPUT_NAME mindspore_glog)
+ 
+ if (UNWIND_LIBRARY)
+   target_link_libraries (glog PUBLIC ${UNWIND_LIBRARY})