commit
4eb91ba6f9
|
@ -12,6 +12,7 @@ if(SUPPORT_TRAIN)
|
||||||
set(RUNTIME_LIB_DIR ${RUNTIME_PKG_NAME}/train/lib)
|
set(RUNTIME_LIB_DIR ${RUNTIME_PKG_NAME}/train/lib)
|
||||||
set(MIND_DATA_INC_DIR ${RUNTIME_PKG_NAME}/train/include/dataset)
|
set(MIND_DATA_INC_DIR ${RUNTIME_PKG_NAME}/train/include/dataset)
|
||||||
set(TURBO_DIR ${RUNTIME_PKG_NAME}/train/third_party/libjpeg-turbo)
|
set(TURBO_DIR ${RUNTIME_PKG_NAME}/train/third_party/libjpeg-turbo)
|
||||||
|
set(SECUREC_DIR ${RUNTIME_PKG_NAME}/train/third_party/securec)
|
||||||
set(MINDSPORE_LITE_LIB_NAME libmindspore-lite-train)
|
set(MINDSPORE_LITE_LIB_NAME libmindspore-lite-train)
|
||||||
set(BENCHMARK_NAME benchmark_train)
|
set(BENCHMARK_NAME benchmark_train)
|
||||||
set(BENCHMARK_ROOT_DIR ${RUNTIME_PKG_NAME}/tools/benchmark_train)
|
set(BENCHMARK_ROOT_DIR ${RUNTIME_PKG_NAME}/tools/benchmark_train)
|
||||||
|
@ -21,6 +22,7 @@ else()
|
||||||
set(RUNTIME_LIB_DIR ${RUNTIME_PKG_NAME}/inference/lib)
|
set(RUNTIME_LIB_DIR ${RUNTIME_PKG_NAME}/inference/lib)
|
||||||
set(MIND_DATA_INC_DIR ${RUNTIME_PKG_NAME}/inference/include/dataset)
|
set(MIND_DATA_INC_DIR ${RUNTIME_PKG_NAME}/inference/include/dataset)
|
||||||
set(TURBO_DIR ${RUNTIME_PKG_NAME}/inference/third_party/libjpeg-turbo)
|
set(TURBO_DIR ${RUNTIME_PKG_NAME}/inference/third_party/libjpeg-turbo)
|
||||||
|
set(SECUREC_DIR ${RUNTIME_PKG_NAME}/inference/third_party/securec)
|
||||||
set(MINDSPORE_LITE_LIB_NAME libmindspore-lite)
|
set(MINDSPORE_LITE_LIB_NAME libmindspore-lite)
|
||||||
set(BENCHMARK_NAME benchmark)
|
set(BENCHMARK_NAME benchmark)
|
||||||
set(BENCHMARK_ROOT_DIR ${RUNTIME_PKG_NAME}/tools/benchmark)
|
set(BENCHMARK_ROOT_DIR ${RUNTIME_PKG_NAME}/tools/benchmark)
|
||||||
|
@ -40,21 +42,33 @@ if(BUILD_MINDDATA STREQUAL "full")
|
||||||
|
|
||||||
if(PLATFORM_ARM64)
|
if(PLATFORM_ARM64)
|
||||||
file(GLOB JPEGTURBO_LIB_LIST ${jpeg_turbo_LIBPATH}/*.so)
|
file(GLOB JPEGTURBO_LIB_LIST ${jpeg_turbo_LIBPATH}/*.so)
|
||||||
install(FILES ${TOP_DIR}/mindspore/lite/build/minddata/libminddata-lite.so
|
install(FILES ${TOP_DIR}/mindspore/lite/build/minddata/libminddata-lite.so DESTINATION
|
||||||
DESTINATION ${RUNTIME_LIB_DIR} COMPONENT ${RUNTIME_COMPONENT_NAME})
|
${RUNTIME_LIB_DIR} COMPONENT ${RUNTIME_COMPONENT_NAME})
|
||||||
|
install(FILES ${TOP_DIR}/mindspore/lite/build/minddata/libminddata-lite_static.a DESTINATION
|
||||||
|
${RUNTIME_LIB_DIR} COMPONENT ${RUNTIME_COMPONENT_NAME})
|
||||||
install(FILES ${JPEGTURBO_LIB_LIST} DESTINATION ${TURBO_DIR}/lib COMPONENT ${RUNTIME_COMPONENT_NAME})
|
install(FILES ${JPEGTURBO_LIB_LIST} DESTINATION ${TURBO_DIR}/lib COMPONENT ${RUNTIME_COMPONENT_NAME})
|
||||||
|
install(FILES ${TOP_DIR}/mindspore/lite/build/securec/src/libsecurec.a
|
||||||
|
DESTINATION ${SECUREC_DIR} COMPONENT ${RUNTIME_COMPONENT_NAME})
|
||||||
elseif(PLATFORM_ARM32)
|
elseif(PLATFORM_ARM32)
|
||||||
file(GLOB JPEGTURBO_LIB_LIST ${jpeg_turbo_LIBPATH}/*.so)
|
file(GLOB JPEGTURBO_LIB_LIST ${jpeg_turbo_LIBPATH}/*.so)
|
||||||
install(FILES ${TOP_DIR}/mindspore/lite/build/minddata/libminddata-lite.so DESTINATION
|
install(FILES ${TOP_DIR}/mindspore/lite/build/minddata/libminddata-lite.so DESTINATION
|
||||||
${RUNTIME_LIB_DIR} COMPONENT ${RUNTIME_COMPONENT_NAME})
|
${RUNTIME_LIB_DIR} COMPONENT ${RUNTIME_COMPONENT_NAME})
|
||||||
|
install(FILES ${TOP_DIR}/mindspore/lite/build/minddata/libminddata-lite_static.a DESTINATION
|
||||||
|
${RUNTIME_LIB_DIR} COMPONENT ${RUNTIME_COMPONENT_NAME})
|
||||||
install(FILES ${JPEGTURBO_LIB_LIST} DESTINATION ${TURBO_DIR}/lib COMPONENT ${RUNTIME_COMPONENT_NAME})
|
install(FILES ${JPEGTURBO_LIB_LIST} DESTINATION ${TURBO_DIR}/lib COMPONENT ${RUNTIME_COMPONENT_NAME})
|
||||||
|
install(FILES ${TOP_DIR}/mindspore/lite/build/securec/src/libsecurec.a
|
||||||
|
DESTINATION ${SECUREC_DIR} COMPONENT ${RUNTIME_COMPONENT_NAME})
|
||||||
else()
|
else()
|
||||||
install(FILES ${TOP_DIR}/mindspore/lite/build/minddata/libminddata-lite.so DESTINATION
|
install(FILES ${TOP_DIR}/mindspore/lite/build/minddata/libminddata-lite.so DESTINATION
|
||||||
${RUNTIME_LIB_DIR} COMPONENT ${RUNTIME_COMPONENT_NAME})
|
${RUNTIME_LIB_DIR} COMPONENT ${RUNTIME_COMPONENT_NAME})
|
||||||
|
install(FILES ${TOP_DIR}/mindspore/lite/build/minddata/libminddata-lite_static.a DESTINATION
|
||||||
|
${RUNTIME_LIB_DIR} COMPONENT ${RUNTIME_COMPONENT_NAME})
|
||||||
install(FILES ${jpeg_turbo_LIBPATH}/libjpeg.so.62.3.0 DESTINATION ${TURBO_DIR}/lib
|
install(FILES ${jpeg_turbo_LIBPATH}/libjpeg.so.62.3.0 DESTINATION ${TURBO_DIR}/lib
|
||||||
RENAME libjpeg.so.62 COMPONENT ${RUNTIME_COMPONENT_NAME})
|
RENAME libjpeg.so.62 COMPONENT ${RUNTIME_COMPONENT_NAME})
|
||||||
install(FILES ${jpeg_turbo_LIBPATH}/libturbojpeg.so.0.2.0 DESTINATION ${TURBO_DIR}/lib
|
install(FILES ${jpeg_turbo_LIBPATH}/libturbojpeg.so.0.2.0 DESTINATION ${TURBO_DIR}/lib
|
||||||
RENAME libturbojpeg.so.0 COMPONENT ${RUNTIME_COMPONENT_NAME})
|
RENAME libturbojpeg.so.0 COMPONENT ${RUNTIME_COMPONENT_NAME})
|
||||||
|
install(FILES ${TOP_DIR}/mindspore/lite/build/securec/src/libsecurec.a
|
||||||
|
DESTINATION ${SECUREC_DIR} COMPONENT ${RUNTIME_COMPONENT_NAME})
|
||||||
endif()
|
endif()
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
|
|
|
@ -283,22 +283,37 @@ if(BUILD_MINDDATA STREQUAL "full")
|
||||||
)
|
)
|
||||||
|
|
||||||
add_library(minddata-lite SHARED
|
add_library(minddata-lite SHARED
|
||||||
${MINDDATA_KERNELS_IMAGE_LITE_CV_FILES}
|
${MINDDATA_KERNELS_IMAGE_LITE_CV_FILES}
|
||||||
${CMAKE_CURRENT_SOURCE_DIR}/../src/common/log_adapter.cc
|
${CMAKE_CURRENT_SOURCE_DIR}/../src/common/log_adapter.cc
|
||||||
${CORE_DIR}/utils/ms_utils.cc
|
${CORE_DIR}/utils/ms_utils.cc
|
||||||
${MINDDATA_FULL_SRC}
|
${MINDDATA_FULL_SRC}
|
||||||
)
|
)
|
||||||
|
|
||||||
|
add_library(minddata-lite_static STATIC
|
||||||
|
${MINDDATA_KERNELS_IMAGE_LITE_CV_FILES}
|
||||||
|
${CMAKE_CURRENT_SOURCE_DIR}/../src/common/log_adapter.cc
|
||||||
|
${CORE_DIR}/utils/ms_utils.cc
|
||||||
|
${MINDDATA_FULL_SRC}
|
||||||
|
)
|
||||||
|
|
||||||
add_dependencies(minddata-lite fbs_src)
|
add_dependencies(minddata-lite fbs_src)
|
||||||
|
add_dependencies(minddata-lite_static fbs_src)
|
||||||
|
|
||||||
find_package(Threads REQUIRED)
|
find_package(Threads REQUIRED)
|
||||||
target_link_libraries(minddata-lite
|
target_link_libraries(minddata-lite
|
||||||
securec
|
securec
|
||||||
mindspore::jpeg_turbo
|
mindspore::jpeg_turbo
|
||||||
mindspore::turbojpeg
|
mindspore::turbojpeg
|
||||||
mindspore::json
|
mindspore::json
|
||||||
Threads::Threads
|
Threads::Threads
|
||||||
)
|
)
|
||||||
|
target_link_libraries(minddata-lite_static
|
||||||
|
securec
|
||||||
|
mindspore::jpeg_turbo
|
||||||
|
mindspore::turbojpeg
|
||||||
|
mindspore::json
|
||||||
|
Threads::Threads
|
||||||
|
)
|
||||||
|
|
||||||
# ref: https://github.com/android/ndk/issues/1202
|
# ref: https://github.com/android/ndk/issues/1202
|
||||||
if(PLATFORM_ARM32)
|
if(PLATFORM_ARM32)
|
||||||
|
@ -307,10 +322,12 @@ if(BUILD_MINDDATA STREQUAL "full")
|
||||||
MESSAGE(FATAL_ERROR "Cannot find libclang_rt.builtins-arm-androi2d.a in $ENV{ANDROID_NDK}")
|
MESSAGE(FATAL_ERROR "Cannot find libclang_rt.builtins-arm-androi2d.a in $ENV{ANDROID_NDK}")
|
||||||
endif()
|
endif()
|
||||||
target_link_libraries(minddata-lite ${LIBCLANG_RT_LIB})
|
target_link_libraries(minddata-lite ${LIBCLANG_RT_LIB})
|
||||||
|
target_link_libraries(minddata-lite_static ${LIBCLANG_RT_LIB})
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
if(PLATFORM_ARM32 OR PLATFORM_ARM64)
|
if(PLATFORM_ARM32 OR PLATFORM_ARM64)
|
||||||
target_link_libraries(minddata-lite log)
|
target_link_libraries(minddata-lite log)
|
||||||
|
target_link_libraries(minddata-lite_static log)
|
||||||
elseif()
|
elseif()
|
||||||
endif()
|
endif()
|
||||||
elseif(BUILD_MINDDATA STREQUAL "wrapper")
|
elseif(BUILD_MINDDATA STREQUAL "wrapper")
|
||||||
|
|
|
@ -0,0 +1 @@
|
||||||
|
debug.txt
|
|
@ -0,0 +1,33 @@
|
||||||
|
cmake_minimum_required(VERSION 3.15.5)
|
||||||
|
project(MinddataCropper)
|
||||||
|
|
||||||
|
add_compile_definitions(_GLIBCXX_USE_CXX11_ABI=0)
|
||||||
|
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++17 -fPIE -fPIC -Wl,--allow-shlib-undefined -s")
|
||||||
|
|
||||||
|
file(GLOB minddata_OBJ CONFIGURE_DEPENDS "tmp/*.o")
|
||||||
|
|
||||||
|
if(NOT minddata_OBJ)
|
||||||
|
message(FATAL_ERROR "Your code is not using any MindData functionality.\n \
|
||||||
|
... libminddata-lite_min.so is not needed\n... Terminating crop.sh")
|
||||||
|
endif()
|
||||||
|
|
||||||
|
message(STATUS ${CMAKE_CXX_COMPILER})
|
||||||
|
|
||||||
|
add_custom_command(
|
||||||
|
OUTPUT libminddata-lite_min.so
|
||||||
|
PRE_BUILD
|
||||||
|
COMMAND ${CMAKE_CXX_COMPILER}
|
||||||
|
-shared
|
||||||
|
-o libminddata-lite_min.so
|
||||||
|
${minddata_OBJ}
|
||||||
|
${EXTERNAL_DEPS}
|
||||||
|
-pthread
|
||||||
|
-std=c++17
|
||||||
|
-fPIE -fPIC
|
||||||
|
-s
|
||||||
|
)
|
||||||
|
|
||||||
|
add_custom_target(
|
||||||
|
minddata-lite ALL
|
||||||
|
DEPENDS libminddata-lite_min.so
|
||||||
|
)
|
|
@ -0,0 +1,71 @@
|
||||||
|
|
||||||
|
# Objective
|
||||||
|
|
||||||
|
The goal of this tool is to allow the user to reduce the size of MindData lite package they ship with their code.
|
||||||
|
|
||||||
|
# How to run
|
||||||
|
|
||||||
|
This tool has two parts: the first part only needs to be run once, when the source code for mindspore is changed
|
||||||
|
while the second part should be run every time the user code changes.
|
||||||
|
|
||||||
|
Note that you need to run this tool on the server side if you are planning to use your code on an edge device.
|
||||||
|
|
||||||
|
## Step 1: Configure the cropper tool
|
||||||
|
|
||||||
|
You need to have mindspore installed on your system to run this python script.
|
||||||
|
Additionally, you need to have the mindspore source code present in your system
|
||||||
|
as this script processes mindspore's source code.
|
||||||
|
|
||||||
|
To execute the first part simply run:
|
||||||
|
|
||||||
|
```console
|
||||||
|
python cropper_configure.py
|
||||||
|
```
|
||||||
|
|
||||||
|
## Step 2: Crop the MindData lite package
|
||||||
|
|
||||||
|
The second part needs to be run every time the user adds or removes one of MD operators in their code.
|
||||||
|
|
||||||
|
For the second part, you need to run:
|
||||||
|
|
||||||
|
```console
|
||||||
|
./crop.sh -p <path to mindspore package> <source files>
|
||||||
|
```
|
||||||
|
|
||||||
|
Note that you need to provide the name of all files that are using any of the MindData functionalities.
|
||||||
|
|
||||||
|
`ANDROID_NDK` environment variable needs to be set as well if the target device is android.
|
||||||
|
|
||||||
|
Example: `./crop.sh -p ~/mindspore/ foo.cc foo.h bar.cc bar.h`
|
||||||
|
|
||||||
|
This code will create the __libminddata-lite_min.so__ library specific to your code and will also print for you a list of
|
||||||
|
shared objects that your code depends on (including __libminddata-lite\_min.so__).
|
||||||
|
Note that you need to copy these files to your target device and set the linker flag accordingly.
|
||||||
|
|
||||||
|
# How it works
|
||||||
|
|
||||||
|
The first step (configuration) creates a few of files that are needed in the second step.
|
||||||
|
These files include _dependencies.txt_, _associations.txt_, and _debug.txt_.
|
||||||
|
While the third file (_debug.txt_) is only for debugging purposes (debugging cropper tool),
|
||||||
|
the other two files are used in the second part.
|
||||||
|
_associations.txt_ contains the entry points (IR level source files) for ops that the user may use in their code.
|
||||||
|
The other file, _dependencies.txt_, contains all dependencies for all those entry points.
|
||||||
|
|
||||||
|
When the user runs the crop script, _parser.py_ will be run on their code to find the ops they have used.
|
||||||
|
Afterwards, the text files will be used to keep the needed object files
|
||||||
|
(by removing unnecessary object files from the static library containing all of them).
|
||||||
|
Finally, the remaining object files will be used to create a new shared object file (_libminddata-lite\_min.so_).
|
||||||
|
|
||||||
|
# Requirements
|
||||||
|
|
||||||
|
Step 1:
|
||||||
|
|
||||||
|
* Python3
|
||||||
|
* mindspore
|
||||||
|
* mindspore source code
|
||||||
|
|
||||||
|
Step 2:
|
||||||
|
|
||||||
|
* Python3
|
||||||
|
* cmake
|
||||||
|
* Android NDK (if target device is android)
|
File diff suppressed because one or more lines are too long
|
@ -0,0 +1,154 @@
|
||||||
|
# Copyright 2021 Huawei Technologies Co., Ltd
|
||||||
|
#
|
||||||
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
# you may not use this file except in compliance with the License.
|
||||||
|
# You may obtain a copy of the License at
|
||||||
|
#
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
# See the License for the specific language governing permissions and
|
||||||
|
# limitations under the License.
|
||||||
|
# ============================================================================
|
||||||
|
""" build MindData lite minimum library """
|
||||||
|
|
||||||
|
import glob
|
||||||
|
import itertools
|
||||||
|
import json
|
||||||
|
from operator import itemgetter
|
||||||
|
import os
|
||||||
|
from pprint import pprint
|
||||||
|
import sys
|
||||||
|
import warnings
|
||||||
|
|
||||||
|
import parser
|
||||||
|
|
||||||
|
DEPENDENCY_FILENAME = 'dependencies.txt'
|
||||||
|
ASSOCIATION_FILENAME = 'associations.txt'
|
||||||
|
ALL_DEPS_FILENAME = 'needed_dependencies.txt'
|
||||||
|
OBJECTS_DIR = 'tmp/'
|
||||||
|
|
||||||
|
ESSENTIAL_OBJECTS = [
|
||||||
|
# 'types.cc.o',
|
||||||
|
# 'tensor_impl.cc.o',
|
||||||
|
'random_sampler.cc.o', # default value for datasets (may not exist in their code)
|
||||||
|
'random_sampler_ir.cc.o', # default value for datasets (may not exist in their code)
|
||||||
|
]
|
||||||
|
|
||||||
|
|
||||||
|
def load_dependencies():
|
||||||
|
"""
|
||||||
|
Read dependencies.txt and load it into a dict.
|
||||||
|
|
||||||
|
:return: a dict containing list of dependencies for almost any file in MindData lite
|
||||||
|
"""
|
||||||
|
if not os.path.isfile(DEPENDENCY_FILENAME):
|
||||||
|
raise FileNotFoundError("dependency file ({}) does not exist.\n"
|
||||||
|
"Please run cropper_configure.py first.".format(DEPENDENCY_FILENAME))
|
||||||
|
with open(DEPENDENCY_FILENAME) as f:
|
||||||
|
dep_dict = json.load(f)
|
||||||
|
return dep_dict
|
||||||
|
|
||||||
|
|
||||||
|
def load_associations():
|
||||||
|
"""
|
||||||
|
Read associations.txt and load it into a dict.
|
||||||
|
|
||||||
|
:return: a dict containing entry point (a filename) for each op
|
||||||
|
"""
|
||||||
|
if not os.path.isfile(ASSOCIATION_FILENAME):
|
||||||
|
raise FileNotFoundError("association file ({}) does not exist.\n"
|
||||||
|
"Please run cropper_configure.py first.".format(ASSOCIATION_FILENAME))
|
||||||
|
with open(ASSOCIATION_FILENAME) as f:
|
||||||
|
_dict = json.load(f)
|
||||||
|
return _dict
|
||||||
|
|
||||||
|
|
||||||
|
def get_unique_dependencies(dependencies_dict, associations_dict, user_ops):
|
||||||
|
"""
|
||||||
|
Find which dependencies we need to include according to the ops found in the user code.
|
||||||
|
|
||||||
|
:param dependencies_dict: a dict containing list of dependencies for almost any file in MindData lite
|
||||||
|
:param associations_dict: a dcit containing entry point (a filename) for each op
|
||||||
|
:param user_ops: a list of ops found in the user code
|
||||||
|
:return: a list of dependencies needed based on the user code
|
||||||
|
"""
|
||||||
|
selected_entries = [] # itemgetter(*user_ops)(associations_dict)
|
||||||
|
for op in user_ops:
|
||||||
|
print('{} --> {}'.format(op, associations_dict[op]))
|
||||||
|
selected_entries.append(associations_dict[op])
|
||||||
|
selected_files = itemgetter(*selected_entries)(dependencies_dict)
|
||||||
|
selected_files = list(itertools.chain(*selected_files))
|
||||||
|
return sorted(list(set().union(selected_files)))
|
||||||
|
|
||||||
|
|
||||||
|
def remove_unused_objects(final_deps, essentials, all_object_files):
|
||||||
|
"""
|
||||||
|
Remove object files that are determined to be NOT needed to run user code
|
||||||
|
as they are not in the dependencies of user code.
|
||||||
|
|
||||||
|
:param final_deps: a list of dependencies needed based on the user code
|
||||||
|
:param essentials: essential objects that should not be removed from final lib
|
||||||
|
:param all_object_files: a lsit of all objects available in our static library
|
||||||
|
:return: None
|
||||||
|
"""
|
||||||
|
# find objects which are not part of any dependency (lstrip is needed for remove '_' added in crop.sh)
|
||||||
|
to_be_removed = [x for x in all_object_files if not any(x.lstrip('_')[:-5] in y for y in final_deps)]
|
||||||
|
# keep the ones that are not an essential object file. (lstrip is needed for remove '_' added in crop.sh)
|
||||||
|
to_be_removed = [x for x in to_be_removed if not any(x.lstrip('_') in y for y in essentials)]
|
||||||
|
|
||||||
|
print('Removing:', len(to_be_removed), 'unused objects.')
|
||||||
|
pprint(sorted(to_be_removed))
|
||||||
|
for filename in to_be_removed:
|
||||||
|
os.remove(os.path.join(OBJECTS_DIR, filename))
|
||||||
|
|
||||||
|
|
||||||
|
def main():
|
||||||
|
# load tables created using cropper.py
|
||||||
|
dependencies_dict = load_dependencies()
|
||||||
|
associations_dict = load_associations()
|
||||||
|
|
||||||
|
# get all objects filename
|
||||||
|
all_object_files = [x[x.rfind('/') + 1:] for x in glob.glob('{}*.o'.format(OBJECTS_DIR))]
|
||||||
|
print("All Obj files: {}".format(len(all_object_files)))
|
||||||
|
|
||||||
|
# find ops in user code
|
||||||
|
my_parser = parser.SimpleParser()
|
||||||
|
temp = [my_parser.parse(x) for x in user_code_filenames]
|
||||||
|
user_ops = set(itertools.chain(*temp))
|
||||||
|
print('user ops: {}'.format(user_ops))
|
||||||
|
|
||||||
|
# user is not using any MindData op
|
||||||
|
if not user_ops:
|
||||||
|
warnings.warn('No MindData Ops detected in your code...')
|
||||||
|
remove_unused_objects([], [], all_object_files)
|
||||||
|
with open(os.path.join(OBJECTS_DIR, ALL_DEPS_FILENAME), 'w') as _:
|
||||||
|
pass
|
||||||
|
exit(0)
|
||||||
|
|
||||||
|
# find dependencies required (based on user ops)
|
||||||
|
unique_deps = get_unique_dependencies(dependencies_dict, associations_dict, user_ops)
|
||||||
|
print('Unique Deps (.h): {}'.format(len(unique_deps)))
|
||||||
|
print('Unique Deps (.cc): {}'.format(len(list(filter(lambda x: x[-2:] == 'cc', unique_deps)))))
|
||||||
|
|
||||||
|
# add essential files to dependency files
|
||||||
|
final_deps = set(unique_deps + dependencies_dict['ESSENTIAL'])
|
||||||
|
print('Total Deps (.h): {}'.format(len(final_deps)))
|
||||||
|
|
||||||
|
# delete the rest of the object files from directory.
|
||||||
|
remove_unused_objects(final_deps, ESSENTIAL_OBJECTS, all_object_files)
|
||||||
|
|
||||||
|
# write all dependencies to the file (for extracting external ones)
|
||||||
|
with open(os.path.join(OBJECTS_DIR, ALL_DEPS_FILENAME), 'w') as fout:
|
||||||
|
fout.write("\n".join(unique_deps) + '\n')
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
# get user code filename(s) as argument(s) to code
|
||||||
|
if len(sys.argv) <= 1:
|
||||||
|
print("usage: python build_lib.py <xxx.y> [<xxx.z>]")
|
||||||
|
exit(1)
|
||||||
|
user_code_filenames = sys.argv[1:]
|
||||||
|
main()
|
|
@ -0,0 +1,188 @@
|
||||||
|
#!/bin/bash
|
||||||
|
|
||||||
|
usage()
|
||||||
|
{
|
||||||
|
echo "Usage:"
|
||||||
|
echo "bash crop.sh -p <path-to-mindspore-directory> <source-file> [<more-source-files>] \\"
|
||||||
|
echo "bash crop.sh -h \\"
|
||||||
|
echo ""
|
||||||
|
echo "Options:"
|
||||||
|
echo " -p path to mindspore directory"
|
||||||
|
echo " -h print usage"
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
# check and set options
|
||||||
|
checkopts()
|
||||||
|
{
|
||||||
|
while getopts ':p:h' opt
|
||||||
|
do
|
||||||
|
OPTARG=$(echo ${OPTARG} | tr '[A-Z]' '[a-z]')
|
||||||
|
case "${opt}" in
|
||||||
|
p)
|
||||||
|
MINDSPORE_PATH="$(cd "${OPTARG}" &> /dev/null && pwd )"
|
||||||
|
;;
|
||||||
|
h)
|
||||||
|
usage
|
||||||
|
exit 1
|
||||||
|
;;
|
||||||
|
*)
|
||||||
|
echo "Unknown option: \"${OPTARG}\""
|
||||||
|
usage
|
||||||
|
exit 1
|
||||||
|
esac
|
||||||
|
done
|
||||||
|
}
|
||||||
|
|
||||||
|
checkopts "$@"
|
||||||
|
|
||||||
|
# exit if less than 3 args are given by user
|
||||||
|
if [ $# -lt 3 ]; then
|
||||||
|
usage
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
|
||||||
|
# exit if mindspore path is not given by user
|
||||||
|
if [ -z "${MINDSPORE_PATH}" ]; then
|
||||||
|
echo -e "\e[31mPlease set MINDSPORE_PATH environment variable.\e[0m"
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
|
||||||
|
ORIGNAL_PATH="$PWD"
|
||||||
|
FILE_PATH="$(cd "$(dirname "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )"
|
||||||
|
|
||||||
|
# getting absolute paths for user provided filenames
|
||||||
|
USER_CODES=""
|
||||||
|
for i in "${@:OPTIND}";
|
||||||
|
do
|
||||||
|
USER_CODES+="$(cd "$(dirname "${i}" )" &> /dev/null && pwd )/$(basename "${i}")"
|
||||||
|
done
|
||||||
|
# exit if user has not given any argument as their code
|
||||||
|
if [ -z "${USER_CODES}" ]; then
|
||||||
|
echo -e "\e[31mPlease provide your file names as arguments.\e[0m"
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
echo "Provided files: $USER_CODES"
|
||||||
|
|
||||||
|
echo "MS PATH: $MINDSPORE_PATH"
|
||||||
|
echo "CWD: $ORIGNAL_PATH"
|
||||||
|
echo "File PATH: $FILE_PATH"
|
||||||
|
|
||||||
|
|
||||||
|
cd $FILE_PATH
|
||||||
|
|
||||||
|
MD_LIB_FILENAME="libminddata-lite_static.a"
|
||||||
|
|
||||||
|
# locate original MindData lite library
|
||||||
|
MD_LIB_PATH=`find $MINDSPORE_PATH -name "${MD_LIB_FILENAME}" | head -n 1`
|
||||||
|
if [ -z "${MD_LIB_PATH}" ]; then
|
||||||
|
echo -e "\e[31mMindData lite static library could not be found.\e[0m"
|
||||||
|
cd $ORIGNAL_PATH
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
|
||||||
|
|
||||||
|
# extract all objects of static lib to tmp/
|
||||||
|
mkdir -p tmp
|
||||||
|
cp $MD_LIB_PATH tmp
|
||||||
|
cd tmp
|
||||||
|
# extract objects with identical names by prepending (one or more) '_' to their names
|
||||||
|
# (this scruipt supports more than 2 duplicate filenames)
|
||||||
|
DUPLICATES=`ar t "${MD_LIB_FILENAME}" | sort | uniq -d`
|
||||||
|
for dup in $DUPLICATES;
|
||||||
|
do
|
||||||
|
i=0
|
||||||
|
prepend_var="_"
|
||||||
|
while :
|
||||||
|
do
|
||||||
|
i=$((i + 1))
|
||||||
|
# check if more duplicates are available (break otherwise)
|
||||||
|
error_output=$(ar xN $i "${MD_LIB_FILENAME}" $dup 2>&1)
|
||||||
|
if [ -n "$error_output" ]; then
|
||||||
|
break
|
||||||
|
fi
|
||||||
|
mv $dup "${prepend_var}${dup}"
|
||||||
|
prepend_var="${prepend_var}_"
|
||||||
|
done
|
||||||
|
done
|
||||||
|
|
||||||
|
# extract unique files from static library
|
||||||
|
UNIQUES=`ar t "${MD_LIB_FILENAME}" | sort | uniq -u`
|
||||||
|
ar x "${MD_LIB_FILENAME}" ${UNIQUES}
|
||||||
|
cd ..
|
||||||
|
|
||||||
|
# remove unused object files
|
||||||
|
# write needed depsendencies to tmp/needed_dependencies.txt
|
||||||
|
python build_lib.py ${USER_CODES}
|
||||||
|
retVal=$?
|
||||||
|
if [ $retVal -ne 0 ]; then
|
||||||
|
cd $ORIGNAL_PATH
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
|
||||||
|
LD_SEP='\n'
|
||||||
|
EX_SEP=$';'
|
||||||
|
LD_PATHS=""
|
||||||
|
EXTERNAL_DEPS=""
|
||||||
|
|
||||||
|
# locate external dependencies for MindData lite
|
||||||
|
LIBJPEG_PATH=`find $MINDSPORE_PATH -name "libjpeg.so*" | head -n 1`
|
||||||
|
LIBTURBOJPEG_PATH=`find $MINDSPORE_PATH -name "libturbojpeg.so*" | head -n 1`
|
||||||
|
LIBSECUREC_PATH=`find $MINDSPORE_PATH -name libsecurec.a | head -n 1`
|
||||||
|
|
||||||
|
# resolve symbolc links
|
||||||
|
if [ "$(uname)" == "Darwin" ]; then
|
||||||
|
c=$(file -b "$(readlink $LIBJPEG_PATH)")
|
||||||
|
elif [ "$(expr substr "$(uname -s)" 1 5)" == "Linux" ]; then
|
||||||
|
c=$(file -b "$(readlink -f $LIBJPEG_PATH)")
|
||||||
|
fi
|
||||||
|
# detect system architecture
|
||||||
|
IFS="," read -r -a array <<< "$c"
|
||||||
|
TARGET_ARCHITECTURE=${array[1]##* }
|
||||||
|
echo "Architecture: $TARGET_ARCHITECTURE"
|
||||||
|
|
||||||
|
# exit if $ANDROID_NDK is not set by user for ARM32 or ARM64
|
||||||
|
if [ "$TARGET_ARCHITECTURE" == "ARM64" ]; then
|
||||||
|
if [ -z "${ANDROID_NDK}" ]; then
|
||||||
|
echo -e "\e[31mPlease set ANDROID_NDK environment variable.\e[0m"
|
||||||
|
cd $ORIGNAL_PATH
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
elif [ "$TARGET_ARCHITECTURE" == "ARM32" ]; then
|
||||||
|
if [ -z "${ANDROID_NDK}" ]; then
|
||||||
|
echo -e "\e[31mPlease set ANDROID_NDK environment variable.\e[0m"
|
||||||
|
cd $ORIGNAL_PATH
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
# add LIBCLANG_RT_LIB for ARM32
|
||||||
|
LIBCLANG_RT_LIB=`find $ANDROID_NDK -name libclang_rt.builtins-arm-android.a | head -n 1`
|
||||||
|
EXTERNAL_DEPS=${EXTERNAL_DEPS}${LIBCLANG_RT_LIB}${EX_SEP}
|
||||||
|
else
|
||||||
|
echo "No need for ANDROID_NDK"
|
||||||
|
fi
|
||||||
|
# Note: add .a files only to EXTERNAL_DEPS.
|
||||||
|
if grep -q 'jpeg' "tmp/needed_dependencies.txt"; then
|
||||||
|
LD_PATHS=${LD_PATHS}${LIBJPEG_PATH}${LD_SEP}
|
||||||
|
LD_PATHS=${LD_PATHS}${LIBTURBOJPEG_PATH}${LD_SEP}
|
||||||
|
EXTERNAL_DEPS=${EXTERNAL_DEPS}${LIBJPEG_PATH}${EX_SEP}
|
||||||
|
EXTERNAL_DEPS=${EXTERNAL_DEPS}${LIBTURBOJPEG_PATH}${EX_SEP}
|
||||||
|
fi
|
||||||
|
# we always need securec library
|
||||||
|
EXTERNAL_DEPS=${EXTERNAL_DEPS}${LIBSECUREC_PATH}${EX_SEP}
|
||||||
|
|
||||||
|
# create .so lib from remaining object files
|
||||||
|
cmake -S . -B . \
|
||||||
|
-DEXTERNAL_DEPS="${EXTERNAL_DEPS}" \
|
||||||
|
-DARCHITECTURE=$TARGET_ARCHITECTURE
|
||||||
|
|
||||||
|
# no dependencies to MindData lite
|
||||||
|
retVal=$?
|
||||||
|
if [ $retVal -eq 0 ]; then
|
||||||
|
make
|
||||||
|
echo -e "\e[32mLibrary was built successfully, The new list of MindData-related dependencies is as follows:\e[0m"
|
||||||
|
echo -e "\e[36m$LD_PATHS$PWD/libminddata-lite_min.so\e[0m"
|
||||||
|
fi
|
||||||
|
|
||||||
|
rm -rf tmp/
|
||||||
|
|
||||||
|
cd $ORIGNAL_PATH
|
|
@ -0,0 +1,389 @@
|
||||||
|
# Copyright 2021 Huawei Technologies Co., Ltd
|
||||||
|
#
|
||||||
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
# you may not use this file except in compliance with the License.
|
||||||
|
# You may obtain a copy of the License at
|
||||||
|
#
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
# See the License for the specific language governing permissions and
|
||||||
|
# limitations under the License.
|
||||||
|
# ============================================================================
|
||||||
|
""" configure cropper tool """
|
||||||
|
|
||||||
|
from functools import lru_cache
|
||||||
|
import glob
|
||||||
|
import json
|
||||||
|
import os
|
||||||
|
import queue
|
||||||
|
import re
|
||||||
|
import subprocess
|
||||||
|
|
||||||
|
from mindspore import log as logger
|
||||||
|
|
||||||
|
DEFINE_STR = "-DENABLE_ANDROID -DENABLE_ARM -DENABLE_ARM64 -DENABLE_NEON -DNO_DLIB -DUSE_ANDROID_LOG -DANDROID"
|
||||||
|
|
||||||
|
ASSOCIATIONS_FILENAME = 'associations.txt'
|
||||||
|
DEPENDENCIES_FILENAME = 'dependencies.txt'
|
||||||
|
ERRORS_FILENAME = 'debug.txt'
|
||||||
|
OUTPUT_LOCATION = "mindspore/lite/tools/dataset/cropper"
|
||||||
|
|
||||||
|
# needed for gcc command for include directories
|
||||||
|
MANUAL_HEADERS = [
|
||||||
|
".",
|
||||||
|
"mindspore",
|
||||||
|
"mindspore/ccsrc",
|
||||||
|
"mindspore/ccsrc/minddata/dataset",
|
||||||
|
"mindspore/ccsrc/minddata/dataset/kernels/image",
|
||||||
|
"mindspore/core",
|
||||||
|
"mindspore/lite",
|
||||||
|
]
|
||||||
|
|
||||||
|
# To stop gcc command once reaching these external headers
|
||||||
|
# (not all of them may be used now in MindData lite)
|
||||||
|
EXTERNAL_DEPS = [
|
||||||
|
"graphengine/inc/external",
|
||||||
|
"akg/third_party/fwkacllib/inc",
|
||||||
|
"third_party",
|
||||||
|
"third_party/securec/include",
|
||||||
|
"build/mindspore/_deps/sqlite-src",
|
||||||
|
"build/mindspore/_deps/pybind11-src/include",
|
||||||
|
"build/mindspore/_deps/tinyxml2-src",
|
||||||
|
"build/mindspore/_deps/jpeg_turbo-src",
|
||||||
|
"build/mindspore/_deps/jpeg_turbo-src/_build",
|
||||||
|
"build/mindspore/_deps/icu4c-src/icu4c/source/i18n",
|
||||||
|
"build/mindspore/_deps/icu4c-src/icu4c/source/common",
|
||||||
|
"mindspore/lite/build/_deps/tinyxml2-src",
|
||||||
|
"mindspore/lite/build/_deps/jpeg_turbo-src",
|
||||||
|
"mindspore/lite/build/_deps/jpeg_turbo-src/_build",
|
||||||
|
"mindspore/lite/build/_deps/nlohmann_json-src",
|
||||||
|
]
|
||||||
|
|
||||||
|
# API files which the corresponding objects and all objects for their dependencies must always be included.
|
||||||
|
ESSENTIAL_FILES_1 = [
|
||||||
|
"api/data_helper.cc",
|
||||||
|
"api/datasets.cc",
|
||||||
|
"api/execute.cc",
|
||||||
|
"api/iterator.cc",
|
||||||
|
]
|
||||||
|
|
||||||
|
# API files which the corresponding objects must always be included.
|
||||||
|
# (corresponding IR files will be included according to user ops)
|
||||||
|
ESSENTIAL_FILES_2 = [
|
||||||
|
"api/text.cc",
|
||||||
|
"api/transforms.cc",
|
||||||
|
"api/samplers.cc",
|
||||||
|
"api/vision.cc",
|
||||||
|
]
|
||||||
|
|
||||||
|
DATASET_PATH = "mindspore/ccsrc/minddata/dataset"
|
||||||
|
|
||||||
|
OPS_DIRS = [
|
||||||
|
"engine/ir/datasetops",
|
||||||
|
"engine/ir/datasetops/source",
|
||||||
|
"engine/ir/datasetops/source/samplers",
|
||||||
|
"kernels/ir/vision",
|
||||||
|
"kernels/ir/data",
|
||||||
|
"text/ir/kernels",
|
||||||
|
]
|
||||||
|
|
||||||
|
|
||||||
|
def extract_classname_samplers(header_content):
|
||||||
|
"""
|
||||||
|
Use regex to find class names in header files of samplers
|
||||||
|
|
||||||
|
:param header_content: string containing header of a sampler IR file
|
||||||
|
:return: list of sampler classes found
|
||||||
|
"""
|
||||||
|
return re.findall(r"(?<=class )[\w\d_]+(?=Obj : )", header_content)
|
||||||
|
|
||||||
|
|
||||||
|
def extract_classname_source_node(header_content):
|
||||||
|
"""
|
||||||
|
Use regex to find class names in header files of source nodes
|
||||||
|
|
||||||
|
:param header_content: string containing header of a source node IR file
|
||||||
|
:return: list of source node classes found
|
||||||
|
"""
|
||||||
|
return re.findall(r"(?<=class )[\w\d_]+(?=Node : )", header_content)
|
||||||
|
|
||||||
|
|
||||||
|
def extract_classname_nonsource_node(header_content):
|
||||||
|
"""
|
||||||
|
Use regex to find class names in header files of non-source nodes
|
||||||
|
|
||||||
|
:param header_content: string containing header of a non-source IR file
|
||||||
|
:return: list of non-source node classes found
|
||||||
|
"""
|
||||||
|
return re.findall(r"(?<=class )[\w\d_]+(?=Node : )", header_content)
|
||||||
|
|
||||||
|
|
||||||
|
def extract_classname_vision(header_content):
|
||||||
|
"""
|
||||||
|
Use regex to find class names in header files of vision ops
|
||||||
|
|
||||||
|
:param header_content: string containing header of a vision op IR file
|
||||||
|
:return: list of vision ops found
|
||||||
|
"""
|
||||||
|
return re.findall(r"(?<=class )[\w\d_]+(?=Operation : )", header_content)
|
||||||
|
|
||||||
|
|
||||||
|
def extract_classname_data(header_content):
    """
    Extract data op class names from a data op IR header.

    Uses the same ``class <Name>Operation : `` pattern as vision ops; only
    the directory it is registered for differs.

    :param header_content: string containing header of a data op IR file
    :return: list of data ops found
    """
    op_pattern = re.compile(r"(?<=class )[\w\d_]+(?=Operation : )")
    return op_pattern.findall(header_content)
|
||||||
|
|
||||||
|
|
||||||
|
def extract_classname_text(header_content):
    """
    Extract text op class names from a text op IR header.

    Uses the same ``class <Name>Operation : `` pattern as vision/data ops;
    only the directory it is registered for differs.

    :param header_content: string containing header of a text op IR file
    :return: list of text ops found
    """
    op_pattern = re.compile(r"(?<=class )[\w\d_]+(?=Operation : )")
    return op_pattern.findall(header_content)
|
||||||
|
|
||||||
|
|
||||||
|
# For each op type (directory) store the corresponding function which extracts op name.
# Keys are full paths under DATASET_PATH; every entry of OPS_DIRS must appear here,
# since get_deps_non_essential() raises on a directory with no registered extractor.
registered_functions = {
    os.path.join(DATASET_PATH, subdir): extractor
    for subdir, extractor in (
        ('engine/ir/datasetops/source/samplers', extract_classname_samplers),
        ('engine/ir/datasetops/source', extract_classname_source_node),
        ('engine/ir/datasetops', extract_classname_nonsource_node),
        ('kernels/ir/vision', extract_classname_vision),
        ('kernels/ir/data', extract_classname_data),
        ('text/ir/kernels', extract_classname_text),
    )
}
|
||||||
|
|
||||||
|
|
||||||
|
def get_headers():
    """
    Build the include-path flag string ("-Ixx/yy/ -Ixx/zz/ ...") passed to gcc.

    Combines the manually listed header dirs with the external dependency dirs.

    :return: a single string of -I flags to be passed to the compiler
    """
    include_dirs = MANUAL_HEADERS + EXTERNAL_DEPS
    # Join first, then wrap: every directory ends up as "-I<dir>/".
    joined_dirs = "/ -I".join(include_dirs)
    return "-I{}/".format(joined_dirs)
|
||||||
|
|
||||||
|
|
||||||
|
@lru_cache(maxsize=1024)
def get_dependencies_of_file(headers_flag, filename):
    """
    Create the direct dependency list for a file (file0.cc):
    file0.cc.o: file1.h, file2.h, ...

    Invokes ``gcc -MM -MG`` to generate make-style dependencies:
    -MM lists only non-system headers, -MG keeps going when a (generated)
    header cannot be found and emits it anyway. Results are memoized with
    lru_cache since the same file is queried from many entry points.

    :param headers_flag: string containing headers include paths with -I prepended to them.
    :param filename: a string containing path of a file.
    :return: a list of file names [file1.h, file2.h, file3.h] (the make
             target itself is dropped) and the captured stderr string
    """
    command = 'gcc -MM -MG {0} {1} {2}'.format(filename, DEFINE_STR, headers_flag)
    # NOTE(review): the command is built by plain string formatting and run
    # with shell=True; paths here are repo-controlled, but quoting is needed
    # if inputs could ever contain spaces or shell metacharacters.
    result = subprocess.run(command, shell=True,
                            stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    # gcc output looks like "file0.o: file0.cc file1.h \\\n file2.h ..." —
    # split on whitespace/backslash continuations and drop the target token.
    deps = re.split(r'[\s\\]+', result.stdout.decode('utf-8').strip(), flags=re.MULTILINE)[1:]
    return deps, result.stderr.decode('utf-8')
|
||||||
|
|
||||||
|
|
||||||
|
def needs_processing(dep_cc, processed_cc, queue_cc_set):
    """
    Decide whether a candidate file still needs a gcc dependency scan.

    A file is skipped when it was already processed, is already queued, or
    does not exist on disk (non-existent paths can appear for generated
    headers reported by ``gcc -MG`` and would make gcc error out).

    :param dep_cc: the candidate file to be processed by gcc
    :param processed_cc: set of files that have been already processed
    :param queue_cc_set: files currently in the queue (to be processed)
    :return: boolean, whether the file should be further processed by gcc
    """
    already_seen = dep_cc in processed_cc or dep_cc in queue_cc_set
    if already_seen:
        return False
    return os.path.isfile(dep_cc)
|
||||||
|
|
||||||
|
|
||||||
|
def build_source_file_path(dep_h):
    """
    Map a header file path to the path of its companion source file.

    - headers under any external dependency directory map to "EXTERNAL"
    - the special cxx_api ``include/api/types.h`` maps to its known .cc file
    - otherwise the extension is swapped (.hpp/.h -> .cc); if no such file
      exists, the header's own path is kept (may happen for cache)

    :param dep_h: a string containing path to the header file
    :return: dep_cc: a string containing path to the source file
    """
    if any(external_dir in dep_h for external_dir in EXTERNAL_DEPS):
        return "EXTERNAL"
    if 'include/api/types.h' in dep_h:
        return "mindspore/ccsrc/cxx_api/types.cc"
    candidate = dep_h.replace('.hpp', '.cc').replace('.h', '.cc')
    return candidate if os.path.isfile(candidate) else dep_h
|
||||||
|
|
||||||
|
|
||||||
|
def get_all_dependencies_of_file(headers_flag, filename):
    """
    Create the transitive dependency list for a file (incl. all source files needed).

    Breadth-first traversal: each file is scanned with gcc for its direct
    dependencies, each header dependency is mapped to its companion source
    file, and unseen files are queued for scanning in turn.

    :param headers_flag: string containing headers include paths with -I prepended to them.
    :param filename: a string containing path of a file.
    :return: all dependencies of that file (as a list, order unspecified)
             and the concatenated gcc error string
    """
    errors = []
    # a queue to process files
    queue_cc = queue.SimpleQueue()
    # a set of items that have ever been in queue_cc (faster access time
    # than scanning the queue; items are never removed from this set)
    queue_cc_set = set()
    # store processed files — doubles as the result set
    processed_cc = set()

    # add the source file to the queue
    queue_cc.put(filename)
    queue_cc_set.add(filename)

    while not queue_cc.empty():
        # process the first item in the queue
        curr_cc = queue_cc.get()
        deps, error = get_dependencies_of_file(headers_flag, curr_cc)
        errors.append(error)
        # mark processed BEFORE enqueuing children so a self-dependency
        # cannot re-queue the current file
        processed_cc.add(curr_cc)
        # prepare its dependencies for processing
        for dep_h in deps:
            dep_cc = build_source_file_path(dep_h)
            # ignore if marked as an external dependency — recorded under its
            # header path so it still appears in the result
            if dep_cc == "EXTERNAL":
                processed_cc.add(dep_h)
                continue
            # add to queue if needs processing (not seen, not queued, exists)
            if needs_processing(dep_cc, processed_cc, queue_cc_set):
                queue_cc.put(dep_cc)
                queue_cc_set.add(dep_cc)
    logger.debug('file: {} | deps: {}'.format(filename[filename.rfind('/') + 1:], len(processed_cc)))

    return list(processed_cc), "".join(errors)
|
||||||
|
|
||||||
|
|
||||||
|
def get_deps_essential(headers_flag):
    """
    Return the dependencies required for any run (essential files).

    ESSENTIAL_FILES_1 entries are expanded to their full transitive
    dependencies; ESSENTIAL_FILES_2 entries are needed only as themselves
    (their IR files are split). The result is de-duplicated.

    :param headers_flag: string containing headers include paths with -I prepended to them.
    :return: a list of essential files, and the error string
    """
    errors = []
    essentials = []

    # find dependencies for ESSENTIAL_FILES_1 as we need them too
    for file_path in (os.path.join(DATASET_PATH, name) for name in ESSENTIAL_FILES_1):
        deps, err = get_all_dependencies_of_file(headers_flag, file_path)
        errors.append(err)
        essentials.extend(deps)
        essentials.append(file_path)

    # we only need ESSENTIAL_FILES_2 themselves (IR files are split)
    essentials.extend(os.path.join(DATASET_PATH, name) for name in ESSENTIAL_FILES_2)

    # remove duplicates (ordering is not significant downstream)
    essentials = list(set(essentials))

    return essentials, "".join(errors)
|
||||||
|
|
||||||
|
|
||||||
|
def get_deps_non_essential(headers_flag):
    """
    Find the entry points (IR level) for each op and record them in the
    associations dict; starting from these entry points, recursively find
    the dependencies of each file and record them in the dependencies dict.

    :param headers_flag: string containing headers include paths with -I prepended to them.
    :return: dependencies dict, associations dict, the error string
    """
    dependencies = dict()  # maps each source file to the files it imports
    associations = dict()  # maps each op (normalized) to its defining IR source file
    errors = []

    for dirname in (os.path.join(DATASET_PATH, subdir) for subdir in OPS_DIRS):
        # every ops directory must have a registered class-name extractor
        if dirname not in registered_functions:
            raise ValueError("Directory has no registered regex function:", dirname)
        extract_classname = registered_functions[dirname]

        # iterate over source files in the directory
        for src_filename in glob.glob("{}/*.cc".format(dirname)):
            # record the transitive dependencies of this source file
            deps, err = get_all_dependencies_of_file(headers_flag, src_filename)
            dependencies[src_filename] = deps
            errors.append(err)

            # the corresponding header must sit next to the source file
            header_filename = src_filename.replace('.cc', '.h')
            if not os.path.isfile(header_filename):
                raise ValueError("Header file doesn't exist!")
            with open(header_filename, 'r') as header_file:
                content = header_file.read().strip()

            # extract ops from the header and register each under a
            # normalized name (lower-case, underscores stripped)
            for raw_op in extract_classname(content):
                associations[raw_op.lower().replace('_', '')] = src_filename

    return dependencies, associations, "".join(errors)
|
||||||
|
|
||||||
|
|
||||||
|
def main():
    """
    Configure the cropper tool: compute all dependency information and write
    the associations, dependencies and error files to OUTPUT_LOCATION.
    """
    errors = ""
    dependencies = {}

    # convert header dirs to a single string with '-I' prepended to each
    headers_flag = get_headers()

    # dependencies for essential files (always required)
    essential_deps, err = get_deps_essential(headers_flag)
    dependencies['ESSENTIAL'] = essential_deps
    errors += err
    logger.debug('len(ESSENTIAL): {}'.format(len(dependencies['ESSENTIAL'])))

    # dependencies and op associations for the remaining (non-essential) files
    other_dependencies, all_associations, err = get_deps_non_essential(headers_flag)
    dependencies.update(other_dependencies)
    errors += err

    with open(os.path.join(OUTPUT_LOCATION, DEPENDENCIES_FILENAME), "w") as f:
        json.dump(dependencies, f)

    with open(os.path.join(OUTPUT_LOCATION, ASSOCIATIONS_FILENAME), "w") as f:
        json.dump(all_associations, f)

    with open(os.path.join(OUTPUT_LOCATION, ERRORS_FILENAME), "w") as f:
        f.write(errors)
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":

    logger.info('STARTING: cropper_configure.py ')

    # remember where we were launched from so we can restore it afterwards
    original_path = os.getcwd()
    script_path = os.path.dirname(os.path.abspath(__file__))

    try:
        # change directory to mindspore directory (repo root, five levels up)
        # so that all relative paths like DATASET_PATH resolve correctly
        os.chdir(os.path.join(script_path, "../../../../.."))
        main()
    except (OSError, IndexError, KeyError):
        # log the failure but re-raise so the build fails loudly
        logger.error('FAILED: cropper_configure.py!')
        raise
    else:
        logger.info('SUCCESS: cropper_configure.py ')
    finally:
        # always restore the caller's working directory
        os.chdir(original_path)
|
File diff suppressed because one or more lines are too long
|
@ -0,0 +1,84 @@
|
||||||
|
# Copyright 2021 Huawei Technologies Co., Ltd
|
||||||
|
#
|
||||||
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
# you may not use this file except in compliance with the License.
|
||||||
|
# You may obtain a copy of the License at
|
||||||
|
#
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
# See the License for the specific language governing permissions and
|
||||||
|
# limitations under the License.
|
||||||
|
# ============================================================================
|
||||||
|
""" extract ops from user code """
|
||||||
|
|
||||||
|
from abc import ABC, abstractmethod
|
||||||
|
from functools import lru_cache
|
||||||
|
import json
|
||||||
|
import os
|
||||||
|
|
||||||
|
ASSOCIATION_FILENAME = 'associations.txt'
|
||||||
|
|
||||||
|
|
||||||
|
@lru_cache(maxsize=1)
def _load_ops_names():
    """
    Get the names of all ops available in MindData lite.

    Reads the associations file (op name -> defining source file) and keeps
    only the keys. Cached with maxsize=1 so the file is read at most once.

    :return: a view of all available op names in MindData lite
    """
    with open(os.path.expanduser(ASSOCIATION_FILENAME), 'r') as assoc_file:
        associations = json.load(assoc_file)
    return associations.keys()
|
||||||
|
|
||||||
|
|
||||||
|
class Parser(ABC):
    """
    Abstract Base Class for parsers that look up MindData ops in user code.

    Subclasses implement ``parse``; the set of known op names is loaded once
    from the associations file and shared via ``self._all_ops``.
    """

    def __init__(self):
        # all op names known to MindData lite (loaded from associations file)
        self._all_ops = _load_ops_names()

    @abstractmethod
    def parse(self, user_filename):
        """
        Find the ops used in the given file of user code.

        :param user_filename: string, name of file containing user code
        :return: list of ops found in the user code
        """
|
||||||
|
|
||||||
|
|
||||||
|
class SimpleParser(Parser):
    """
    A parser based on plain string matching: an op is reported as used
    whenever its name appears anywhere in the text of the user's code.
    """

    def parse(self, user_filename):
        """
        Find and return the ops used in the given user file.

        :param user_filename: filename of user code
        :return: a list of ops present in the file
        :raises FileNotFoundError: if user_filename does not exist
        """
        if not os.path.isfile(user_filename):
            raise FileNotFoundError("file does not exist: {}".format(user_filename))
        with open(user_filename) as user_file:
            code_text = user_file.read().strip()
        return self._simple_string_match(code_text)

    def _simple_string_match(self, user_text):
        """
        Find and return the ops appearing in user code given as a string.

        :param user_text: string containing user code
        :return: a list of ops found in the user_text
        """
        haystack = user_text.strip().lower()
        return [op for op in self._all_ops if op in haystack]
|
Loading…
Reference in New Issue