MD cropper

This commit is contained in:
mohammad 2021-05-11 17:44:51 -04:00
parent 8c5680360d
commit c6ed4151e7
11 changed files with 967 additions and 14 deletions

View File

@ -12,6 +12,7 @@ if(SUPPORT_TRAIN)
set(RUNTIME_LIB_DIR ${RUNTIME_PKG_NAME}/train/lib)
set(MIND_DATA_INC_DIR ${RUNTIME_PKG_NAME}/train/include/dataset)
set(TURBO_DIR ${RUNTIME_PKG_NAME}/train/third_party/libjpeg-turbo)
set(SECUREC_DIR ${RUNTIME_PKG_NAME}/train/third_party/securec)
set(MINDSPORE_LITE_LIB_NAME libmindspore-lite-train)
set(BENCHMARK_NAME benchmark_train)
set(BENCHMARK_ROOT_DIR ${RUNTIME_PKG_NAME}/tools/benchmark_train)
@ -21,6 +22,7 @@ else()
set(RUNTIME_LIB_DIR ${RUNTIME_PKG_NAME}/inference/lib)
set(MIND_DATA_INC_DIR ${RUNTIME_PKG_NAME}/inference/include/dataset)
set(TURBO_DIR ${RUNTIME_PKG_NAME}/inference/third_party/libjpeg-turbo)
set(SECUREC_DIR ${RUNTIME_PKG_NAME}/inference/third_party/securec)
set(MINDSPORE_LITE_LIB_NAME libmindspore-lite)
set(BENCHMARK_NAME benchmark)
set(BENCHMARK_ROOT_DIR ${RUNTIME_PKG_NAME}/tools/benchmark)
@ -40,21 +42,33 @@ if(BUILD_MINDDATA STREQUAL "full")
if(PLATFORM_ARM64)
file(GLOB JPEGTURBO_LIB_LIST ${jpeg_turbo_LIBPATH}/*.so)
install(FILES ${TOP_DIR}/mindspore/lite/build/minddata/libminddata-lite.so
DESTINATION ${RUNTIME_LIB_DIR} COMPONENT ${RUNTIME_COMPONENT_NAME})
install(FILES ${TOP_DIR}/mindspore/lite/build/minddata/libminddata-lite.so DESTINATION
${RUNTIME_LIB_DIR} COMPONENT ${RUNTIME_COMPONENT_NAME})
install(FILES ${TOP_DIR}/mindspore/lite/build/minddata/libminddata-lite_static.a DESTINATION
${RUNTIME_LIB_DIR} COMPONENT ${RUNTIME_COMPONENT_NAME})
install(FILES ${JPEGTURBO_LIB_LIST} DESTINATION ${TURBO_DIR}/lib COMPONENT ${RUNTIME_COMPONENT_NAME})
install(FILES ${TOP_DIR}/mindspore/lite/build/securec/src/libsecurec.a
DESTINATION ${SECUREC_DIR} COMPONENT ${RUNTIME_COMPONENT_NAME})
elseif(PLATFORM_ARM32)
file(GLOB JPEGTURBO_LIB_LIST ${jpeg_turbo_LIBPATH}/*.so)
install(FILES ${TOP_DIR}/mindspore/lite/build/minddata/libminddata-lite.so DESTINATION
${RUNTIME_LIB_DIR} COMPONENT ${RUNTIME_COMPONENT_NAME})
install(FILES ${TOP_DIR}/mindspore/lite/build/minddata/libminddata-lite_static.a DESTINATION
${RUNTIME_LIB_DIR} COMPONENT ${RUNTIME_COMPONENT_NAME})
install(FILES ${JPEGTURBO_LIB_LIST} DESTINATION ${TURBO_DIR}/lib COMPONENT ${RUNTIME_COMPONENT_NAME})
install(FILES ${TOP_DIR}/mindspore/lite/build/securec/src/libsecurec.a
DESTINATION ${SECUREC_DIR} COMPONENT ${RUNTIME_COMPONENT_NAME})
else()
install(FILES ${TOP_DIR}/mindspore/lite/build/minddata/libminddata-lite.so DESTINATION
${RUNTIME_LIB_DIR} COMPONENT ${RUNTIME_COMPONENT_NAME})
install(FILES ${TOP_DIR}/mindspore/lite/build/minddata/libminddata-lite_static.a DESTINATION
${RUNTIME_LIB_DIR} COMPONENT ${RUNTIME_COMPONENT_NAME})
install(FILES ${jpeg_turbo_LIBPATH}/libjpeg.so.62.3.0 DESTINATION ${TURBO_DIR}/lib
RENAME libjpeg.so.62 COMPONENT ${RUNTIME_COMPONENT_NAME})
install(FILES ${jpeg_turbo_LIBPATH}/libturbojpeg.so.0.2.0 DESTINATION ${TURBO_DIR}/lib
RENAME libturbojpeg.so.0 COMPONENT ${RUNTIME_COMPONENT_NAME})
install(FILES ${TOP_DIR}/mindspore/lite/build/securec/src/libsecurec.a
DESTINATION ${SECUREC_DIR} COMPONENT ${RUNTIME_COMPONENT_NAME})
endif()
endif()

View File

@ -283,22 +283,37 @@ if(BUILD_MINDDATA STREQUAL "full")
)
add_library(minddata-lite SHARED
${MINDDATA_KERNELS_IMAGE_LITE_CV_FILES}
${CMAKE_CURRENT_SOURCE_DIR}/../src/common/log_adapter.cc
${CORE_DIR}/utils/ms_utils.cc
${MINDDATA_FULL_SRC}
)
${MINDDATA_KERNELS_IMAGE_LITE_CV_FILES}
${CMAKE_CURRENT_SOURCE_DIR}/../src/common/log_adapter.cc
${CORE_DIR}/utils/ms_utils.cc
${MINDDATA_FULL_SRC}
)
add_library(minddata-lite_static STATIC
${MINDDATA_KERNELS_IMAGE_LITE_CV_FILES}
${CMAKE_CURRENT_SOURCE_DIR}/../src/common/log_adapter.cc
${CORE_DIR}/utils/ms_utils.cc
${MINDDATA_FULL_SRC}
)
add_dependencies(minddata-lite fbs_src)
add_dependencies(minddata-lite_static fbs_src)
find_package(Threads REQUIRED)
target_link_libraries(minddata-lite
securec
mindspore::jpeg_turbo
mindspore::turbojpeg
mindspore::json
Threads::Threads
)
securec
mindspore::jpeg_turbo
mindspore::turbojpeg
mindspore::json
Threads::Threads
)
target_link_libraries(minddata-lite_static
securec
mindspore::jpeg_turbo
mindspore::turbojpeg
mindspore::json
Threads::Threads
)
# ref: https://github.com/android/ndk/issues/1202
if(PLATFORM_ARM32)
@ -307,10 +322,12 @@ if(BUILD_MINDDATA STREQUAL "full")
MESSAGE(FATAL_ERROR "Cannot find libclang_rt.builtins-arm-androi2d.a in $ENV{ANDROID_NDK}")
endif()
target_link_libraries(minddata-lite ${LIBCLANG_RT_LIB})
target_link_libraries(minddata-lite_static ${LIBCLANG_RT_LIB})
endif()
if(PLATFORM_ARM32 OR PLATFORM_ARM64)
target_link_libraries(minddata-lite log)
target_link_libraries(minddata-lite log)
target_link_libraries(minddata-lite_static log)
elseif()
endif()
elseif(BUILD_MINDDATA STREQUAL "wrapper")

View File

@ -0,0 +1 @@
debug.txt

View File

@ -0,0 +1,33 @@
cmake_minimum_required(VERSION 3.15.5)
project(MinddataCropper)

# MindData lite is built with the pre-C++11 ABI; the cropped library must match.
add_compile_definitions(_GLIBCXX_USE_CXX11_ABI=0)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++17 -fPIE -fPIC -Wl,--allow-shlib-undefined -s")

# Object files kept by build_lib.py (crop.sh extracts them into tmp/).
file(GLOB minddata_OBJ CONFIGURE_DEPENDS "tmp/*.o")
if(NOT minddata_OBJ)
    message(FATAL_ERROR "Your code is not using any MindData functionality.\n \
... libminddata-lite_min.so is not needed\n... Terminating crop.sh")
endif()
message(STATUS "${CMAKE_CXX_COMPILER}")

# Re-link the surviving objects (plus the external archives passed in through
# -DEXTERNAL_DEPS) into the minimal shared library.
# NOTE: PRE_BUILD is only valid in the TARGET signature of add_custom_command;
# in the OUTPUT signature it would be parsed as a second output file, so it
# must not appear here. DEPENDS/VERBATIM make the rule re-run correctly and
# keep argument escaping portable.
add_custom_command(
    OUTPUT libminddata-lite_min.so
    COMMAND ${CMAKE_CXX_COMPILER}
    -shared
    -o libminddata-lite_min.so
    ${minddata_OBJ}
    ${EXTERNAL_DEPS}
    -pthread
    -std=c++17
    -fPIE -fPIC
    -s
    DEPENDS ${minddata_OBJ}
    VERBATIM
)
# Named target so `make` (default ALL) drives the link rule above.
add_custom_target(
    minddata-lite ALL
    DEPENDS libminddata-lite_min.so
)

View File

@ -0,0 +1,71 @@
# Objective
The goal of this tool is to allow the user to reduce the size of MindData lite package they ship with their code.
# How to run
This tool has two parts: the first part only needs to be run once, when the source code for mindspore is changed,
while the second part should be run every time the user code changes.
Note that you need to run this tool on the server side if you are planning to use your code on an edge device.
## Step 1: Configure the cropper tool
You need to have mindspore installed on your system to run this python script.
Additionally, you need to have the mindspore source code present in your system
as this script processes mindspore's source code.
To execute the first part simply run:
```console
python cropper_configure.py
```
## Step 2: Crop the MindData lite package
The second part needs to be run every time the user adds or removes one of the MD operators in their code.
For the second part, you need to run:
```console
./crop.sh -p <path to mindspore package> <source files>
```
Note that you need to provide the name of all files that are using any of the MindData functionalities.
`ANDROID_NDK` environment variable needs to be set as well if the target device is android.
Example: `./crop.sh -p ~/mindspore/ foo.cc foo.h bar.cc bar.h`
This code will create the __libminddata-lite_min.so__ library specific to your code and will also print for you a list of
shared objects that your code depends on (including __libminddata-lite\_min.so__).
Note that you need to copy these files to your target device and set the linker flag accordingly.
# How it works
The first step (configuration) creates a few files that are needed in the second step.
These files include _dependencies.txt_, _associations.txt_, and _debug.txt_.
While the third file (_debug.txt_) is only for debugging purposes (debugging cropper tool),
the other two files are used in the second part.
_associations.txt_ contains the entry points (IR level source files) for ops that the user may use in their code.
The other file, _dependencies.txt_, contains all dependencies for all those entry points.
When the user runs the crop script, _parser.py_ will be run on their code to find the ops they have used.
Afterwards, the text files will be used to keep the needed object files
(by removing unnecessary object files from the static library containing all of them).
Finally, the remaining object files will be used to create a new shared object file (_libminddata-lite\_min.so_).
# Requirements
Step 1:
* Python3
* mindspore
* mindspore source code
Step 2:
* Python3
* cmake
* Android NDK (if target device is android)

File diff suppressed because one or more lines are too long

View File

@ -0,0 +1,154 @@
# Copyright 2021 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
""" build MindData lite minimum library """
import glob
import itertools
import json
from operator import itemgetter
import os
from pprint import pprint
import sys
import warnings
import parser
# Tables produced by cropper_configure.py:
DEPENDENCY_FILENAME = 'dependencies.txt'    # file -> list of files it depends on
ASSOCIATION_FILENAME = 'associations.txt'   # op name -> IR entry-point file
# Output consumed by crop.sh after cropping:
ALL_DEPS_FILENAME = 'needed_dependencies.txt'
# Directory where crop.sh extracted the static library's object files.
OBJECTS_DIR = 'tmp/'
# Object files that must survive cropping even if no user op references them.
ESSENTIAL_OBJECTS = [
    # 'types.cc.o',
    # 'tensor_impl.cc.o',
    'random_sampler.cc.o',  # default value for datasets (may not exist in their code)
    'random_sampler_ir.cc.o',  # default value for datasets (may not exist in their code)
]
def load_dependencies():
    """
    Load the dependency table produced by cropper_configure.py.

    :return: a dict containing list of dependencies for almost any file in MindData lite
    """
    if not os.path.isfile(DEPENDENCY_FILENAME):
        raise FileNotFoundError("dependency file ({}) does not exist.\n"
                                "Please run cropper_configure.py first.".format(DEPENDENCY_FILENAME))
    with open(DEPENDENCY_FILENAME) as dep_file:
        return json.load(dep_file)
def load_associations():
    """
    Load the op-to-entry-point table produced by cropper_configure.py.

    :return: a dict containing entry point (a filename) for each op
    """
    if not os.path.isfile(ASSOCIATION_FILENAME):
        raise FileNotFoundError("association file ({}) does not exist.\n"
                                "Please run cropper_configure.py first.".format(ASSOCIATION_FILENAME))
    with open(ASSOCIATION_FILENAME) as assoc_file:
        return json.load(assoc_file)
def get_unique_dependencies(dependencies_dict, associations_dict, user_ops):
    """
    Find which dependencies we need to include according to the ops found in the user code.

    :param dependencies_dict: a dict containing list of dependencies for almost any file in MindData lite
    :param associations_dict: a dict containing entry point (a filename) for each op
    :param user_ops: a list of ops found in the user code
    :return: a sorted list of unique dependencies needed based on the user code
    """
    selected_entries = []
    for op in user_ops:
        print('{} --> {}'.format(op, associations_dict[op]))
        selected_entries.append(associations_dict[op])
    # NOTE: the previous itemgetter(*entries)(dependencies_dict) form returned a
    # bare list (not a tuple of lists) when exactly one op was selected, so the
    # chain() below would iterate over the *characters* of the filenames.
    # An explicit list comprehension handles one or many entries uniformly.
    selected_files = [dependencies_dict[entry] for entry in selected_entries]
    selected_files = list(itertools.chain(*selected_files))
    return sorted(set(selected_files))
def remove_unused_objects(final_deps, essentials, all_object_files):
    """
    Remove object files that are determined to be NOT needed to run user code
    as they are not in the dependencies of user code.

    :param final_deps: a list of dependencies needed based on the user code
    :param essentials: essential objects that should not be removed from final lib
    :param all_object_files: a list of all objects available in our static library
    :return: None
    """
    # find objects which are not part of any dependency
    # (lstrip removes the '_' prefixes crop.sh prepends to duplicate member
    # names; [:-5] drops the trailing '.cc.o' to match source filenames)
    to_be_removed = [x for x in all_object_files if not any(x.lstrip('_')[:-5] in y for y in final_deps)]
    # keep the ones that are not an essential object file (lstrip as above)
    to_be_removed = [x for x in to_be_removed if not any(x.lstrip('_') in y for y in essentials)]
    print('Removing:', len(to_be_removed), 'unused objects.')
    pprint(sorted(to_be_removed))
    # physically delete the unneeded objects from the extraction directory
    for filename in to_be_removed:
        os.remove(os.path.join(OBJECTS_DIR, filename))
def main():
    """
    Crop the object files in OBJECTS_DIR down to what the user code needs and
    record the surviving dependencies for crop.sh.

    Reads the module-level ``user_code_filenames`` set in the __main__ block.
    """
    # load tables created using cropper.py
    dependencies_dict = load_dependencies()
    associations_dict = load_associations()
    # get all objects filename
    all_object_files = [x[x.rfind('/') + 1:] for x in glob.glob('{}*.o'.format(OBJECTS_DIR))]
    print("All Obj files: {}".format(len(all_object_files)))
    # find ops in user code
    my_parser = parser.SimpleParser()
    temp = [my_parser.parse(x) for x in user_code_filenames]
    user_ops = set(itertools.chain(*temp))
    print('user ops: {}'.format(user_ops))
    # user is not using any MindData op
    if not user_ops:
        warnings.warn('No MindData Ops detected in your code...')
        # remove every object file and leave an empty dependency list for crop.sh
        remove_unused_objects([], [], all_object_files)
        with open(os.path.join(OBJECTS_DIR, ALL_DEPS_FILENAME), 'w') as _:
            pass
        exit(0)
    # find dependencies required (based on user ops)
    unique_deps = get_unique_dependencies(dependencies_dict, associations_dict, user_ops)
    print('Unique Deps (.h): {}'.format(len(unique_deps)))
    print('Unique Deps (.cc): {}'.format(len(list(filter(lambda x: x[-2:] == 'cc', unique_deps)))))
    # add essential files to dependency files
    final_deps = set(unique_deps + dependencies_dict['ESSENTIAL'])
    print('Total Deps (.h): {}'.format(len(final_deps)))
    # delete the rest of the object files from directory.
    remove_unused_objects(final_deps, ESSENTIAL_OBJECTS, all_object_files)
    # write all dependencies to the file (for extracting external ones)
    with open(os.path.join(OBJECTS_DIR, ALL_DEPS_FILENAME), 'w') as fout:
        fout.write("\n".join(unique_deps) + '\n')
if __name__ == "__main__":
    # get user code filename(s) as argument(s) to code
    if len(sys.argv) <= 1:
        print("usage: python build_lib.py <xxx.y> [<xxx.z>]")
        exit(1)
    # module-level global: read by main()
    user_code_filenames = sys.argv[1:]
    main()

View File

@ -0,0 +1,188 @@
#!/bin/bash
# Print how to invoke this script.
usage()
{
  echo "Usage:"
  echo "bash crop.sh -p <path-to-mindspore-directory> <source-file> [<more-source-files>] \\"
  echo "bash crop.sh -h \\"
  echo ""
  echo "Options:"
  echo " -p path to mindspore directory"
  echo " -h print usage"
}
# check and set options
# Parses -p <mindspore dir> (stored as an absolute path in MINDSPORE_PATH)
# and -h (print usage and exit). Unknown options also exit with usage.
checkopts()
{
  while getopts ':p:h' opt
  do
    # normalize the option argument to lower case
    OPTARG=$(echo ${OPTARG} | tr '[A-Z]' '[a-z]')
    case "${opt}" in
      p)
        # resolve the given directory to an absolute path
        MINDSPORE_PATH="$(cd "${OPTARG}" &> /dev/null && pwd )"
        ;;
      h)
        usage
        exit 1
        ;;
      *)
        echo "Unknown option: \"${OPTARG}\""
        usage
        exit 1
    esac
  done
}
checkopts "$@"
# exit if less than 3 args are given by user
if [ $# -lt 3 ]; then
  usage
  exit 1
fi
# exit if mindspore path is not given by user
if [ -z "${MINDSPORE_PATH}" ]; then
  echo -e "\e[31mPlease set MINDSPORE_PATH environment variable.\e[0m"
  exit 1
fi
ORIGNAL_PATH="$PWD"
FILE_PATH="$(cd "$(dirname "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )"
# getting absolute paths for user provided filenames
# (a trailing space separates entries; without it consecutive paths would be
# fused into a single argument when passed to build_lib.py)
USER_CODES=""
for i in "${@:OPTIND}";
do
  USER_CODES+="$(cd "$(dirname "${i}" )" &> /dev/null && pwd )/$(basename "${i}") "
done
# exit if user has not given any argument as their code
if [ -z "${USER_CODES}" ]; then
  echo -e "\e[31mPlease provide your file names as arguments.\e[0m"
  exit 1
fi
echo "Provided files: $USER_CODES"
echo "MS PATH: $MINDSPORE_PATH"
echo "CWD: $ORIGNAL_PATH"
echo "File PATH: $FILE_PATH"
cd $FILE_PATH
MD_LIB_FILENAME="libminddata-lite_static.a"
# locate original MindData lite library
MD_LIB_PATH=`find $MINDSPORE_PATH -name "${MD_LIB_FILENAME}" | head -n 1`
if [ -z "${MD_LIB_PATH}" ]; then
  echo -e "\e[31mMindData lite static library could not be found.\e[0m"
  cd $ORIGNAL_PATH
  exit 1
fi
# extract all objects of static lib to tmp/
mkdir -p tmp
cp $MD_LIB_PATH tmp
cd tmp
# extract objects with identical names by prepending (one or more) '_' to their names
# (this script supports more than 2 duplicate filenames)
DUPLICATES=`ar t "${MD_LIB_FILENAME}" | sort | uniq -d`
for dup in $DUPLICATES;
do
  i=0
  prepend_var="_"
  while :
  do
    i=$((i + 1))
    # check if more duplicates are available (break otherwise)
    # `ar xN $i` extracts the i-th member with this name; it writes an error
    # to stderr once i exceeds the number of duplicates
    error_output=$(ar xN $i "${MD_LIB_FILENAME}" $dup 2>&1)
    if [ -n "$error_output" ]; then
      break
    fi
    mv $dup "${prepend_var}${dup}"
    prepend_var="${prepend_var}_"
  done
done
# extract unique files from static library
UNIQUES=`ar t "${MD_LIB_FILENAME}" | sort | uniq -u`
ar x "${MD_LIB_FILENAME}" ${UNIQUES}
cd ..
# remove unused object files
# write needed dependencies to tmp/needed_dependencies.txt
python build_lib.py ${USER_CODES}
retVal=$?
if [ $retVal -ne 0 ]; then
  cd $ORIGNAL_PATH
  exit 1
fi
LD_SEP='\n'
EX_SEP=$';'
LD_PATHS=""
EXTERNAL_DEPS=""
# locate external dependencies for MindData lite
LIBJPEG_PATH=`find $MINDSPORE_PATH -name "libjpeg.so*" | head -n 1`
LIBTURBOJPEG_PATH=`find $MINDSPORE_PATH -name "libturbojpeg.so*" | head -n 1`
LIBSECUREC_PATH=`find $MINDSPORE_PATH -name libsecurec.a | head -n 1`
# resolve symbolic links
if [ "$(uname)" == "Darwin" ]; then
  c=$(file -b "$(readlink $LIBJPEG_PATH)")
elif [ "$(expr substr "$(uname -s)" 1 5)" == "Linux" ]; then
  c=$(file -b "$(readlink -f $LIBJPEG_PATH)")
fi
# detect system architecture (second comma-separated field of `file` output)
IFS="," read -r -a array <<< "$c"
TARGET_ARCHITECTURE=${array[1]##* }
echo "Architecture: $TARGET_ARCHITECTURE"
# exit if $ANDROID_NDK is not set by user for ARM32 or ARM64
if [ "$TARGET_ARCHITECTURE" == "ARM64" ]; then
  if [ -z "${ANDROID_NDK}" ]; then
    echo -e "\e[31mPlease set ANDROID_NDK environment variable.\e[0m"
    cd $ORIGNAL_PATH
    exit 1
  fi
elif [ "$TARGET_ARCHITECTURE" == "ARM32" ]; then
  if [ -z "${ANDROID_NDK}" ]; then
    echo -e "\e[31mPlease set ANDROID_NDK environment variable.\e[0m"
    cd $ORIGNAL_PATH
    exit 1
  fi
  # add LIBCLANG_RT_LIB for ARM32
  LIBCLANG_RT_LIB=`find $ANDROID_NDK -name libclang_rt.builtins-arm-android.a | head -n 1`
  EXTERNAL_DEPS=${EXTERNAL_DEPS}${LIBCLANG_RT_LIB}${EX_SEP}
else
  echo "No need for ANDROID_NDK"
fi
# Note: add .a files only to EXTERNAL_DEPS.
if grep -q 'jpeg' "tmp/needed_dependencies.txt"; then
  LD_PATHS=${LD_PATHS}${LIBJPEG_PATH}${LD_SEP}
  LD_PATHS=${LD_PATHS}${LIBTURBOJPEG_PATH}${LD_SEP}
  EXTERNAL_DEPS=${EXTERNAL_DEPS}${LIBJPEG_PATH}${EX_SEP}
  EXTERNAL_DEPS=${EXTERNAL_DEPS}${LIBTURBOJPEG_PATH}${EX_SEP}
fi
# we always need securec library
EXTERNAL_DEPS=${EXTERNAL_DEPS}${LIBSECUREC_PATH}${EX_SEP}
# create .so lib from remaining object files
# (EXTERNAL_DEPS is ';'-separated so CMake receives it as a list)
cmake -S . -B . \
  -DEXTERNAL_DEPS="${EXTERNAL_DEPS}" \
  -DARCHITECTURE=$TARGET_ARCHITECTURE
# no dependencies to MindData lite
retVal=$?
if [ $retVal -eq 0 ]; then
  make
  echo -e "\e[32mLibrary was built successfully, The new list of MindData-related dependencies is as follows:\e[0m"
  echo -e "\e[36m$LD_PATHS$PWD/libminddata-lite_min.so\e[0m"
fi
rm -rf tmp/
cd $ORIGNAL_PATH

View File

@ -0,0 +1,389 @@
# Copyright 2021 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
""" configure cropper tool """
from functools import lru_cache
import glob
import json
import os
import queue
import re
import subprocess
from mindspore import log as logger
# Macro definitions matching the android build of MindData lite, passed to the
# gcc dependency scan so conditionally-compiled includes are visible.
DEFINE_STR = "-DENABLE_ANDROID -DENABLE_ARM -DENABLE_ARM64 -DENABLE_NEON -DNO_DLIB -DUSE_ANDROID_LOG -DANDROID"
ASSOCIATIONS_FILENAME = 'associations.txt'
DEPENDENCIES_FILENAME = 'dependencies.txt'
ERRORS_FILENAME = 'debug.txt'
OUTPUT_LOCATION = "mindspore/lite/tools/dataset/cropper"
# needed for gcc command for include directories
MANUAL_HEADERS = [
    ".",
    "mindspore",
    "mindspore/ccsrc",
    "mindspore/ccsrc/minddata/dataset",
    "mindspore/ccsrc/minddata/dataset/kernels/image",
    "mindspore/core",
    "mindspore/lite",
]
# To stop gcc command once reaching these external headers
# (not all of them may be used now in MindData lite)
EXTERNAL_DEPS = [
    "graphengine/inc/external",
    "akg/third_party/fwkacllib/inc",
    "third_party",
    "third_party/securec/include",
    "build/mindspore/_deps/sqlite-src",
    "build/mindspore/_deps/pybind11-src/include",
    "build/mindspore/_deps/tinyxml2-src",
    "build/mindspore/_deps/jpeg_turbo-src",
    "build/mindspore/_deps/jpeg_turbo-src/_build",
    "build/mindspore/_deps/icu4c-src/icu4c/source/i18n",
    "build/mindspore/_deps/icu4c-src/icu4c/source/common",
    "mindspore/lite/build/_deps/tinyxml2-src",
    "mindspore/lite/build/_deps/jpeg_turbo-src",
    "mindspore/lite/build/_deps/jpeg_turbo-src/_build",
    "mindspore/lite/build/_deps/nlohmann_json-src",
]
# API files which the corresponding objects and all objects for their dependencies must always be included.
ESSENTIAL_FILES_1 = [
    "api/data_helper.cc",
    "api/datasets.cc",
    "api/execute.cc",
    "api/iterator.cc",
]
# API files which the corresponding objects must always be included.
# (corresponding IR files will be included according to user ops)
ESSENTIAL_FILES_2 = [
    "api/text.cc",
    "api/transforms.cc",
    "api/samplers.cc",
    "api/vision.cc",
]
# Paths below are relative to the mindspore repository root (see __main__ chdir).
DATASET_PATH = "mindspore/ccsrc/minddata/dataset"
# Directories containing the IR-level entry points for each op family.
OPS_DIRS = [
    "engine/ir/datasetops",
    "engine/ir/datasetops/source",
    "engine/ir/datasetops/source/samplers",
    "kernels/ir/vision",
    "kernels/ir/data",
    "text/ir/kernels",
]
def extract_classname_samplers(header_content):
    """
    Use regex to find class names in header files of samplers.

    :param header_content: string containing header of a sampler IR file
    :return: list of sampler classes found
    """
    pattern = r"(?<=class )[\w\d_]+(?=Obj : )"
    return re.findall(pattern, header_content)
def extract_classname_source_node(header_content):
    """
    Use regex to find class names in header files of source nodes.

    :param header_content: string containing header of a source node IR file
    :return: list of source node classes found
    """
    pattern = r"(?<=class )[\w\d_]+(?=Node : )"
    return re.findall(pattern, header_content)
def extract_classname_nonsource_node(header_content):
    """
    Use regex to find class names in header files of non-source nodes.

    :param header_content: string containing header of a non-source IR file
    :return: list of non-source node classes found
    """
    matches = re.findall(r"(?<=class )[\w\d_]+(?=Node : )", header_content)
    return matches
def extract_classname_vision(header_content):
    """
    Use regex to find class names in header files of vision ops.

    :param header_content: string containing header of a vision op IR file
    :return: list of vision ops found
    """
    pattern = r"(?<=class )[\w\d_]+(?=Operation : )"
    return re.findall(pattern, header_content)
def extract_classname_data(header_content):
    """
    Use regex to find class names in header files of data ops.

    :param header_content: string containing header of a data op IR file
    :return: list of data ops found
    """
    matches = re.findall(r"(?<=class )[\w\d_]+(?=Operation : )", header_content)
    return matches
def extract_classname_text(header_content):
    """
    Use regex to find class names in header files of text ops.

    :param header_content: string containing header of a text op IR file
    :return: list of text ops found
    """
    pattern = r"(?<=class )[\w\d_]+(?=Operation : )"
    return re.findall(pattern, header_content)
# For each op type (directory) store the corresponding function which extracts op name
registered_functions = {
os.path.join(DATASET_PATH, 'engine/ir/datasetops/source/samplers'): extract_classname_samplers,
os.path.join(DATASET_PATH, 'engine/ir/datasetops/source'): extract_classname_source_node,
os.path.join(DATASET_PATH, 'engine/ir/datasetops'): extract_classname_nonsource_node,
os.path.join(DATASET_PATH, 'kernels/ir/vision'): extract_classname_vision,
os.path.join(DATASET_PATH, 'kernels/ir/data'): extract_classname_data,
os.path.join(DATASET_PATH, 'text/ir/kernels'): extract_classname_text,
}
def get_headers():
    """
    Get the headers flag: "-Ixx/yy -Ixx/zz ..."

    :return: a string to be passed to compiler
    """
    include_dirs = MANUAL_HEADERS + EXTERNAL_DEPS
    return "-I{}/".format("/ -I".join(include_dirs))
@lru_cache(maxsize=1024)
def get_dependencies_of_file(headers_flag, filename):
    """
    Create dependency list for a file (file0.cc):
    file0.cc.o: file1.h, file2.h, ...

    Runs `gcc -MM -MG` on the file and parses the emitted make rule. Results
    are memoized (lru_cache) since the same files are queried repeatedly by
    the BFS in get_all_dependencies_of_file.

    :param headers_flag: string containing headers include paths with -I prepended to them.
    :param filename: a string containing path of a file.
    :return: a list of file names [file0.cc, file1.h, file2.h, file3.h] and error string
    """
    command = 'gcc -MM -MG {0} {1} {2}'.format(filename, DEFINE_STR, headers_flag)
    stdout, stderr = subprocess.Popen(command, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE).communicate()
    # split the make rule on whitespace/line continuations; [1:] drops the
    # leading "file0.o:" target token
    deps = re.split(r'[\s\\]+', stdout.decode('utf-8').strip(), flags=re.MULTILINE)[1:]
    return deps, stderr.decode('utf-8')
def needs_processing(dep_cc, processed_cc, queue_cc_set):
    """
    Determine if a file's dependencies need to be processed.

    :param dep_cc: the candidate file to be processed by gcc
    :param processed_cc: set of files that have been already processed.
    :param queue_cc_set: files currently in the queue (to be processed)
    :return: boolean, whether the file should be further processed by gcc.
    """
    # skip anything already handled or already scheduled
    if dep_cc in processed_cc or dep_cc in queue_cc_set:
        return False
    # if file doesn't exist, don't process as it will cause error (may happen for cache)
    if not os.path.isfile(dep_cc):
        return False
    return True
def build_source_file_path(dep_h):
    """
    Given the path to a header file, find the path for the associated source file.
    - if an external dependency, return "EXTERNAL"
    - if not found, keep the header file's path

    :param dep_h: a string containing path to the header file
    :return: dep_cc: a string containing path to the source file
    """
    # external headers are sentinel-marked so the BFS stops expanding them
    if any(marker in dep_h for marker in EXTERNAL_DEPS):
        return "EXTERNAL"
    # special case: this public header's implementation lives under cxx_api
    if 'include/api/types.h' in dep_h:
        return "mindspore/ccsrc/cxx_api/types.cc"
    candidate = dep_h.replace('.hpp', '.cc').replace('.h', '.cc')
    return candidate if os.path.isfile(candidate) else dep_h
def get_all_dependencies_of_file(headers_flag, filename):
    """
    Create dependency list for a file (incl. all source files needed).

    BFS over the include graph: gcc -MM yields one file's headers; each header
    is mapped to its guessed source file (build_source_file_path) and expanded
    in turn until no unprocessed file remains.

    :param headers_flag: string containing headers include paths with -I prepended to them.
    :param filename: a string containing path of a file.
    :return: all dependencies of that file and the error string
    """
    errors = []
    # a queue to process files
    queue_cc = queue.SimpleQueue()
    # a set of items that have ever been in queue_cc (faster access time)
    queue_cc_set = set()
    # store processed files
    processed_cc = set()
    # add the source file to the queue
    queue_cc.put(filename)
    queue_cc_set.add(filename)
    while not queue_cc.empty():
        # process the first item in the queue
        curr_cc = queue_cc.get()
        deps, error = get_dependencies_of_file(headers_flag, curr_cc)
        errors.append(error)
        processed_cc.add(curr_cc)
        # prepare its dependencies for processing
        for dep_h in deps:
            dep_cc = build_source_file_path(dep_h)
            # ignore if marked as an external dependency
            # (the header itself is still recorded as processed)
            if dep_cc == "EXTERNAL":
                processed_cc.add(dep_h)
                continue
            # add to queue if needs processing
            if needs_processing(dep_cc, processed_cc, queue_cc_set):
                queue_cc.put(dep_cc)
                queue_cc_set.add(dep_cc)
    logger.debug('file: {} | deps: {}'.format(filename[filename.rfind('/') + 1:], len(processed_cc)))
    return list(processed_cc), "".join(errors)
def get_deps_essential(headers_flag):
    """
    Return dependencies required for any run (essential).

    :param headers_flag: string containing headers include paths with -I prepended to them.
    :return: a list of essential files, and the error string
    """
    essentials = []
    errors = []
    # find dependencies for ESSENTIAL_FILES_1 as we need them too.
    for filename in [os.path.join(DATASET_PATH, x) for x in ESSENTIAL_FILES_1]:
        deps, err = get_all_dependencies_of_file(headers_flag, filename)
        errors.append(err)
        essentials.extend(deps)
        essentials.append(filename)
    # we only need ESSENTIAL_FILES_2 themselves (IR files are split)
    essentials.extend(os.path.join(DATASET_PATH, x) for x in ESSENTIAL_FILES_2)
    return list(set(essentials)), "".join(errors)
def get_deps_non_essential(headers_flag):
    """
    Find the entry points (IR Level) for each op and write them in associations dict.
    Starting from these entry point, recursively find the dependencies for each file and write in a dict.

    :param headers_flag: string containing headers include paths with -I prepended to them.
    :return: dependencies dict, associations dict, the error string
    """
    dependencies = dict()  # what files each file imports
    associations = dict()  # what file each op is defined in (IR level)
    errors = []
    for dirname in [os.path.join(DATASET_PATH, x) for x in OPS_DIRS]:
        # Get the proper regex function for this directory
        if dirname not in registered_functions:
            raise ValueError("Directory has no registered regex function:", dirname)
        extract_classname = registered_functions[dirname]
        # iterate over source files in the directory
        for src_filename in glob.glob("{}/*.cc".format(dirname)):
            # get the dependencies of source file
            deps, err = get_all_dependencies_of_file(headers_flag, src_filename)
            dependencies[src_filename] = deps
            errors.append(err)
            # locate the corresponding header file and read it
            header_filename = src_filename.replace('.cc', '.h')
            if not os.path.isfile(header_filename):
                raise ValueError("Header file doesn't exist!")
            with open(header_filename, 'r') as f:
                content = f.read().strip()
            # extract ops from header file
            ops = extract_classname(content)
            # add the op to associations table
            # (names normalized to lowercase without underscores, matching
            # how parser.py matches ops in user code)
            for raw_op in ops:
                op = raw_op.lower().replace('_', '')
                associations[op] = src_filename
    return dependencies, associations, "".join(errors)
def main():
    """
    Configure the cropper tool by creating associations.txt and dependencies.txt
    (plus debug.txt holding any gcc stderr output) under OUTPUT_LOCATION.
    """
    errors = ""
    dependencies = {}
    # convert to a single string with '-I' prepended to each dir name
    headers_flag = get_headers()
    # get dependencies for essential files
    all_deps, err = get_deps_essential(headers_flag)
    dependencies['ESSENTIAL'] = all_deps
    errors += err
    logger.debug('len(ESSENTIAL): {}'.format(len(dependencies['ESSENTIAL'])))
    # get dependencies for other files (non-essentials)
    other_dependencies, all_associations, err = get_deps_non_essential(headers_flag)
    dependencies.update(other_dependencies)
    errors += err
    # persist the three tables consumed by crop.sh / build_lib.py
    with open(os.path.join(OUTPUT_LOCATION, DEPENDENCIES_FILENAME), "w") as f:
        json.dump(dependencies, f)
    with open(os.path.join(OUTPUT_LOCATION, ASSOCIATIONS_FILENAME), "w") as f:
        json.dump(all_associations, f)
    with open(os.path.join(OUTPUT_LOCATION, ERRORS_FILENAME), "w") as f:
        f.write(errors)
if __name__ == "__main__":
    logger.info('STARTING: cropper_configure.py ')
    original_path = os.getcwd()
    script_path = os.path.dirname(os.path.abspath(__file__))
    try:
        # change directory to mindspore directory
        # (all paths used by this script are relative to the repository root)
        os.chdir(os.path.join(script_path, "../../../../.."))
        main()
    except (OSError, IndexError, KeyError):
        logger.error('FAILED: cropper_configure.py!')
        raise
    else:
        logger.info('SUCCESS: cropper_configure.py ')
    finally:
        # always restore the caller's working directory
        os.chdir(original_path)

File diff suppressed because one or more lines are too long

View File

@ -0,0 +1,84 @@
# Copyright 2021 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
""" extract ops from user code """
from abc import ABC, abstractmethod
from functools import lru_cache
import json
import os
# Table produced by cropper_configure.py: op name -> IR entry-point file.
ASSOCIATION_FILENAME = 'associations.txt'
@lru_cache(maxsize=1)
def _load_ops_names():
    """
    Get the name of all ops available in MindData lite.

    :return: a list of all available ops in MindData lite
    """
    with open(os.path.expanduser(ASSOCIATION_FILENAME), 'r') as assoc_file:
        return json.load(assoc_file).keys()
class Parser(ABC):
    """
    Abstract Base Class for parsers for looking up ops in user code.
    """

    def __init__(self):
        # all op names known to MindData lite (loaded once from associations.txt)
        self._all_ops = _load_ops_names()

    @abstractmethod
    def parse(self, user_filename):
        """
        finds ops detected in the user code

        :param user_filename: string, name of file containing user code
        :return: list of ops found in the user code
        """
class SimpleParser(Parser):
    """
    A simple parser that works by string matching:
    Code uses an op if it is found anywhere in the text.
    """

    def parse(self, user_filename):
        """
        Find and return ops in the user file.

        :param user_filename: filename of user code
        :return: a list of ops present in the file
        """
        if not os.path.isfile(user_filename):
            raise FileNotFoundError("file does not exist: {}".format(user_filename))
        with open(user_filename) as user_file:
            source_text = user_file.read().strip()
        return self._simple_string_match(source_text)

    def _simple_string_match(self, user_text):
        """
        Find and return ops in the user code (provided as a string).

        :param user_text: string containing user code
        :return: a list of ops found in the user_text
        """
        haystack = user_text.strip().lower()
        return [op for op in self._all_ops if op in haystack]