# Copyright 2021-2022 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
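"""System tests for the frontend compile cache: each case runs a network script twice and checks the cache behavior."""
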
import os
import re
import shutil
import subprocess

import pytest
import numpy as np

# Patterns used to pull the printed output values and shapes out of the run logs:
# match_output captures the contents of each "{...}" group, match_num the numbers inside it.
match_output = re.compile(r'[{](.*?)[}]', re.S)
match_num = re.compile(r'\d+\.?\d*', re.S)


def run_twice_with_same_network(file_name, cache_path, log_file_name_first, log_file_name_second):
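    """Run one network script twice with the same compile cache path and check that the cached
    second run reproduces the output values and shapes of the first run."""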
    # Clear the compile cache folder and log files
    if os.path.exists(cache_path):
        shutil.rmtree(cache_path)
    if os.path.exists(log_file_name_first):
        os.remove(log_file_name_first)
    if os.path.exists(log_file_name_second):
        os.remove(log_file_name_second)
    assert not os.path.exists(cache_path)
    assert not os.path.exists(log_file_name_first)
    assert not os.path.exists(log_file_name_second)

    # First run without the compile cache
    cmd_first = f"GLOG_v=2 python {file_name} '{cache_path}' > {log_file_name_first} 2>&1"
    subprocess.check_output(cmd_first, shell=True)
    assert os.path.exists(log_file_name_first)
    assert os.path.exists(cache_path)
    with open(log_file_name_first, "r") as f_first:
        data_first = f_first.read()
    assert "Check the consistency of dependency files hash failed. Execute all the compilation actions." in data_first

    # Take out the result of the first run
    match_output_first = re.findall(match_output, data_first)
    assert len(match_output_first) == 2
    nums_first = re.findall(match_num, match_output_first[0])
    array_first = np.array([float(x) for x in nums_first])
    shape_first = re.findall(match_num, match_output_first[1])
    array_shape_first = np.array([int(x) for x in shape_first])

    # Second run with the compile cache
    cmd_second = f"GLOG_v=2 python {file_name} '{cache_path}' > {log_file_name_second} 2>&1"
    subprocess.check_output(cmd_second, shell=True)
    assert os.path.exists(log_file_name_second)
    with open(log_file_name_second, "r") as f_second:
        data_second = f_second.read()
    assert "Use the compilation cache and execute the backend actions only. Be aware of correctness risks." in \
        data_second

    # Take out the result of the second run
    match_output_second = re.findall(match_output, data_second)
    assert len(match_output_second) == 2
    nums_second = re.findall(match_num, match_output_second[0])
    array_second = np.array([float(x) for x in nums_second])
    shape_second = re.findall(match_num, match_output_second[1])
    array_shape_second = np.array([int(x) for x in shape_second])

    # The cached run must reproduce the values and shapes of the first run
    assert np.allclose(array_first, array_second, 0.0001, 0.0001)
    assert (array_shape_first == array_shape_second).all()

    # Clean up files
    os.remove(log_file_name_first)
    os.remove(log_file_name_second)
    shutil.rmtree(cache_path)


def run_twice_with_different_networks(file_name_first, file_name_second, cache_path, log_file_name_first,
                                      log_file_name_second):
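    """Run two different network scripts that share one compile cache path and check that the
    dependency-hash check forces a full recompilation on the second run."""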
    # Clear the compile cache folder
    if os.path.exists(cache_path):
        shutil.rmtree(cache_path)
    assert not os.path.exists(cache_path)

    # First run without the compile cache
    cmd_first = f"GLOG_v=2 python {file_name_first} '{cache_path}' > {log_file_name_first} 2>&1"
    subprocess.check_output(cmd_first, shell=True)
    assert os.path.exists(log_file_name_first)
    assert os.path.exists(cache_path)
    with open(log_file_name_first, "r") as f_first:
        data_first = f_first.read()
    assert "Check the consistency of dependency files hash failed. Execute all the compilation actions." in data_first

    # Second run: the network changed, so the cache must be detected as stale and recompiled
    cmd_second = f"GLOG_v=2 python {file_name_second} '{cache_path}' > {log_file_name_second} 2>&1"
    subprocess.check_output(cmd_second, shell=True)
    assert os.path.exists(log_file_name_second)
    with open(log_file_name_second, "r") as f_second:
        data_second = f_second.read()
    assert "Check the consistency of dependency files hash failed. Execute all the compilation actions." in data_second

    # Clean up log files and the cache folder
    os.remove(log_file_name_first)
    os.remove(log_file_name_second)
    shutil.rmtree(cache_path)


def check_log(role, log_name, str_to_check):
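    """Assert that the log file written by the given role contains the expected string."""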
    assert os.path.exists(role + "/" + log_name)
    with open(role + "/" + log_name, "r") as f:
        data = f.read()
    assert str_to_check in data


def start_ps_subprocess(script_path, cache_path, str_to_check, log_name):
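    """Launch the scheduler, server and worker of a parameter-server job from their own run
    directories, wait for the worker to finish, and check every role's log for the expected string."""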
    cwd = os.getcwd()
    # Start the scheduler in the background.
    os.environ['MS_ROLE'] = 'MS_SCHED'
    cmd_sched = f"cd {cwd}/sched && GLOG_v=2 python ../{script_path} ../{cache_path} > {log_name} 2>&1 &"
    subprocess.run(cmd_sched, shell=True)
    # Start the parameter server in the background.
    os.environ['MS_ROLE'] = 'MS_PSERVER'
    cmd_server = f"cd {cwd}/server && GLOG_v=2 python ../{script_path} ../{cache_path} > {log_name} 2>&1 &"
    subprocess.run(cmd_server, shell=True)
    # Start the worker and wait for it to finish.
    os.environ['MS_ROLE'] = 'MS_WORKER'
    cmd_worker = f"cd {cwd}/worker && GLOG_v=2 python ../{script_path} ../{cache_path} > {log_name} 2>&1"
    subprocess.run(cmd_worker, shell=True, check=True)
    os.chdir(cwd)
    check_log("sched", log_name, str_to_check)
    check_log("server", log_name, str_to_check)
    check_log("worker", log_name, str_to_check)


def clear_and_make_run_dir(dir_path):
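    """Recreate an empty working directory for one of the parameter-server roles."""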
    shutil.rmtree(dir_path, ignore_errors=True)
    assert not os.path.exists(dir_path)
    os.mkdir(dir_path)
    assert os.path.exists(dir_path)


def check_compile_cache_files(cache_path, role):
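    """Check that the cached graph and the dependency hash file exist for the given role prefix."""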
    assert os.path.exists(cache_path)
    assert os.path.exists(cache_path + "/rank_0/graph_cache/" + role + "compile_cache_0.mindir")
    assert os.path.exists(cache_path + "/rank_0/graph_cache/" + role + "compile_dependency.hash")


def run_lenet_ps_twice(file_name, cache_path, log_file_name_first, log_file_name_second):
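    """Run a parameter-server LeNet job twice with the same compile cache path: the first run must
    compile from scratch and write cache files for every role, the second run must load the cache."""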
    # Clear the compile cache folder and the per-role run directories
    shutil.rmtree(cache_path, ignore_errors=True)
    assert not os.path.exists(cache_path)
    clear_and_make_run_dir("sched")
    clear_and_make_run_dir("server")
    clear_and_make_run_dir("worker")

    # Set the parameter-server environment variables
    os.environ['MS_SCHED_HOST'] = '127.0.0.1'
    os.environ['MS_SCHED_PORT'] = '8182'
    os.environ['MS_SCHED_NUM'] = '1'
    os.environ['MS_SERVER_NUM'] = '1'
    os.environ['MS_WORKER_NUM'] = '1'

    # First run: every role compiles from scratch and writes its cache files
    first_str_to_check = "Check the consistency of dependency files hash failed. Execute all the compilation actions."
    start_ps_subprocess(file_name, cache_path, first_str_to_check, log_file_name_first)
    assert os.path.exists(cache_path)
    check_compile_cache_files(cache_path, "")
    check_compile_cache_files(cache_path, "pserver_")
    check_compile_cache_files(cache_path, "pscheduler_")

    # Second run: every role loads the compile cache
    os.environ['MS_SCHED_PORT'] = '8183'
    second_str_to_check = "Use the compilation cache and execute the backend actions only. Be aware of correctness" \
                          " risks."
    start_ps_subprocess(file_name, cache_path, second_str_to_check, log_file_name_second)

    # Clean up environment variables, run directories and the cache folder
    del os.environ['MS_SCHED_HOST']
    del os.environ['MS_SCHED_PORT']
    del os.environ['MS_ROLE']
    del os.environ['MS_SCHED_NUM']
    del os.environ['MS_SERVER_NUM']
    del os.environ['MS_WORKER_NUM']
    shutil.rmtree("sched", ignore_errors=True)
    shutil.rmtree("server", ignore_errors=True)
    shutil.rmtree("worker", ignore_errors=True)
    shutil.rmtree(cache_path, ignore_errors=True)


@pytest.mark.level0
@pytest.mark.platform_x86_ascend_training
@pytest.mark.platform_arm_ascend_training
@pytest.mark.env_onecard
def test_compile_cache_load_weights():
    """
    Feature: Compile cache.
    Description: Test whether the compile cache can load the value of parameters successfully.
    Expectation: success.
    """
    run_twice_with_same_network("run_network_with_weights.py", "./weight", "weight_first.txt", "weight_second.txt")


@pytest.mark.level0
@pytest.mark.platform_x86_ascend_training
@pytest.mark.platform_arm_ascend_training
@pytest.mark.env_onecard
def test_compile_cache_lenet():
    """
    Feature: Compile cache.
    Description: Test whether the regular compile cache function can run successfully.
    Expectation: success.
    """
    run_twice_with_same_network("run_lenet.py", "./lenet", "lenet_first.txt", "lenet_second.txt")


@pytest.mark.level0
@pytest.mark.platform_x86_ascend_training
@pytest.mark.platform_arm_ascend_training
@pytest.mark.env_onecard
def test_compile_cache_net_with_control_flow():
    """
    Feature: Compile cache.
    Description: Test whether the compile cache can load ref type parameters of a control flow network correctly.
    Expectation: success.
    """
    run_twice_with_same_network("run_network_with_control_flow.py", "./control_flow", "control_net_first.txt",
                                "control_net_second.txt")


@pytest.mark.level0
@pytest.mark.platform_x86_ascend_training
@pytest.mark.platform_arm_ascend_training
@pytest.mark.env_onecard
def test_compile_cache_auto_detect():
    """
    Feature: Compile cache.
    Description: Test whether the compile cache auto-detection function can run successfully.
    Expectation: success.
    """
    run_twice_with_different_networks("run_lenet.py", "run_network_with_weights.py", "./lenet_auto_detect",
                                      "auto_detect_first.txt", "auto_detect_second.txt")


@pytest.mark.level0
@pytest.mark.platform_x86_ascend_training
@pytest.mark.platform_arm_ascend_training
@pytest.mark.env_onecard
def test_compile_cache_lenet_change_dir():
    """
    Feature: Compile cache.
    Description: Test whether the regular compile cache function can run successfully after changing
        the current working directory.
    Expectation: success.
    """
    cwd = os.getcwd()
    new_path = cwd + '/tmp'
    shutil.rmtree(new_path, ignore_errors=True)
    os.mkdir(new_path)
    os.chdir(new_path)
    run_twice_with_same_network("../run_lenet.py", "../lenet_change_dir", "../lenet_change_dir_first.txt",
                                "../lenet_change_dir_second.txt")
    # Return to the original directory before removing the temporary one
    os.chdir(cwd)
    shutil.rmtree(new_path, ignore_errors=True)


@pytest.mark.level0
@pytest.mark.platform_x86_ascend_training
@pytest.mark.platform_arm_ascend_training
@pytest.mark.env_onecard
def test_compile_cache_lenet_ps():
    """
    Feature: Compile cache.
    Description: Test whether the regular compile cache function can run successfully with lenet in ps mode.
    Expectation: success.
    """
    run_lenet_ps_twice("run_lenet_ps.py", "./lenet_ps", "lenet_ps_first.txt", "lenet_ps_second.txt")


@pytest.mark.level0
@pytest.mark.platform_x86_ascend_training
@pytest.mark.platform_arm_ascend_training
@pytest.mark.env_onecard
def test_compile_cache_ms_function():
    """
    Feature: Compile cache.
    Description: Test whether the compile cache function can run successfully in the compilation of ms_function.
    Expectation: success.
    """
    run_twice_with_same_network("run_lenet_ms_function.py", "./lenet_ms_function", "lenet_ms_function_first.txt",
                                "lenet_ms_function_second.txt")