diff --git a/tests/perf_test/mind_expression_perf/generate_report.py b/tests/perf_test/mind_expression_perf/generate_report.py
new file mode 100644
index 00000000000..00b4e2282b0
--- /dev/null
+++ b/tests/perf_test/mind_expression_perf/generate_report.py
@@ -0,0 +1,143 @@
+# Copyright 2021 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+import sys
+import re
+import json
+import os
+import time
+import openpyxl as opx
+
+
+def parse_arguments():
+    print(sys.argv)
+    me_report_path = sys.argv[1]
+    log_path = sys.argv[2]
+    n_iter = sys.argv[3]
+    out = sys.argv[4]
+    assert n_iter.isdigit()
+    return me_report_path, log_path, int(n_iter), out
+
+
+def extract_by_keyword(doc, keyword, pattern):
+    rst = []
+    for i, s in enumerate(doc):
+        if keyword in s:
+            p = re.findall(pattern, s)
+            print("L%d: extracted %s from '%s'" % (i, p, s.strip()))
+            rst.extend(p)
+    return rst
+
+
+def process_log(fname, log_path, n_iter, keyword, pattern):
+    rnt = {}
+    for i in range(1, 1+n_iter):
+        fname_path = os.path.join(log_path, fname % i)
+        with open(fname_path) as f:
+            print("\nLoading %s" % fname_path)
+            rst = extract_by_keyword(f, keyword, pattern)
+        rnt[fname % i] = rst
+    return rnt
+
+
+def summarize(func):
+    def wrapper(*args, **kwargs):
+        log = func(*args, **kwargs)
+        times = list(log.items())
+        times.sort(key=lambda x: x[1])  # ascending, so times[0] is the fastest run
+        min_file, min_time = times[0]
+        avg = sum(map(lambda x: x[1], times)) / len(times)
+        log["min_time"] = min_time
+        log["min_file"] = min_file
+        log["avg_time"] = avg
+        return log
+    return wrapper
+
+
+@summarize
+def process_bert_log(log_path, n_iter):
+    fname = "bert%d.log"
+    total = process_log(fname, log_path, n_iter, "TotalTime", r"\d+\.\d+")
+    task = process_log(fname, log_path, n_iter, "task_emit", r"\d+\.\d+")
+    log = {}
+    for fname in total:
+        log[fname] = float(total[fname][0]) - float(task[fname][0])
+    return log
+
+
+@summarize
+def process_resnet_log(log_path, n_iter):
+    fname = "resnet%d.log"
+    total = process_log(fname, log_path, n_iter, "TotalTime", r"\d+\.\d+")
+    task = process_log(fname, log_path, n_iter, "task_emit", r"\d+\.\d+")
+    log = {}
+    for fname in total:
+        log[fname] = float(total[fname][0]) - float(task[fname][0])
+    return log
+
+
+@summarize
+def process_gpt_log(log_path, n_iter):
+    fname = "gpt%d.log"
+    total = process_log(fname, log_path, n_iter, "TotalTime", r"\d+\.\d+")
+    task = process_log(fname, log_path, n_iter, "task_emit", r"\d+\.\d+")
+    log = {}
+    for fname in total:
+        log[fname] = float(total[fname][0]) - float(task[fname][0])
+    return log
+
+
+@summarize
+def process_reid_log(log_path, n_iter):
+    log = {}
+    for i in range(8):  # one log per device of the ReID-8p job
+        fname = "reid_%d_" + str(i) + ".log"
+        total = process_log(fname, log_path, n_iter, "TotalTime", r"\d+\.\d+")
+        task = process_log(fname, log_path, n_iter, "task_emit", r"\d+\.\d+")
+        for fname in total:
+            log[fname] = float(total[fname][0]) - float(task[fname][0])
+    return log
+
+
+def write_to_me_report(log, me_report_path):
+    wb = opx.load_workbook(me_report_path)
+    sheet = wb["Sheet"]
+    idx = sheet.max_row + 1
+    date = time.strftime('%m%d', time.localtime())
+    sheet['A%d' % idx] = date
+    sheet['B%d' % idx] = round(log["reid"]["min_time"], 2)
+    sheet['C%d' % idx] = round(log["bert"]["min_time"], 2)
+    sheet['D%d' % idx] = round(log["resnet"]["min_time"], 2)
+    sheet['E%d' % idx] = round(log["gpt"]["min_time"], 2)
+    wb.save(me_report_path)
+
+
+def generate_report():
+    me_report_path, log_path, n_iter, out = parse_arguments()
+    log_data = {}
+    bert_log = process_bert_log(log_path, n_iter)
+    resnet_log = process_resnet_log(log_path, n_iter)
+    gpt_log = process_gpt_log(log_path, n_iter)
+    reid_log = process_reid_log(log_path, n_iter)
+    log_data["bert"] = bert_log
+    log_data["resnet"] = resnet_log
+    log_data["gpt"] = gpt_log
+    log_data["reid"] = reid_log
+    with open(out, "w") as f:
+        json.dump(log_data, f, indent=2)
+    write_to_me_report(log_data, me_report_path)
+
+
+if __name__ == "__main__":
+    generate_report()
diff --git a/tests/perf_test/mind_expression_perf/process_data.py b/tests/perf_test/mind_expression_perf/process_data.py
new file mode 100644
index 00000000000..89a968033c0
--- /dev/null
+++ b/tests/perf_test/mind_expression_perf/process_data.py
@@ -0,0 +1,104 @@
+# Copyright 2021 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+import os
+import sys
+import json
+import openpyxl as opx
+import matplotlib.ticker as ticker
+import matplotlib.pyplot as plt
+
+
+def parse_arguments():
+    log_path = sys.argv[1]
+    log_data = sys.argv[2]
+    me_report = sys.argv[3]
+    n_days = sys.argv[4]
+    assert n_days.isdigit()
+    return log_path, log_data, me_report, int(n_days)
+
+
+def read_data(log_data, me_report_path, n_days):
+    with open(log_data) as f:
+        log = json.load(f)
+
+    wb = opx.load_workbook(me_report_path)
+    sheet = wb["Sheet"]
+    n_row = sheet.max_row
+    date = [cell[0].value for cell in sheet["A2":"A%d" % n_row]]
+    reid_data = [float(cell[0].value) for cell in sheet["B2":"B%d" % n_row]]
+    bert_data = [float(cell[0].value) for cell in sheet["C2":"C%d" % n_row]]
+    resnet_data = [float(cell[0].value) for cell in sheet["D2":"D%d" % n_row]]
+    # GPT history only starts at row 43; draw_figure tail-aligns shorter series
+    gpt_data = [float(cell[0].value) for cell in sheet["E43":"E%d" % n_row]]
+    if n_days > 0:
+        date = date[-n_days:]
+        reid_data = reid_data[-n_days:]
+        bert_data = bert_data[-n_days:]
+        resnet_data = resnet_data[-n_days:]
+        gpt_data = gpt_data[-n_days:]
+
+    return log, date, reid_data, bert_data, resnet_data, gpt_data
+
+
+def draw_figure(x_data, y_data, labels, title, out, width=24, height=8, tick_space=2):
+    print("Generating figure to: %s" % out)
+    plt.figure(figsize=(width, height))
+    for y, label in zip(y_data, labels):
+        x = x_data[-len(y):]
+        n_data = len(x)
+        assert len(x) == len(y), "expected len(x) == len(y), but %d != %d" % (len(x), len(y))
+        plt.plot(x, y, linewidth=2, marker='o', markersize=5, label=label)
+        ax = plt.gca()
+        ax.xaxis.set_major_locator(ticker.MultipleLocator(tick_space))
+        for i in range(n_data):
+            if i % 2 == 0:
+                plt.text(x[i], y[i], y[i], ha='center',
+                         va='bottom', fontsize=8)
+
+    plt.title(title)
+    plt.xlabel("Date")
+    plt.ylabel("Time(s)")
+    plt.grid()
+    plt.legend()
+    plt.savefig(out)
+
+
+def generate_report(log, labels, log_path):
+    for label in labels:
+        fname = log[label]["min_file"]
+        fname_path = os.path.join(log_path, fname)
+        out_path = os.path.join(log_path, "reports", label + "_me.log")
+        print("Generating report to: %s" % out_path)
+        os.system("grep -A 230 'TotalTime = ' %s > %s" %
+                  (fname_path, out_path))
+
+
+def process_data():
+    log_path, log_data, me_report, n_days = parse_arguments()
+    log, date, reid_data, bert_data, resnet_data, gpt_data = read_data(
+        log_data, me_report, n_days)
+    draw_figure(date,
+                [reid_data, bert_data, gpt_data],
+                ["ReID", "BERT", "GPT"],
+                "ReID&BERT&GPT",
+                os.path.join(log_path, "reports", "reid_bert_gpt.png"))
+    draw_figure(date, [resnet_data], ["ResNet"], "ResNet",
+                os.path.join(log_path, "reports", "resnet.png"))
+    generate_report(log, list(log.keys()), log_path)
+
+
+if __name__ == "__main__":
+    process_data()
diff --git a/tests/perf_test/mind_expression_perf/run.sh b/tests/perf_test/mind_expression_perf/run.sh
new file mode 100644
index 00000000000..d526f2e58bc
--- /dev/null
+++ b/tests/perf_test/mind_expression_perf/run.sh
@@ -0,0 +1,146 @@
+#!/bin/bash
+# Copyright 2021 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+
+stage=0
+days=7
+iter=5
+device_id=0
+n_worker=128
+work_dir="/opt/npu/me_monitor"
+me_report_path=$work_dir/logs/ME_report_daily.xlsx
+log_dir=logs_$(date "+%m%d-%H%M")
+log_path=$work_dir/logs/$log_dir
+ms_master="https://gitee.com/mindspore/mindspore.git"
+log_data="data.json"
+ci_mode=true
+
+set -e
+set -o pipefail
+
+# parse arguments from command line
+while getopts "s:d:i:l:" args
+do
+    case $args in
+        s)
+            stage=$OPTARG
+            ;;
+        d)
+            days=$OPTARG
+            ;;
+        i)
+            iter=$OPTARG
+            ;;
+        l)
+            log_dir=$OPTARG
+            log_path=$work_dir/logs/$log_dir
+            ;;
+        ?)
+            echo "unknown argument"
+            exit 1
+            ;;
+    esac
+done
+
+source env.sh
+export DEVICE_ID=$device_id
+echo "Args: days=$days, iter=$iter, log_path=$log_path"
+cd $work_dir
+
+echo $WORKSPACE
+WORKSPACE=/home/jenkins-slave/workspace/MindSpore_Network_reid_compile_performance
+echo $WORKSPACE
+
+if [ $stage -le 1 ]; then
+    echo ""
+    echo "===========Stage 1: Fetching latest mindspore from master==========="
+    if [ -d mindspore ]; then
+        rm -rf mindspore
+    fi
+    git clone $ms_master
+fi
+
+if [ $stage -le 2 ]; then
+    echo ""
+    echo "===========Stage 2: Building mindspore==========="
+    cd $work_dir/mindspore
+    bash build.sh -e ascend -j $n_worker -p on
+fi
+
+if [ $stage -le 3 ]; then
+    echo ""
+    echo "===========Stage 3: Compiling networks==========="
+    cd $work_dir
+    mkdir -p $log_path
+
+    # Compiling ReID-8
+    # split resource-consuming task from others
+    for count in $(seq 1 $iter); do
+        echo "[INFO] Compiling ReID-8p, iteration $count"
+        if [ -d reid$count ]; then
+            rm -rf reid$count
+        fi
+        mkdir reid$count
+        cd reid$count
+        bash $work_dir/faceReidToMe/dist_env/env_26/dist_env_26.sh
+        for num in {0..7}; do
+            cp device_$num/test_reid_stage123_1024node_graphdata_dynamiclossscale_log$num.log $log_path/reid_${count}_${num}.log
+        done
+        cd $work_dir
+        mv reid$count $log_path
+    done
+
+    # Compiling BERT
+    cd $work_dir
+    for count in $(seq 1 $iter); do
+        echo "[INFO] Compiling BERT, iteration $count"
+        pytest -s mindspore/tests/perf_test/bert/test_bert_train.py::test_bert_train | tee $log_path/bert$count.log
+    done
+
+    # Compiling ResNet50
+    for count in $(seq 1 $iter); do
+        echo "[INFO] Compiling ResNet50, iteration $count"
+        pytest -s mindspore/tests/perf_test/test_resnet_train.py::test_train_step | tee $log_path/resnet$count.log
+    done
+
+    # Compiling GPT
+    for count in $(seq 1 $iter); do
+        echo "[INFO] Compiling GPT, iteration $count"
+        cd $work_dir/gpt  # absolute path so later iterations do not nest into gpt/gpt
+        bash scripts/run_standalone_train.sh 0 1 $work_dir/gpt_data | tee $log_path/gpt$count.log
+    done
+fi
+
+if [ $stage -le 4 ]; then
+    echo ""
+    echo "===========Stage 4: Processing log files==========="
+    cd $work_dir
+    # generate_report.py expects: me_report xlsx, log dir, n_iter, output json
+    python generate_report.py $me_report_path $log_path $iter $log_path/$log_data
+fi
+
+if [ $stage -le 5 ]; then
+    echo ""
+    echo "===========Stage 5: Generating reports==========="
+    if [ ! -d $log_path/reports ]; then
+        mkdir $log_path/reports
+    fi
+    # process_data.py expects: log dir, data json, me_report xlsx, days to plot
+    python process_data.py $log_path $log_path/$log_data $me_report_path $days
+
+    if [ "$ci_mode" = true ]; then
+        echo "copying file to artifacts"
+        mkdir -p ${WORKSPACE}/archive
+        cp $log_path/reports/* ${WORKSPACE}/archive
+    fi
+fi
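
A hedged usage sketch of the pipeline, assuming run.sh is deployed under the hard-coded work_dir /opt/npu/me_monitor next to env.sh and the ME_report_daily.xlsx history; the log directory name in the second command is hypothetical:

    # full nightly run: clone and build MindSpore, compile each network 5 times,
    # then plot the 7 most recent report rows
    bash run.sh -d 7 -i 5

    # skip clone/build/compile and only re-run the post-processing stages (4 and 5)
    # against an existing directory under /opt/npu/me_monitor/logs
    bash run.sh -s 4 -l logs_0101-0300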