# File: mindspore/tests/st/fl/albert/cloud_eval.py
# (Repository viewer metadata at capture time: 102 lines, 3.8 KiB, Python.)

# Copyright 2021 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
import argparse
import os
import sys
from time import time
from mindspore import context
from mindspore.train.serialization import load_checkpoint
from src.config import eval_cfg, server_net_cfg
from src.dataset import load_datasets
from src.utils import restore_params
from src.model import AlbertModelCLS
from src.tokenization import CustomizedTextTokenizer
from src.assessment_method import Accuracy
def parse_args():
    """
    Build the command-line interface of the server eval task and parse ``sys.argv``.

    Returns:
        argparse.Namespace, carrying the string options ``device_target``,
        ``device_id``, ``tokenizer_dir``, ``eval_data_dir``, ``model_path``
        and ``vocab_map_ids_path``.
    """
    cli = argparse.ArgumentParser(description='server eval task')
    # The backend is the only option restricted to a fixed set of values.
    cli.add_argument('--device_target', type=str, default='GPU', choices=['Ascend', 'GPU', 'CPU'])
    # Every remaining option is a free-form string, so they are registered
    # from one (flag, default) table; order is kept so the help text is unchanged.
    string_options = (
        ('--device_id', '0'),
        ('--tokenizer_dir', '../model_save/init/'),
        ('--eval_data_dir', '../datasets/eval/'),
        ('--model_path', '../model_save/train_server/0.ckpt'),
        ('--vocab_map_ids_path', '../model_save/init/vocab_map_ids.txt'),
    )
    for flag, fallback in string_options:
        cli.add_argument(flag, type=str, default=fallback)
    return cli.parse_args()
def server_eval(args):
    """
    Evaluate a saved checkpoint of the ALBERT classification model and print its metric.

    Runs four stages in order (context setup, data processing, model
    construction, evaluation) and prints the wall-clock time of each stage.
    The running metric is printed after every batch and once more at the end.

    Args:
        args (argparse.Namespace): Parsed options. Reads ``device_id``,
            ``tokenizer_dir``, ``eval_data_dir``, ``model_path`` and
            ``vocab_map_ids_path``, plus ``device_target`` when present
            (defaults to ``'GPU'`` otherwise).
    """
    start = time()
    # NOTE: device_id is only applied through CUDA_VISIBLE_DEVICES here, so it
    # presumably has an effect for the GPU target only.
    os.environ['CUDA_VISIBLE_DEVICES'] = args.device_id

    # mindspore context
    # Honor the requested backend instead of always forcing 'GPU'. The getattr
    # fallback keeps callers working whose namespace has no device_target.
    device_target = getattr(args, 'device_target', 'GPU')
    context.set_context(mode=context.GRAPH_MODE, device_target=device_target)
    print('Context setting is done! Time cost: {}'.format(time() - start), flush=True)
    start = time()

    # data process
    tokenizer = CustomizedTextTokenizer.from_pretrained(
        args.tokenizer_dir, vocab_map_ids_path=args.vocab_map_ids_path)
    datasets_list, _ = load_datasets(
        args.eval_data_dir, server_net_cfg.seq_length, tokenizer, eval_cfg.batch_size,
        label_list=None,
        do_shuffle=False,
        drop_remainder=False,
        output_dir=None)
    print('Data process is done! Time cost: {}'.format(time() - start), flush=True)
    start = time()

    # main model
    albert_model_cls = AlbertModelCLS(server_net_cfg)
    albert_model_cls.set_train(False)
    param_dict = load_checkpoint(args.model_path)
    restore_params(albert_model_cls, param_dict)
    print('Model construction is done! Time cost: {}'.format(time() - start), flush=True)
    start = time()

    # eval
    callback = Accuracy()
    # global_step counts batches across all datasets, not per dataset.
    global_step = 0
    for datasets in datasets_list:
        for batch in datasets.create_tuple_iterator():
            # The fifth element of each batch is not used for evaluation.
            input_ids, attention_mask, token_type_ids, label_ids, _ = batch
            logits = albert_model_cls(input_ids, attention_mask, token_type_ids)
            callback.update(logits, label_ids)
            print('eval step: {}, {}: {}'.format(global_step, callback.name, callback.get_metrics()),
                  flush=True)
            global_step += 1
    metrics = callback.get_metrics()
    print('Final {}: {}'.format(callback.name, metrics), flush=True)
    print('Evaluating process is done! Time cost: {}'.format(time() - start), flush=True)
if __name__ == '__main__':
    # Script entry point: parse the command line and run the evaluation once.
    server_eval(parse_args())