!6063 Modify how the hccl file is loaded and what is displayed during training

Merge pull request !6063 from hwjiaorui/master
This commit is contained in:
mindspore-ci-bot 2020-09-14 15:53:39 +08:00 committed by Gitee
commit 15a7722d84
2 changed files with 15 additions and 8 deletions

View File

@ -82,12 +82,19 @@ run_ascend(){
fi
rank_file_name=${2##*/}
IFS='_' read -ra array <<<"${rank_file_name}"
device_id_list=${array[2]}
first_device=${device_id_list:0:1}
#rank_file_name=${2##*/}
#IFS='_' read -ra array <<<"${rank_file_name}"
#device_id_list=${array[2]}
#first_device=${device_id_list:0:1}
#last_device=${device_list:${#device_list}-1:1}
device_num=${#device_id_list}
#device_num=${#device_id_list}
cat $2 | awk -F "[device_id]" '/device_id/{print$0}' >temp.log
array=$(cat temp.log | awk -F "[:]" '/device_id/{print$2}')
rm temp.log
IFS=" " read -ra device_list <<<$array
first_device=${device_list[0]:1:1}
device_num=${#device_list[*]}
ulimit -u unlimited
export DEVICE_NUM=${device_num}
@ -188,3 +195,4 @@ elif [ $1 = "GPU" ] ; then
else
echo "Unsupported device target: $1"
fi;

View File

@ -74,8 +74,7 @@ class Monitor(Callback):
cur_step_in_epoch = (cb_params.cur_step_num - 1) % cb_params.batch_num
print("epoch: [{:3d}/{:3d}], step:[{:5d}/{:5d}], loss:[{:5.3f}/{:5.3f}], time:[{:5.3f}], lr:[{:5.5f}]".format(
cb_params.cur_epoch_num -
1, cb_params.epoch_num, cur_step_in_epoch, cb_params.batch_num, step_loss,
cb_params.cur_epoch_num, cb_params.epoch_num, cur_step_in_epoch+1, cb_params.batch_num, step_loss,
np.mean(self.losses), step_mseconds, self.lr_init[cb_params.cur_step_num - 1]))