!18887 merge dynamics

Merge pull request !18887 from Maige/dynamics
This commit is contained in:
i-robot 2021-06-26 08:04:58 +00:00 committed by Gitee
commit bc1a1fc8be
8 changed files with 412 additions and 16 deletions

View File

@ -70,13 +70,19 @@ In `deepmodeling/deepmd-kit/source`:
```shell
├── md
├── README.md # descriptions about MD
├── README.md # descriptions about MD
├── script
│ ├── eval.sh # evaluation script
│ ├── eval.sh # evaluation script
├── src
│ ├── descriptor.py # descriptor function
│ └── network.py # MD simulation architecture
└── eval.py # evaluation interface
│ ├── src
│ ├── config.py # Parameter config
│ ├── moxing_adapter.py # modelarts device configuration
│ ├── device_adapter.py # Device Config
│ ├── local_adapter.py # local device config
│ ├── descriptor.py # descriptor function
│ └── network.py # MD simulation architecture
└── eval.py # evaluation interface
└── default_config.yaml # config file
```
### Training Process
@ -88,7 +94,7 @@ To Be Done
After installing MindSpore via the official website, you can start evaluation as follows:
```shell
python eval.py --dataset_path [DATASET_PATH] --checkpoint_path [CHECKPOINT_PATH]
python eval.py --dataset_path [DATASET_PATH] --checkpoint_path [CHECKPOINT_PATH] --baseline_path [BASELINE_PATH]
```
> The checkpoint can be trained with DeePMD-kit and then converted into a MindSpore ckpt file.
@ -102,6 +108,39 @@ energy: -29944.03
atom_energy: -94.38766 -94.294426 -94.39194 -94.70758 -94.51311 -94.457954 ...
```
- running on ModelArts
- If you want to train the model on ModelArts, you can refer to the [official guidance document](https://support.huaweicloud.com/modelarts/) of ModelArts.
```python
# Example of using distributed training dpn on modelarts :
# Data set storage method
# ├── molecular_dynamics_dataset # dataset dir
# ├──baseline.npz # baseline dataset
# ├──input_tensor.npz # infer input dataset
# ├──water_md.ckpt # checkpoint
# Choose either a (modify the yaml file parameters) or b (modify the parameters when creating the training job on ModelArts).
# Example of using model inference on modelarts
# (1) Place the trained model in the corresponding location of the bucket.
# (2) Choose a or b.
# a.set "enable_modelarts=True"
# set "checkpoint_path=/cache/data/water_md.ckpt"
# set "dataset_path=/cache/data/input_tensor.npz"
# set "baseline_path=/cache/data/baseline.npz"
# b. Add the "enable_modelarts=True" parameter on the modelarts interface.
# Set the parameters required by method a on the modelarts interface
# Note: The path parameter does not need to be quoted
# (3) Set the path of the network configuration file "_config_path=/The path of config in default_config.yaml/"
# (4) Set the code path on the modelarts interface "/path/molecular_dynamics"。
# (5) Set the model's startup file on the modelarts interface "eval.py" 。
# (6) Set the data path of the model on the modelarts interface ".../molecular_dynamics"(choices molecular_dynamics Folder path) ,
# The output path of the model "Output file path" and the log path of the model "Job log path" 。
# (7) Start model inference。
```
## ModelZoo Homepage
Please check the official [homepage](https://gitee.com/mindspore/mindspore/tree/master/model_zoo).

View File

@ -0,0 +1,32 @@
# Builtin Configurations (DO NOT CHANGE THESE CONFIGURATIONS unless you know exactly what you are doing)
enable_modelarts: False
# url for modelarts
data_url: ""
train_url: ""
checkpoint_url: ""
# path for local
data_path: "/cache/data"
output_path: "/cache/train"
load_path: "/cache/checkpoint_path"
device_target: "Ascend"
enable_profiling: False
# ======================================================================================
# Eval options
checkpoint_path: ""
dataset_path: ""
---
# Help description for each configuration
enable_modelarts: "Whether training on modelarts default: False"
data_url: "Url for modelarts"
train_url: "Url for modelarts"
data_path: "The location of input data"
output_path: "The location of the output file"
device_target: "Target device type to run on, e.g. Ascend. (Default: Ascend)"
enable_profiling: "Whether enable profiling while training default: False"
file_name: "CNN&CTC output air name"
file_format: "choices [AIR, MINDIR]"
ckpt_file: "CNN&CTC ckpt file"
checkpoint_path: "Checkpoint file path"
dataset_path: "Dataset path"

View File

@ -13,25 +13,30 @@
# limitations under the License.
# ============================================================================
"""eval."""
import argparse
import numpy as np
import mindspore.common.dtype as mstype
from mindspore import Tensor
from mindspore import context
from mindspore.train.serialization import load_checkpoint, load_param_into_net
from src.network import Network
from src.model_utils.config import config
from src.model_utils.moxing_adapter import moxing_wrapper
parser = argparse.ArgumentParser(description='MD Simulation')
parser.add_argument('--checkpoint_path', type=str, default=None, help='Checkpoint file path')
parser.add_argument('--dataset_path', type=str, default=None, help='Dataset path')
args_opt = parser.parse_args()
context.set_context(mode=context.GRAPH_MODE, save_graphs=False, device_target="Ascend")
context.set_context(mode=context.GRAPH_MODE, save_graphs=False, device_target=config.device_target)
if __name__ == '__main__':
def modelarts_pre_process():
pass
@moxing_wrapper(pre_process=modelarts_pre_process)
def model_eval():
"""
infer network
"""
# get input data
r = np.load(args_opt.dataset_path)
r = np.load(config.dataset_path)
d_coord, d_nlist, avg, std, atype, nlist = r['d_coord'], r['d_nlist'], r['avg'], r['std'], r['atype'], r['nlist']
batch_size = 1
atype_tensor = Tensor(atype)
@ -46,10 +51,14 @@ if __name__ == '__main__':
frames = Tensor(frames)
# evaluation
net = Network()
param_dict = load_checkpoint(args_opt.checkpoint_path)
param_dict = load_checkpoint(config.checkpoint_path)
load_param_into_net(net, param_dict)
net.to_float(mstype.float32)
energy, atom_ener, _ = \
net(d_coord_tensor, d_nlist_tensor, frames, avg_tensor, std_tensor, atype_tensor, nlist_tensor)
print('energy:', energy)
print('atom_energy:', atom_ener)
if __name__ == '__main__':
model_eval()

View File

@ -0,0 +1,130 @@
# Copyright 2021 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License Version 2.0(the "License");
# you may not use this file except in compliance with the License.
# you may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0#
#
# Unless required by applicable law or agreed to in writing software
# distributed under the License is distributed on an "AS IS" BASIS
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ====================================================================================
"""Parse arguments"""
import os
import ast
import argparse
from pprint import pprint, pformat
import yaml
# Default yaml location; get_config() joins it onto this module's directory.
_config_path = '../../default_config.yaml'
class Config:
    """
    Attribute-style view over a configuration dictionary.

    Every key of the dictionary becomes an attribute. Nested dictionaries are
    wrapped recursively; lists and tuples are stored as lists whose dictionary
    entries are wrapped as well.
    """

    def __init__(self, cfg_dict):
        for key, value in cfg_dict.items():
            if isinstance(value, dict):
                converted = Config(value)
            elif isinstance(value, (list, tuple)):
                # Sequences (tuples included) are always stored as lists.
                converted = [Config(entry) if isinstance(entry, dict) else entry for entry in value]
            else:
                converted = value
            setattr(self, key, converted)

    def __str__(self):
        return pformat(vars(self))

    def __repr__(self):
        return str(self)
def parse_cli_to_yaml(parser, cfg, helper=None, choices=None, cfg_path='default_config.yaml'):
    """
    Parse command line arguments to the configuration according to the default yaml

    One ``--<key>`` option is registered for every scalar entry of ``cfg``;
    list and dict entries are skipped and cannot be overridden from the CLI.

    Args:
        parser: Parent parser
        cfg: Base configuration
        helper: Helper description
        choices: Allowed values per key
        cfg_path: Path to the default yaml config

    Returns:
        The namespace produced by parsing ``sys.argv``.
    """
    parser = argparse.ArgumentParser(description='[REPLACE THIS at config.py]',
                                     parents=[parser])
    helper = {} if helper is None else helper
    choices = {} if choices is None else choices
    for item, default in cfg.items():
        if isinstance(default, (list, dict)):
            continue
        if isinstance(default, bool):
            # bool("False") is True, so booleans are parsed as Python literals.
            arg_type = ast.literal_eval
        elif default is None:
            # An empty yaml value loads as None; NoneType cannot convert a CLI
            # string, so overrides of such keys are accepted as plain strings.
            arg_type = str
        else:
            arg_type = type(default)
        parser.add_argument('--' + item, type=arg_type, default=default,
                            choices=choices.get(item),
                            help=helper.get(item, 'Please reference to {}'.format(cfg_path)))
    return parser.parse_args()
def parse_yaml(yaml_path):
    """
    Parse the yaml config file

    The file may hold up to three yaml documents separated by ``---``:
    the configuration itself, the help text per key, and the allowed
    choices per key. Missing documents default to empty dictionaries.

    Args:
        yaml_path: Path to the yaml config

    Returns:
        Tuple ``(cfg, cfg_helper, cfg_choices)``.

    Raises:
        ValueError: If the file is not valid yaml or does not contain
            between one and three documents.
    """
    with open(yaml_path, 'r') as fin:
        try:
            # NOTE(review): FullLoader can build more than plain data types;
            # only load config files that come from a trusted location.
            cfgs = list(yaml.load_all(fin.read(), Loader=yaml.FullLoader))
        except yaml.YAMLError as err:
            # Only yaml errors are translated, and the cause is kept, so the
            # real reason stays visible and KeyboardInterrupt is not swallowed.
            raise ValueError('Failed to parse yaml') from err

    if not cfgs:
        raise ValueError('No document found in config yaml')
    if len(cfgs) == 1:
        cfg = cfgs[0]
        cfg_helper = {}
        cfg_choices = {}
    elif len(cfgs) == 2:
        cfg, cfg_helper = cfgs
        cfg_choices = {}
    elif len(cfgs) == 3:
        cfg, cfg_helper, cfg_choices = cfgs
    else:
        raise ValueError('At most 3 docs (config description for help, choices) are supported in config yaml')
    print(cfg_helper)
    return cfg, cfg_helper, cfg_choices
def merge(args, cfg):
    """
    Overlay parsed command line values onto the yaml configuration.

    ``cfg`` is updated in place and the same object is returned.

    Args:
        args: command line arguments
        cfg: Base configuration
    """
    for name, value in vars(args).items():
        cfg[name] = value
    return cfg
def get_config():
    """
    Build the final configuration from the yaml file and the command line.

    ``--config_path`` is read first to locate the yaml file, then every scalar
    yaml entry becomes a command line option whose value overrides the yaml one.
    """
    module_dir = os.path.dirname(os.path.abspath(__file__))
    default_yaml = os.path.join(module_dir, _config_path)

    base_parser = argparse.ArgumentParser(description='default name', add_help=False)
    base_parser.add_argument('--config_path', type=str, default=default_yaml,
                             help='Config file path')
    # Options other than --config_path are not registered yet, so unknown ones are tolerated here.
    known_args, _ = base_parser.parse_known_args()
    yaml_path = known_args.config_path

    default, helper, choices = parse_yaml(yaml_path)
    pprint(default)
    cli_args = parse_cli_to_yaml(parser=base_parser, cfg=default, helper=helper, choices=choices,
                                 cfg_path=yaml_path)
    return Config(merge(cli_args, default))
# Built once when this module is first imported: reads the yaml file and sys.argv.
config = get_config()

View File

@ -0,0 +1,26 @@
# Copyright 2021 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License Version 2.0(the "License");
# you may not use this file except in compliance with the License.
# you may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0#
#
# Unless required by applicable law or agreed to in writing software
# distributed under the License is distributed on an "AS IS" BASIS
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ====================================================================================
"""Device adapter for ModelArts"""
from .config import config
# Expose the same four helpers from whichever backend matches the configuration.
if not config.enable_modelarts:
    from .local_adapter import get_device_id, get_device_num, get_rank_id, get_job_id
else:
    from .moxing_adapter import get_device_id, get_device_num, get_rank_id, get_job_id

__all__ = [
    'get_device_id', 'get_device_num', 'get_job_id', 'get_rank_id'
]

View File

@ -0,0 +1,36 @@
# Copyright 2021 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License Version 2.0(the "License");
# you may not use this file except in compliance with the License.
# you may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0#
#
# Unless required by applicable law or agreed to in writing software
# distributed under the License is distributed on an "AS IS" BASIS
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ====================================================================================
"""Local adapter"""
import os
def get_device_id():
    """Return the DEVICE_ID environment variable as an int (0 when unset)."""
    return int(os.environ.get('DEVICE_ID', '0'))
def get_device_num():
    """Return the RANK_SIZE environment variable as an int (1 when unset)."""
    return int(os.environ.get('RANK_SIZE', '1'))
def get_rank_id():
    """Return the RANK_ID environment variable as an int (0 when unset)."""
    return int(os.environ.get('RANK_ID', '0'))
def get_job_id():
    """Return the fixed job identifier used for local (non-ModelArts) runs."""
    local_job_name = 'Local Job'
    return local_job_name

View File

@ -0,0 +1,124 @@
# Copyright 2021 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License Version 2.0(the "License");
# you may not use this file except in compliance with the License.
# you may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0#
#
# Unless required by applicable law or agreed to in writing software
# distributed under the License is distributed on an "AS IS" BASIS
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ====================================================================================
"""Moxing adapter for ModelArts"""
import os
import functools
from mindspore import context
from .config import config
# Incremented by every sync_data() call and appended to the lock-file name, so each call uses its own lock file.
_global_syn_count = 0
def get_device_id():
    """Return the DEVICE_ID environment variable as an int (0 when unset)."""
    return int(os.environ.get('DEVICE_ID', '0'))
def get_device_num():
    """Return the RANK_SIZE environment variable as an int (1 when unset)."""
    return int(os.environ.get('RANK_SIZE', '1'))
def get_rank_id():
    """Return the RANK_ID environment variable as an int (0 when unset)."""
    return int(os.environ.get('RANK_ID', '0'))
def get_job_id():
    """
    Return the job identifier taken from the JOB_ID environment variable.

    Returns:
        The value of JOB_ID, or "default" when the variable is unset or empty.
    """
    # os.getenv returns None for an unset variable; `or` covers both None and "".
    return os.getenv('JOB_ID') or 'default'
def sync_data(from_path, to_path):
    """
    Copy data between remote OBS storage and a local directory.

    When the first path is remote and the second is local the data is
    downloaded; with the arguments the other way round it is uploaded.
    One process per server performs the copy and then creates a lock file;
    every process waits until that lock file exists before returning.
    """
    import moxing as mox
    import time

    global _global_syn_count
    lock_file = '/tmp/copy_sync.lock' + str(_global_syn_count)
    _global_syn_count += 1

    # Each server contains 8 devices at most
    is_copier = get_device_id() % min(get_device_num(), 8) == 0
    if is_copier and not os.path.exists(lock_file):
        print('from path: ', from_path)
        print('to path: ', to_path)
        mox.file.copy_parallel(from_path, to_path)
        print('===finished data synchronization===')
        try:
            os.mknod(lock_file)
        except IOError:
            pass
        print('===save flag===')

    # Poll once per second until the copying process has published the lock file.
    while not os.path.exists(lock_file):
        time.sleep(1)

    print('Finish sync data from {} to {}'.format(from_path, to_path))
def moxing_wrapper(pre_process=None, post_process=None):
    """
    Moxing wrapper to download dataset and upload outputs

    When ``config.enable_modelarts`` is set, the decorated function is
    preceded by downloads from ``data_url``, ``checkpoint_url`` and
    ``train_url`` and followed by an upload of ``output_path`` to
    ``train_url``. Otherwise the function is simply called.

    Args:
        pre_process: Optional callable run after the downloads and before the
            decorated function (ModelArts only).
        post_process: Optional callable run after the decorated function and
            before the upload (ModelArts only).

    Returns:
        A decorator. The decorated function returns whatever the original
        function returns.
    """
    def wrapper(run_func):
        @functools.wraps(run_func)
        def wrapped_func(*args, **kwargs):
            # Download data from data_url
            if config.enable_modelarts:
                if config.data_url:
                    sync_data(config.data_url, config.data_path)
                    print('Dataset downloaded: ', os.listdir(config.data_path))
                if config.checkpoint_url:
                    if not os.path.exists(config.load_path):
                        # NOTE(review): the directory is not created here (the
                        # makedirs call was disabled upstream), so the check
                        # below only reports state. Nesting is inferred - confirm.
                        print('=' * 20 + 'makedirs')
                        if os.path.isdir(config.load_path):
                            print('=' * 20 + 'makedirs success')
                        else:
                            print('=' * 20 + 'makedirs fail')
                    sync_data(config.checkpoint_url, config.load_path)
                    print('Preload downloaded: ', os.listdir(config.load_path))
                if config.train_url:
                    sync_data(config.train_url, config.output_path)
                    print('Workspace downloaded: ', os.listdir(config.output_path))

                context.set_context(save_graphs_path=os.path.join(config.output_path, str(get_rank_id())))
                config.device_num = get_device_num()
                config.device_id = get_device_id()
                if not os.path.exists(config.output_path):
                    # Several device processes may get here together; exist_ok
                    # keeps the losers of that race from raising FileExistsError.
                    os.makedirs(config.output_path, exist_ok=True)

                if pre_process:
                    pre_process()

            # Keep the result so the decorated function still returns it.
            result = run_func(*args, **kwargs)

            # Upload data to train_url
            if config.enable_modelarts:
                if post_process:
                    post_process()

                if config.train_url:
                    print('Start to copy output directory')
                    sync_data(config.output_path, config.train_url)
            return result
        return wrapped_func
    return wrapper