# Builtin Configurations (DO NOT CHANGE THESE CONFIGURATIONS unless you know exactly what you are doing)
enable_modelarts: False
# Url for modelarts
data_url: ""
train_url: ""
checkpoint_url: ""
# Path for local
data_path: "/cache/data"
output_path: "/cache/train"
load_path: "/cache/checkpoint_path"
device_target: "Ascend"
enable_profiling: False
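
# The url/path split above follows the usual ModelArts pattern: with
# enable_modelarts: True, the *_url values point at remote (OBS) storage and
# are expected to be synced into the local /cache paths by the launcher;
# with False, the local *_path values are used directly. (Assumed from the
# common model_zoo convention rather than verified against this fork.)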

# ==============================================================================
description: "run_squad"
do_train: "false"
do_eval: "false"
device_id: 0
epoch_num: 3
num_class: 2
train_data_shuffle: "true"
eval_data_shuffle: "false"
train_batch_size: 32
eval_batch_size: 1
vocab_file_path: ""
eval_json_path: ""
save_finetune_checkpoint_path: "./squad_finetune/ckpt/"
load_pretrain_checkpoint_path: ""
load_finetune_checkpoint_path: ""
train_data_file_path: ""
schema_file_path: ""
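
# Any top-level key above can typically be overridden from the command line:
# the model_zoo config parser is expected to turn each key into an argparse
# flag, with help text and choices taken from the later documents in this
# file. A hypothetical invocation (script name and paths are illustrative):
#
#   python run_squad.py --do_train=true --device_id=0 \
#       --vocab_file_path=/path/to/vocab.txt \
#       --train_data_file_path=/path/to/train.mindrecord \
#       --load_pretrain_checkpoint_path=/path/to/bert_base.ckpt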

# export related
export_batch_size: 1
export_ckpt_file: ''
export_file_name: 'bert_squad'
file_format: 'AIR'
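
# A minimal sketch of how the export settings are typically consumed
# (mindspore.export and mindspore.load_checkpoint are real APIs; the network
# name is an assumption, and the three identical dummy tensors stand in for
# input_ids, input_mask and token_type_id):
#
#   import numpy as np
#   import mindspore as ms
#
#   net = BertSquad(bert_net_cfg)        # hypothetical finetuned network
#   ms.load_checkpoint(cfg.export_ckpt_file, net=net)
#   ids = ms.Tensor(np.zeros((cfg.export_batch_size, 384), np.int32))
#   ms.export(net, ids, ids, ids, file_name=cfg.export_file_name,
#             file_format=cfg.file_format)   # 'AIR', 'ONNX' or 'MINDIR'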

optimizer_cfg:
  optimizer: 'Momentum'
  AdamWeightDecay:
    learning_rate: 0.0001  # 1e-4
    end_learning_rate: 0.00000000001  # 1e-11
    power: 5.0
    weight_decay: 0.001  # 1e-3
    decay_filter: ['layernorm', 'bias']
    eps: 0.000001  # 1e-6
  Lamb:
    learning_rate: 0.0001  # 1e-4
    end_learning_rate: 0.00000000001  # 1e-11
    power: 5.0
    weight_decay: 0.01
    decay_filter: ['layernorm', 'bias']
  Momentum:
    learning_rate: 0.0001  # 1e-4
    momentum: 0.9
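
# Only the sub-block named by optimizer_cfg.optimizer is read at runtime;
# learning_rate/end_learning_rate/power parameterize what is, by convention,
# a polynomial-decay schedule, and decay_filter lists parameter-name
# substrings excluded from weight decay. A minimal dispatch sketch
# (nn.AdamWeightDecay, nn.Lamb and nn.Momentum are real MindSpore classes;
# cfg, params and lr_schedule are assumed names):
#
#   from mindspore import nn
#
#   c = cfg.optimizer_cfg
#   if c.optimizer == 'AdamWeightDecay':
#       opt = nn.AdamWeightDecay(params, learning_rate=lr_schedule,
#                                 eps=c.AdamWeightDecay.eps)
#   elif c.optimizer == 'Lamb':
#       opt = nn.Lamb(params, learning_rate=lr_schedule)
#   else:
#       opt = nn.Momentum(params, learning_rate=c.Momentum.learning_rate,
#                         momentum=c.Momentum.momentum)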

bert_net_cfg:
  seq_length: 384
  vocab_size: 30522
  hidden_size: 768
  num_hidden_layers: 12
  num_attention_heads: 12
  intermediate_size: 3072
  hidden_act: "gelu"
  hidden_dropout_prob: 0.1
  attention_probs_dropout_prob: 0.1
  max_position_embeddings: 512
  type_vocab_size: 2
  initializer_range: 0.02
  use_relative_positions: False
  dtype: mstype.float32
  compute_type: mstype.float16
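
# These dimensions (12 layers, 12 heads, hidden size 768, vocab 30522) match
# BERT-base uncased, and seq_length 384 is the standard SQuAD v1.1 setting.
# Note the mstype.* entries are not plain YAML scalars: the config parser is
# expected to map these strings to mindspore.common.dtype objects when the
# network is built (assumed from the usual model_zoo parsing code).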

---
# Help description for each configuration
enable_modelarts: "Whether training on modelarts, default: False"
data_url: "Url for modelarts"
train_url: "Url for modelarts"
data_path: "The location of the input data."
output_path: "The location of the output file."
device_target: "Running platform, choose from Ascend or GPU; default is Ascend."
enable_profiling: "Whether to enable profiling while training, default: False"

do_train: "Enable train, default is false"
do_eval: "Enable eval, default is false"
device_id: "Device id, default is 0."
epoch_num: "Epoch number, default is 3."
num_class: "The number of classes, default is 2."
train_data_shuffle: "Enable train data shuffle, default is true"
eval_data_shuffle: "Enable eval data shuffle, default is false"
train_batch_size: "Train batch size, default is 32"
eval_batch_size: "Eval batch size, default is 1"
vocab_file_path: "Vocab file path"
eval_json_path: "Evaluation json file path, can be eval.json"
save_finetune_checkpoint_path: "Save checkpoint path"
load_pretrain_checkpoint_path: "Load checkpoint file path"
load_finetune_checkpoint_path: "Load checkpoint file path"
train_data_file_path: "Data path; it is better to use an absolute path"
schema_file_path: "Schema path; it is better to use an absolute path"

export_batch_size: "Export batch size."
export_ckpt_file: "Bert ckpt file."
export_file_name: "Bert output file name."
file_format: "File format"

---
# choices
device_target: ['Ascend', 'GPU']
do_train: ["true", "false"]
do_eval: ["true", "false"]
train_data_shuffle: ["true", "false"]
eval_data_shuffle: ["true", "false"]
file_format: ["AIR", "ONNX", "MINDIR"]
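
# The three YAML documents in this file hold, in order: the default values,
# the per-key help text, and the allowed choices. A minimal parsing sketch
# (yaml.safe_load_all is a real PyYAML API; the filename is illustrative):
#
#   import yaml
#
#   with open('squad_config.yaml') as f:
#       defaults, helper, choices = yaml.safe_load_all(f)
#   assert defaults['file_format'] in choices['file_format']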