forked from OSSInnovation/mindspore
adjust model zoo utils
This commit is contained in:
parent
9be17e2a59
commit
748e07eb9e
|
@ -1,40 +0,0 @@
|
||||||
#!/bin/bash
|
|
||||||
# Copyright 2020 Huawei Technologies Co., Ltd
|
|
||||||
#
|
|
||||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
||||||
# you may not use this file except in compliance with the License.
|
|
||||||
# You may obtain a copy of the License at
|
|
||||||
#
|
|
||||||
# http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
#
|
|
||||||
# Unless required by applicable law or agreed to in writing, software
|
|
||||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
||||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
# See the License for the specific language governing permissions and
|
|
||||||
# limitations under the License.
|
|
||||||
# ============================================================================
|
|
||||||
|
|
||||||
# Rebuild the CLUERNER2020 MindRecord files:
#   1. remove previously generated train/dev files under ./output,
#   2. check that the third-party source dir and the patch file are present,
#   3. apply the patch to data_processor_seq.py, writing the result to a
#      separate data_processor_seq_patched.py (the original is not modified),
#   4. run the patched script with the bundled vocab and label map.
# Every path is relative to the current working directory.
# NOTE(review): where the Python script writes its results is not visible
# here; presumably ./output, given the cleanup below - confirm.
cluerner_dir="../../../third_party/to_mindrecord/CLUERNER2020"
cluerner_patch="../../../third_party/patch/to_mindrecord/CLUERNER2020/data_processor_seq.patch"

# Clear stale outputs from an earlier run.
rm -f output/train.mindrecord* output/dev.mindrecord*

# Guard: the directory that will be patched must exist.
[ -d "${cluerner_dir}" ] || {
    echo "The patch base dir ${cluerner_dir} is not exist."
    exit 1
}

# Guard: the patch file itself must exist.
[ -f "${cluerner_patch}" ] || {
    echo "The patch file ${cluerner_patch} is not exist."
    exit 1
}

# patch for data_processor_seq.py
# -d switches into the third-party dir before patching; -o sends the patched
# result to a new file there instead of rewriting the original in place.
if ! patch -p0 -d "${cluerner_dir}/" -o data_processor_seq_patched.py < "${cluerner_patch}"; then
    echo "Patch ${cluerner_dir}/data_processor_seq.py failed"
    exit 1
fi

# use patched script
python "${cluerner_dir}/data_processor_seq_patched.py" \
    --vocab_file="${cluerner_dir}/vocab.txt" \
    --label2id_file="${cluerner_dir}/label2id.json"
|
|
|
@ -42,7 +42,7 @@ MINDRECORD_PATH=`pwd`/data_mr
|
||||||
|
|
||||||
rm -f $MINDRECORD_PATH/*
|
rm -f $MINDRECORD_PATH/*
|
||||||
|
|
||||||
cd ../../../example/graph_to_mindrecord || exit
|
cd ../../utils/graph_to_mindrecord || exit
|
||||||
|
|
||||||
python writer.py --mindrecord_script $DATASET_NAME \
|
python writer.py --mindrecord_script $DATASET_NAME \
|
||||||
--mindrecord_file "$MINDRECORD_PATH/$DATASET_NAME" \
|
--mindrecord_file "$MINDRECORD_PATH/$DATASET_NAME" \
|
||||||
|
|
|
@ -43,7 +43,7 @@ MINDRECORD_PATH=`pwd`/data_mr
|
||||||
rm -f $MINDRECORD_PATH/$DATASET_NAME
|
rm -f $MINDRECORD_PATH/$DATASET_NAME
|
||||||
rm -f $MINDRECORD_PATH/$DATASET_NAME.db
|
rm -f $MINDRECORD_PATH/$DATASET_NAME.db
|
||||||
|
|
||||||
cd ../../../example/graph_to_mindrecord || exit
|
cd ../../utils/graph_to_mindrecord || exit
|
||||||
|
|
||||||
python writer.py --mindrecord_script $DATASET_NAME \
|
python writer.py --mindrecord_script $DATASET_NAME \
|
||||||
--mindrecord_file "$MINDRECORD_PATH/$DATASET_NAME" \
|
--mindrecord_file "$MINDRECORD_PATH/$DATASET_NAME" \
|
||||||
|
|
|
@ -0,0 +1,40 @@
|
||||||
|
#!/bin/bash
|
||||||
|
# Copyright 2020 Huawei Technologies Co., Ltd
|
||||||
|
#
|
||||||
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
# you may not use this file except in compliance with the License.
|
||||||
|
# You may obtain a copy of the License at
|
||||||
|
#
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
# See the License for the specific language governing permissions and
|
||||||
|
# limitations under the License.
|
||||||
|
# ============================================================================
|
||||||
|
|
||||||
|
# Convert CLUERNER2020 to MindRecord using a patched copy of the third-party
# data_processor_seq.py. The script removes old train/dev outputs, verifies
# that its inputs exist, applies the patch into data_processor_seq_patched.py
# and finally runs that patched copy.
# All paths below are resolved against the current working directory.
# NOTE(review): the destination of the generated files is decided by the
# Python script, which is not visible here; presumably ./output - confirm.
third_party_root="../../../../third_party"
source_dir="${third_party_root}/to_mindrecord/CLUERNER2020"
patch_file="${third_party_root}/patch/to_mindrecord/CLUERNER2020/data_processor_seq.patch"

# Print a message and stop the script with status 1.
abort() {
    echo "$1"
    exit 1
}

# Drop results left over from a previous run.
for stale_prefix in train dev; do
    rm -f output/${stale_prefix}.mindrecord*
done

# Preconditions: the directory to patch and the patch file must both exist.
if [ ! -d "${source_dir}" ]; then
    abort "The patch base dir ${source_dir} is not exist."
fi
if [ ! -f "${patch_file}" ]; then
    abort "The patch file ${patch_file} is not exist."
fi

# patch for data_processor_seq.py
# patch changes into ${source_dir} (-d) and writes the patched result to a
# new file (-o), so the original third-party file stays unmodified.
patch -p0 -d "${source_dir}/" -o data_processor_seq_patched.py < "${patch_file}" \
    || abort "Patch ${source_dir}/data_processor_seq.py failed"

# use patched script
python "${source_dir}/data_processor_seq_patched.py" \
    --vocab_file="${source_dir}/vocab.txt" \
    --label2id_file="${source_dir}/label2id.json"
|
|
@ -66,20 +66,20 @@ getdir "${data_dir}"
|
||||||
# echo "The input files: "${file_list[@]}
|
# echo "The input files: "${file_list[@]}
|
||||||
# echo "The output files: "${output_filename[@]}
|
# echo "The output files: "${output_filename[@]}
|
||||||
|
|
||||||
if [ ! -d "../../../third_party/to_mindrecord/zhwiki" ]; then
|
if [ ! -d "../../../../third_party/to_mindrecord/zhwiki" ]; then
|
||||||
echo "The patch base dir ../../../third_party/to_mindrecord/zhwiki is not exist."
|
echo "The patch base dir ../../../../third_party/to_mindrecord/zhwiki is not exist."
|
||||||
exit 1
|
exit 1
|
||||||
fi
|
fi
|
||||||
|
|
||||||
if [ ! -f "../../../third_party/patch/to_mindrecord/zhwiki/create_pretraining_data.patch" ]; then
|
if [ ! -f "../../../../third_party/patch/to_mindrecord/zhwiki/create_pretraining_data.patch" ]; then
|
||||||
echo "The patch file ../../../third_party/patch/to_mindrecord/zhwiki/create_pretraining_data.patch is not exist."
|
echo "The patch file ../../../../third_party/patch/to_mindrecord/zhwiki/create_pretraining_data.patch is not exist."
|
||||||
exit 1
|
exit 1
|
||||||
fi
|
fi
|
||||||
|
|
||||||
# patch for create_pretraining_data.py
|
# patch for create_pretraining_data.py
|
||||||
patch -p0 -d ../../../third_party/to_mindrecord/zhwiki/ -o create_pretraining_data_patched.py < ../../../third_party/patch/to_mindrecord/zhwiki/create_pretraining_data.patch
|
patch -p0 -d ../../../../third_party/to_mindrecord/zhwiki/ -o create_pretraining_data_patched.py < ../../../../third_party/patch/to_mindrecord/zhwiki/create_pretraining_data.patch
|
||||||
if [ $? -ne 0 ]; then
|
if [ $? -ne 0 ]; then
|
||||||
echo "Patch ../../../third_party/to_mindrecord/zhwiki/create_pretraining_data.py failed"
|
echo "Patch ../../../../third_party/to_mindrecord/zhwiki/create_pretraining_data.py failed"
|
||||||
exit 1
|
exit 1
|
||||||
fi
|
fi
|
||||||
|
|
||||||
|
@ -94,7 +94,7 @@ file_list_len=`expr ${#file_list[*]} - 1`
|
||||||
for index in $(seq 0 $file_list_len); do
|
for index in $(seq 0 $file_list_len); do
|
||||||
echo "Begin preprocess input file: ${file_list[$index]}"
|
echo "Begin preprocess input file: ${file_list[$index]}"
|
||||||
echo "Begin output file: ${output_filename[$index]}"
|
echo "Begin output file: ${output_filename[$index]}"
|
||||||
python ../../../third_party/to_mindrecord/zhwiki/create_pretraining_data_patched.py \
|
python ../../../../third_party/to_mindrecord/zhwiki/create_pretraining_data_patched.py \
|
||||||
--input_file=${file_list[$index]} \
|
--input_file=${file_list[$index]} \
|
||||||
--output_file=${output_dir}/${output_filename[$index]} \
|
--output_file=${output_dir}/${output_filename[$index]} \
|
||||||
--partition_number=1 \
|
--partition_number=1 \
|
|
@ -26,7 +26,7 @@ This example is based on [zhwiki](https://dumps.wikimedia.org/zhwiki) training d
|
||||||
Follow these steps:
|
Follow these steps:
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
bash run_simple.sh # generate output/simple.mindrecord* by ../../../third_party/to_mindrecord/zhwiki/sample_text.txt
|
bash run_simple.sh # generate output/simple.mindrecord* by ../../../../third_party/to_mindrecord/zhwiki/sample_text.txt
|
||||||
bash run_read_simple.sh # use MindDataset to read output/simple.mindrecord*
|
bash run_read_simple.sh # use MindDataset to read output/simple.mindrecord*
|
||||||
```
|
```
|
||||||
|
|
|
@ -45,20 +45,20 @@ getdir "${data_dir}"
|
||||||
# echo "The input files: "${file_list[@]}
|
# echo "The input files: "${file_list[@]}
|
||||||
# echo "The output files: "${output_filename[@]}
|
# echo "The output files: "${output_filename[@]}
|
||||||
|
|
||||||
if [ ! -d "../../../third_party/to_mindrecord/zhwiki" ]; then
|
if [ ! -d "../../../../third_party/to_mindrecord/zhwiki" ]; then
|
||||||
echo "The patch base dir ../../../third_party/to_mindrecord/zhwiki is not exist."
|
echo "The patch base dir ../../../../third_party/to_mindrecord/zhwiki is not exist."
|
||||||
exit 1
|
exit 1
|
||||||
fi
|
fi
|
||||||
|
|
||||||
if [ ! -f "../../../third_party/patch/to_mindrecord/zhwiki/create_pretraining_data.patch" ]; then
|
if [ ! -f "../../../../third_party/patch/to_mindrecord/zhwiki/create_pretraining_data.patch" ]; then
|
||||||
echo "The patch file ../../../third_party/patch/to_mindrecord/zhwiki/create_pretraining_data.patch is not exist."
|
echo "The patch file ../../../../third_party/patch/to_mindrecord/zhwiki/create_pretraining_data.patch is not exist."
|
||||||
exit 1
|
exit 1
|
||||||
fi
|
fi
|
||||||
|
|
||||||
# patch for create_pretraining_data.py
|
# patch for create_pretraining_data.py
|
||||||
patch -p0 -d ../../../third_party/to_mindrecord/zhwiki/ -o create_pretraining_data_patched.py < ../../../third_party/patch/to_mindrecord/zhwiki/create_pretraining_data.patch
|
patch -p0 -d ../../../../third_party/to_mindrecord/zhwiki/ -o create_pretraining_data_patched.py < ../../../../third_party/patch/to_mindrecord/zhwiki/create_pretraining_data.patch
|
||||||
if [ $? -ne 0 ]; then
|
if [ $? -ne 0 ]; then
|
||||||
echo "Patch ../../../third_party/to_mindrecord/zhwiki/create_pretraining_data.py failed"
|
echo "Patch ../../../../third_party/to_mindrecord/zhwiki/create_pretraining_data.py failed"
|
||||||
exit 1
|
exit 1
|
||||||
fi
|
fi
|
||||||
|
|
||||||
|
@ -73,11 +73,11 @@ file_list_len=`expr ${#file_list[*]} - 1`
|
||||||
for index in $(seq 0 $file_list_len); do
|
for index in $(seq 0 $file_list_len); do
|
||||||
echo "Begin preprocess input file: ${file_list[$index]}"
|
echo "Begin preprocess input file: ${file_list[$index]}"
|
||||||
echo "Begin output file: ${output_filename[$index]}"
|
echo "Begin output file: ${output_filename[$index]}"
|
||||||
python ../../../third_party/to_mindrecord/zhwiki/create_pretraining_data_patched.py \
|
python ../../../../third_party/to_mindrecord/zhwiki/create_pretraining_data_patched.py \
|
||||||
--input_file=${file_list[$index]} \
|
--input_file=${file_list[$index]} \
|
||||||
--output_file=output/${output_filename[$index]} \
|
--output_file=output/${output_filename[$index]} \
|
||||||
--partition_number=1 \
|
--partition_number=1 \
|
||||||
--vocab_file=../../../third_party/to_mindrecord/zhwiki/vocab.txt \
|
--vocab_file=../../../../third_party/to_mindrecord/zhwiki/vocab.txt \
|
||||||
--do_lower_case=True \
|
--do_lower_case=True \
|
||||||
--max_seq_length=128 \
|
--max_seq_length=128 \
|
||||||
--max_predictions_per_seq=20 \
|
--max_predictions_per_seq=20 \
|
|
@ -15,4 +15,4 @@
|
||||||
# ============================================================================
|
# ============================================================================
|
||||||
|
|
||||||
# create dataset for train
|
# create dataset for train
|
||||||
python create_dataset.py --input_file=output/simple.mindrecord0
|
python create_dataset.py --input_file=output/simple.mindrecord
|
|
@ -16,29 +16,29 @@
|
||||||
|
|
||||||
rm -f output/simple.mindrecord*
|
rm -f output/simple.mindrecord*
|
||||||
|
|
||||||
if [ ! -d "../../../third_party/to_mindrecord/zhwiki" ]; then
|
if [ ! -d "../../../../third_party/to_mindrecord/zhwiki" ]; then
|
||||||
echo "The patch base dir ../../../third_party/to_mindrecord/zhwiki is not exist."
|
echo "The patch base dir ../../../../third_party/to_mindrecord/zhwiki is not exist."
|
||||||
exit 1
|
exit 1
|
||||||
fi
|
fi
|
||||||
|
|
||||||
if [ ! -f "../../../third_party/patch/to_mindrecord/zhwiki/create_pretraining_data.patch" ]; then
|
if [ ! -f "../../../../third_party/patch/to_mindrecord/zhwiki/create_pretraining_data.patch" ]; then
|
||||||
echo "The patch file ../../../third_party/patch/to_mindrecord/zhwiki/create_pretraining_data.patch is not exist."
|
echo "The patch file ../../../../third_party/patch/to_mindrecord/zhwiki/create_pretraining_data.patch is not exist."
|
||||||
exit 1
|
exit 1
|
||||||
fi
|
fi
|
||||||
|
|
||||||
# patch for create_pretraining_data.py
|
# patch for create_pretraining_data.py
|
||||||
patch -p0 -d ../../../third_party/to_mindrecord/zhwiki/ -o create_pretraining_data_patched.py < ../../../third_party/patch/to_mindrecord/zhwiki/create_pretraining_data.patch
|
patch -p0 -d ../../../../third_party/to_mindrecord/zhwiki/ -o create_pretraining_data_patched.py < ../../../../third_party/patch/to_mindrecord/zhwiki/create_pretraining_data.patch
|
||||||
if [ $? -ne 0 ]; then
|
if [ $? -ne 0 ]; then
|
||||||
echo "Patch ../../../third_party/to_mindrecord/zhwiki/create_pretraining_data.py failed"
|
echo "Patch ../../../../third_party/to_mindrecord/zhwiki/create_pretraining_data.py failed"
|
||||||
exit 1
|
exit 1
|
||||||
fi
|
fi
|
||||||
|
|
||||||
# using patched script to generate mindrecord
|
# using patched script to generate mindrecord
|
||||||
python ../../../third_party/to_mindrecord/zhwiki/create_pretraining_data_patched.py \
|
python ../../../../third_party/to_mindrecord/zhwiki/create_pretraining_data_patched.py \
|
||||||
--input_file=../../../third_party/to_mindrecord/zhwiki/sample_text.txt \
|
--input_file=../../../../third_party/to_mindrecord/zhwiki/sample_text.txt \
|
||||||
--output_file=output/simple.mindrecord \
|
--output_file=output/simple.mindrecord \
|
||||||
--partition_number=4 \
|
--partition_number=1 \
|
||||||
--vocab_file=../../../third_party/to_mindrecord/zhwiki/vocab.txt \
|
--vocab_file=../../../../third_party/to_mindrecord/zhwiki/vocab.txt \
|
||||||
--do_lower_case=True \
|
--do_lower_case=True \
|
||||||
--max_seq_length=128 \
|
--max_seq_length=128 \
|
||||||
--max_predictions_per_seq=20 \
|
--max_predictions_per_seq=20 \
|
Loading…
Reference in New Issue