forked from mindspore-Ecosystem/mindspore
adjust model zoo utils
This commit is contained in:
parent
9be17e2a59
commit
748e07eb9e
|
@ -1,40 +0,0 @@
|
|||
#!/bin/bash
|
||||
# Copyright 2020 Huawei Technologies Co., Ltd
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
# ============================================================================
|
||||
|
||||
rm -f output/train.mindrecord*
|
||||
rm -f output/dev.mindrecord*
|
||||
|
||||
if [ ! -d "../../../third_party/to_mindrecord/CLUERNER2020" ]; then
|
||||
echo "The patch base dir ../../../third_party/to_mindrecord/CLUERNER2020 is not exist."
|
||||
exit 1
|
||||
fi
|
||||
|
||||
if [ ! -f "../../../third_party/patch/to_mindrecord/CLUERNER2020/data_processor_seq.patch" ]; then
|
||||
echo "The patch file ../../../third_party/patch/to_mindrecord/CLUERNER2020/data_processor_seq.patch is not exist."
|
||||
exit 1
|
||||
fi
|
||||
|
||||
# patch for data_processor_seq.py
|
||||
patch -p0 -d ../../../third_party/to_mindrecord/CLUERNER2020/ -o data_processor_seq_patched.py < ../../../third_party/patch/to_mindrecord/CLUERNER2020/data_processor_seq.patch
|
||||
if [ $? -ne 0 ]; then
|
||||
echo "Patch ../../../third_party/to_mindrecord/CLUERNER2020/data_processor_seq.py failed"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
# use patched script
|
||||
python ../../../third_party/to_mindrecord/CLUERNER2020/data_processor_seq_patched.py \
|
||||
--vocab_file=../../../third_party/to_mindrecord/CLUERNER2020/vocab.txt \
|
||||
--label2id_file=../../../third_party/to_mindrecord/CLUERNER2020/label2id.json
|
|
@ -42,7 +42,7 @@ MINDRECORD_PATH=`pwd`/data_mr
|
|||
|
||||
rm -f $MINDRECORD_PATH/*
|
||||
|
||||
cd ../../../example/graph_to_mindrecord || exit
|
||||
cd ../../utils/graph_to_mindrecord || exit
|
||||
|
||||
python writer.py --mindrecord_script $DATASET_NAME \
|
||||
--mindrecord_file "$MINDRECORD_PATH/$DATASET_NAME" \
|
||||
|
|
|
@ -43,7 +43,7 @@ MINDRECORD_PATH=`pwd`/data_mr
|
|||
rm -f $MINDRECORD_PATH/$DATASET_NAME
|
||||
rm -f $MINDRECORD_PATH/$DATASET_NAME.db
|
||||
|
||||
cd ../../../example/graph_to_mindrecord || exit
|
||||
cd ../../utils/graph_to_mindrecord || exit
|
||||
|
||||
python writer.py --mindrecord_script $DATASET_NAME \
|
||||
--mindrecord_file "$MINDRECORD_PATH/$DATASET_NAME" \
|
||||
|
|
|
@ -0,0 +1,40 @@
|
|||
#!/bin/bash
|
||||
# Copyright 2020 Huawei Technologies Co., Ltd
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
# ============================================================================
|
||||
|
||||
rm -f output/train.mindrecord*
|
||||
rm -f output/dev.mindrecord*
|
||||
|
||||
if [ ! -d "../../../../third_party/to_mindrecord/CLUERNER2020" ]; then
|
||||
echo "The patch base dir ../../../../third_party/to_mindrecord/CLUERNER2020 is not exist."
|
||||
exit 1
|
||||
fi
|
||||
|
||||
if [ ! -f "../../../../third_party/patch/to_mindrecord/CLUERNER2020/data_processor_seq.patch" ]; then
|
||||
echo "The patch file ../../../../third_party/patch/to_mindrecord/CLUERNER2020/data_processor_seq.patch is not exist."
|
||||
exit 1
|
||||
fi
|
||||
|
||||
# patch for data_processor_seq.py
|
||||
patch -p0 -d ../../../../third_party/to_mindrecord/CLUERNER2020/ -o data_processor_seq_patched.py < ../../../../third_party/patch/to_mindrecord/CLUERNER2020/data_processor_seq.patch
|
||||
if [ $? -ne 0 ]; then
|
||||
echo "Patch ../../../../third_party/to_mindrecord/CLUERNER2020/data_processor_seq.py failed"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
# use patched script
|
||||
python ../../../../third_party/to_mindrecord/CLUERNER2020/data_processor_seq_patched.py \
|
||||
--vocab_file=../../../../third_party/to_mindrecord/CLUERNER2020/vocab.txt \
|
||||
--label2id_file=../../../../third_party/to_mindrecord/CLUERNER2020/label2id.json
|
|
@ -66,20 +66,20 @@ getdir "${data_dir}"
|
|||
# echo "The input files: "${file_list[@]}
|
||||
# echo "The output files: "${output_filename[@]}
|
||||
|
||||
if [ ! -d "../../../third_party/to_mindrecord/zhwiki" ]; then
|
||||
echo "The patch base dir ../../../third_party/to_mindrecord/zhwiki is not exist."
|
||||
if [ ! -d "../../../../third_party/to_mindrecord/zhwiki" ]; then
|
||||
echo "The patch base dir ../../../../third_party/to_mindrecord/zhwiki is not exist."
|
||||
exit 1
|
||||
fi
|
||||
|
||||
if [ ! -f "../../../third_party/patch/to_mindrecord/zhwiki/create_pretraining_data.patch" ]; then
|
||||
echo "The patch file ../../../third_party/patch/to_mindrecord/zhwiki/create_pretraining_data.patch is not exist."
|
||||
if [ ! -f "../../../../third_party/patch/to_mindrecord/zhwiki/create_pretraining_data.patch" ]; then
|
||||
echo "The patch file ../../../../third_party/patch/to_mindrecord/zhwiki/create_pretraining_data.patch is not exist."
|
||||
exit 1
|
||||
fi
|
||||
|
||||
# patch for create_pretraining_data.py
|
||||
patch -p0 -d ../../../third_party/to_mindrecord/zhwiki/ -o create_pretraining_data_patched.py < ../../../third_party/patch/to_mindrecord/zhwiki/create_pretraining_data.patch
|
||||
patch -p0 -d ../../../../third_party/to_mindrecord/zhwiki/ -o create_pretraining_data_patched.py < ../../../../third_party/patch/to_mindrecord/zhwiki/create_pretraining_data.patch
|
||||
if [ $? -ne 0 ]; then
|
||||
echo "Patch ../../../third_party/to_mindrecord/zhwiki/create_pretraining_data.py failed"
|
||||
echo "Patch ../../../../third_party/to_mindrecord/zhwiki/create_pretraining_data.py failed"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
|
@ -94,7 +94,7 @@ file_list_len=`expr ${#file_list[*]} - 1`
|
|||
for index in $(seq 0 $file_list_len); do
|
||||
echo "Begin preprocess input file: ${file_list[$index]}"
|
||||
echo "Begin output file: ${output_filename[$index]}"
|
||||
python ../../../third_party/to_mindrecord/zhwiki/create_pretraining_data_patched.py \
|
||||
python ../../../../third_party/to_mindrecord/zhwiki/create_pretraining_data_patched.py \
|
||||
--input_file=${file_list[$index]} \
|
||||
--output_file=${output_dir}/${output_filename[$index]} \
|
||||
--partition_number=1 \
|
|
@ -26,7 +26,7 @@ This example is based on [zhwiki](https://dumps.wikimedia.org/zhwiki) training d
|
|||
Follow the step:
|
||||
|
||||
```bash
|
||||
bash run_simple.sh # generate output/simple.mindrecord* by ../../../third_party/to_mindrecord/zhwiki/sample_text.txt
|
||||
bash run_simple.sh # generate output/simple.mindrecord* by ../../../../third_party/to_mindrecord/zhwiki/sample_text.txt
|
||||
bash run_read_simple.sh # use MindDataset to read output/simple.mindrecord*
|
||||
```
|
||||
|
|
@ -45,20 +45,20 @@ getdir "${data_dir}"
|
|||
# echo "The input files: "${file_list[@]}
|
||||
# echo "The output files: "${output_filename[@]}
|
||||
|
||||
if [ ! -d "../../../third_party/to_mindrecord/zhwiki" ]; then
|
||||
echo "The patch base dir ../../../third_party/to_mindrecord/zhwiki is not exist."
|
||||
if [ ! -d "../../../../third_party/to_mindrecord/zhwiki" ]; then
|
||||
echo "The patch base dir ../../../../third_party/to_mindrecord/zhwiki is not exist."
|
||||
exit 1
|
||||
fi
|
||||
|
||||
if [ ! -f "../../../third_party/patch/to_mindrecord/zhwiki/create_pretraining_data.patch" ]; then
|
||||
echo "The patch file ../../../third_party/patch/to_mindrecord/zhwiki/create_pretraining_data.patch is not exist."
|
||||
if [ ! -f "../../../../third_party/patch/to_mindrecord/zhwiki/create_pretraining_data.patch" ]; then
|
||||
echo "The patch file ../../../../third_party/patch/to_mindrecord/zhwiki/create_pretraining_data.patch is not exist."
|
||||
exit 1
|
||||
fi
|
||||
|
||||
# patch for create_pretraining_data.py
|
||||
patch -p0 -d ../../../third_party/to_mindrecord/zhwiki/ -o create_pretraining_data_patched.py < ../../../third_party/patch/to_mindrecord/zhwiki/create_pretraining_data.patch
|
||||
patch -p0 -d ../../../../third_party/to_mindrecord/zhwiki/ -o create_pretraining_data_patched.py < ../../../../third_party/patch/to_mindrecord/zhwiki/create_pretraining_data.patch
|
||||
if [ $? -ne 0 ]; then
|
||||
echo "Patch ../../../third_party/to_mindrecord/zhwiki/create_pretraining_data.py failed"
|
||||
echo "Patch ../../../../third_party/to_mindrecord/zhwiki/create_pretraining_data.py failed"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
|
@ -73,11 +73,11 @@ file_list_len=`expr ${#file_list[*]} - 1`
|
|||
for index in $(seq 0 $file_list_len); do
|
||||
echo "Begin preprocess input file: ${file_list[$index]}"
|
||||
echo "Begin output file: ${output_filename[$index]}"
|
||||
python ../../../third_party/to_mindrecord/zhwiki/create_pretraining_data_patched.py \
|
||||
python ../../../../third_party/to_mindrecord/zhwiki/create_pretraining_data_patched.py \
|
||||
--input_file=${file_list[$index]} \
|
||||
--output_file=output/${output_filename[$index]} \
|
||||
--partition_number=1 \
|
||||
--vocab_file=../../../third_party/to_mindrecord/zhwiki/vocab.txt \
|
||||
--vocab_file=../../../../third_party/to_mindrecord/zhwiki/vocab.txt \
|
||||
--do_lower_case=True \
|
||||
--max_seq_length=128 \
|
||||
--max_predictions_per_seq=20 \
|
|
@ -15,4 +15,4 @@
|
|||
# ============================================================================
|
||||
|
||||
# create dataset for train
|
||||
python create_dataset.py --input_file=output/simple.mindrecord0
|
||||
python create_dataset.py --input_file=output/simple.mindrecord
|
|
@ -16,29 +16,29 @@
|
|||
|
||||
rm -f output/simple.mindrecord*
|
||||
|
||||
if [ ! -d "../../../third_party/to_mindrecord/zhwiki" ]; then
|
||||
echo "The patch base dir ../../../third_party/to_mindrecord/zhwiki is not exist."
|
||||
if [ ! -d "../../../../third_party/to_mindrecord/zhwiki" ]; then
|
||||
echo "The patch base dir ../../../../third_party/to_mindrecord/zhwiki is not exist."
|
||||
exit 1
|
||||
fi
|
||||
|
||||
if [ ! -f "../../../third_party/patch/to_mindrecord/zhwiki/create_pretraining_data.patch" ]; then
|
||||
echo "The patch file ../../../third_party/patch/to_mindrecord/zhwiki/create_pretraining_data.patch is not exist."
|
||||
if [ ! -f "../../../../third_party/patch/to_mindrecord/zhwiki/create_pretraining_data.patch" ]; then
|
||||
echo "The patch file ../../../../third_party/patch/to_mindrecord/zhwiki/create_pretraining_data.patch is not exist."
|
||||
exit 1
|
||||
fi
|
||||
|
||||
# patch for create_pretraining_data.py
|
||||
patch -p0 -d ../../../third_party/to_mindrecord/zhwiki/ -o create_pretraining_data_patched.py < ../../../third_party/patch/to_mindrecord/zhwiki/create_pretraining_data.patch
|
||||
patch -p0 -d ../../../../third_party/to_mindrecord/zhwiki/ -o create_pretraining_data_patched.py < ../../../../third_party/patch/to_mindrecord/zhwiki/create_pretraining_data.patch
|
||||
if [ $? -ne 0 ]; then
|
||||
echo "Patch ../../../third_party/to_mindrecord/zhwiki/create_pretraining_data.py failed"
|
||||
echo "Patch ../../../../third_party/to_mindrecord/zhwiki/create_pretraining_data.py failed"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
# using patched script to generate mindrecord
|
||||
python ../../../third_party/to_mindrecord/zhwiki/create_pretraining_data_patched.py \
|
||||
--input_file=../../../third_party/to_mindrecord/zhwiki/sample_text.txt \
|
||||
python ../../../../third_party/to_mindrecord/zhwiki/create_pretraining_data_patched.py \
|
||||
--input_file=../../../../third_party/to_mindrecord/zhwiki/sample_text.txt \
|
||||
--output_file=output/simple.mindrecord \
|
||||
--partition_number=4 \
|
||||
--vocab_file=../../../third_party/to_mindrecord/zhwiki/vocab.txt \
|
||||
--partition_number=1 \
|
||||
--vocab_file=../../../../third_party/to_mindrecord/zhwiki/vocab.txt \
|
||||
--do_lower_case=True \
|
||||
--max_seq_length=128 \
|
||||
--max_predictions_per_seq=20 \
|
Loading…
Reference in New Issue