adjust model zoo utils

This commit is contained in:
liyong 2020-06-28 17:09:56 +08:00 committed by jonyguo
parent 9be17e2a59
commit 748e07eb9e
69 changed files with 69 additions and 69 deletions

View File

@ -1,40 +0,0 @@
#!/bin/bash
# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
# Convert the CLUERNER2020 data to MindRecord with a patched copy of the
# third-party data_processor_seq.py. Every path below is relative, so it is
# resolved against the current working directory of the caller.

# Remove MindRecord files left over from a previous run.
rm -f output/train.mindrecord*
rm -f output/dev.mindrecord*

# Single source of truth for the third-party locations; changing the directory
# depth of this script now only requires editing these two lines.
THIRD_PARTY_DIR="../../../third_party/to_mindrecord/CLUERNER2020"
PATCH_FILE="../../../third_party/patch/to_mindrecord/CLUERNER2020/data_processor_seq.patch"

# Fail early (with diagnostics on stderr) when the patch inputs are missing.
if [ ! -d "${THIRD_PARTY_DIR}" ]; then
    echo "The patch base dir ${THIRD_PARTY_DIR} is not exist." >&2
    exit 1
fi

if [ ! -f "${PATCH_FILE}" ]; then
    echo "The patch file ${PATCH_FILE} is not exist." >&2
    exit 1
fi

# patch for data_processor_seq.py
# -d switches into the third-party dir first, so the -o output file
# (data_processor_seq_patched.py) is created inside that directory.
if ! patch -p0 -d "${THIRD_PARTY_DIR}/" -o data_processor_seq_patched.py < "${PATCH_FILE}"; then
    echo "Patch ${THIRD_PARTY_DIR}/data_processor_seq.py failed" >&2
    exit 1
fi

# use patched script; it is the last command, so its exit status becomes
# the exit status of this script.
python "${THIRD_PARTY_DIR}/data_processor_seq_patched.py" \
    --vocab_file="${THIRD_PARTY_DIR}/vocab.txt" \
    --label2id_file="${THIRD_PARTY_DIR}/label2id.json"

View File

@ -42,7 +42,7 @@ MINDRECORD_PATH=`pwd`/data_mr
rm -f $MINDRECORD_PATH/* rm -f $MINDRECORD_PATH/*
cd ../../../example/graph_to_mindrecord || exit cd ../../utils/graph_to_mindrecord || exit
python writer.py --mindrecord_script $DATASET_NAME \ python writer.py --mindrecord_script $DATASET_NAME \
--mindrecord_file "$MINDRECORD_PATH/$DATASET_NAME" \ --mindrecord_file "$MINDRECORD_PATH/$DATASET_NAME" \

View File

@ -43,7 +43,7 @@ MINDRECORD_PATH=`pwd`/data_mr
rm -f $MINDRECORD_PATH/$DATASET_NAME rm -f $MINDRECORD_PATH/$DATASET_NAME
rm -f $MINDRECORD_PATH/$DATASET_NAME.db rm -f $MINDRECORD_PATH/$DATASET_NAME.db
cd ../../../example/graph_to_mindrecord || exit cd ../../utils/graph_to_mindrecord || exit
python writer.py --mindrecord_script $DATASET_NAME \ python writer.py --mindrecord_script $DATASET_NAME \
--mindrecord_file "$MINDRECORD_PATH/$DATASET_NAME" \ --mindrecord_file "$MINDRECORD_PATH/$DATASET_NAME" \

View File

@ -0,0 +1,40 @@
#!/bin/bash
# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
# Convert the CLUERNER2020 data to MindRecord with a patched copy of the
# third-party data_processor_seq.py. Every path below is relative, so it is
# resolved against the current working directory of the caller.

# Remove MindRecord files left over from a previous run.
rm -f output/train.mindrecord*
rm -f output/dev.mindrecord*

# Single source of truth for the third-party locations; changing the directory
# depth of this script now only requires editing these two lines.
THIRD_PARTY_DIR="../../../../third_party/to_mindrecord/CLUERNER2020"
PATCH_FILE="../../../../third_party/patch/to_mindrecord/CLUERNER2020/data_processor_seq.patch"

# Fail early (with diagnostics on stderr) when the patch inputs are missing.
if [ ! -d "${THIRD_PARTY_DIR}" ]; then
    echo "The patch base dir ${THIRD_PARTY_DIR} is not exist." >&2
    exit 1
fi

if [ ! -f "${PATCH_FILE}" ]; then
    echo "The patch file ${PATCH_FILE} is not exist." >&2
    exit 1
fi

# patch for data_processor_seq.py
# -d switches into the third-party dir first, so the -o output file
# (data_processor_seq_patched.py) is created inside that directory.
if ! patch -p0 -d "${THIRD_PARTY_DIR}/" -o data_processor_seq_patched.py < "${PATCH_FILE}"; then
    echo "Patch ${THIRD_PARTY_DIR}/data_processor_seq.py failed" >&2
    exit 1
fi

# use patched script; it is the last command, so its exit status becomes
# the exit status of this script.
python "${THIRD_PARTY_DIR}/data_processor_seq_patched.py" \
    --vocab_file="${THIRD_PARTY_DIR}/vocab.txt" \
    --label2id_file="${THIRD_PARTY_DIR}/label2id.json"

View File

@ -66,20 +66,20 @@ getdir "${data_dir}"
# echo "The input files: "${file_list[@]} # echo "The input files: "${file_list[@]}
# echo "The output files: "${output_filename[@]} # echo "The output files: "${output_filename[@]}
if [ ! -d "../../../third_party/to_mindrecord/zhwiki" ]; then if [ ! -d "../../../../third_party/to_mindrecord/zhwiki" ]; then
echo "The patch base dir ../../../third_party/to_mindrecord/zhwiki is not exist." echo "The patch base dir ../../../../third_party/to_mindrecord/zhwiki is not exist."
exit 1 exit 1
fi fi
if [ ! -f "../../../third_party/patch/to_mindrecord/zhwiki/create_pretraining_data.patch" ]; then if [ ! -f "../../../../third_party/patch/to_mindrecord/zhwiki/create_pretraining_data.patch" ]; then
echo "The patch file ../../../third_party/patch/to_mindrecord/zhwiki/create_pretraining_data.patch is not exist." echo "The patch file ../../../../third_party/patch/to_mindrecord/zhwiki/create_pretraining_data.patch is not exist."
exit 1 exit 1
fi fi
# patch for create_pretraining_data.py # patch for create_pretraining_data.py
patch -p0 -d ../../../third_party/to_mindrecord/zhwiki/ -o create_pretraining_data_patched.py < ../../../third_party/patch/to_mindrecord/zhwiki/create_pretraining_data.patch patch -p0 -d ../../../../third_party/to_mindrecord/zhwiki/ -o create_pretraining_data_patched.py < ../../../../third_party/patch/to_mindrecord/zhwiki/create_pretraining_data.patch
if [ $? -ne 0 ]; then if [ $? -ne 0 ]; then
echo "Patch ../../../third_party/to_mindrecord/zhwiki/create_pretraining_data.py failed" echo "Patch ../../../../third_party/to_mindrecord/zhwiki/create_pretraining_data.py failed"
exit 1 exit 1
fi fi
@ -94,7 +94,7 @@ file_list_len=`expr ${#file_list[*]} - 1`
for index in $(seq 0 $file_list_len); do for index in $(seq 0 $file_list_len); do
echo "Begin preprocess input file: ${file_list[$index]}" echo "Begin preprocess input file: ${file_list[$index]}"
echo "Begin output file: ${output_filename[$index]}" echo "Begin output file: ${output_filename[$index]}"
python ../../../third_party/to_mindrecord/zhwiki/create_pretraining_data_patched.py \ python ../../../../third_party/to_mindrecord/zhwiki/create_pretraining_data_patched.py \
--input_file=${file_list[$index]} \ --input_file=${file_list[$index]} \
--output_file=${output_dir}/${output_filename[$index]} \ --output_file=${output_dir}/${output_filename[$index]} \
--partition_number=1 \ --partition_number=1 \

View File

@ -26,7 +26,7 @@ This example is based on [zhwiki](https://dumps.wikimedia.org/zhwiki) training d
Follow the step: Follow the step:
```bash ```bash
bash run_simple.sh # generate output/simple.mindrecord* by ../../../third_party/to_mindrecord/zhwiki/sample_text.txt bash run_simple.sh # generate output/simple.mindrecord* by ../../../../third_party/to_mindrecord/zhwiki/sample_text.txt
bash run_read_simple.sh # use MindDataset to read output/simple.mindrecord* bash run_read_simple.sh # use MindDataset to read output/simple.mindrecord*
``` ```

View File

@ -45,20 +45,20 @@ getdir "${data_dir}"
# echo "The input files: "${file_list[@]} # echo "The input files: "${file_list[@]}
# echo "The output files: "${output_filename[@]} # echo "The output files: "${output_filename[@]}
if [ ! -d "../../../third_party/to_mindrecord/zhwiki" ]; then if [ ! -d "../../../../third_party/to_mindrecord/zhwiki" ]; then
echo "The patch base dir ../../../third_party/to_mindrecord/zhwiki is not exist." echo "The patch base dir ../../../../third_party/to_mindrecord/zhwiki is not exist."
exit 1 exit 1
fi fi
if [ ! -f "../../../third_party/patch/to_mindrecord/zhwiki/create_pretraining_data.patch" ]; then if [ ! -f "../../../../third_party/patch/to_mindrecord/zhwiki/create_pretraining_data.patch" ]; then
echo "The patch file ../../../third_party/patch/to_mindrecord/zhwiki/create_pretraining_data.patch is not exist." echo "The patch file ../../../../third_party/patch/to_mindrecord/zhwiki/create_pretraining_data.patch is not exist."
exit 1 exit 1
fi fi
# patch for create_pretraining_data.py # patch for create_pretraining_data.py
patch -p0 -d ../../../third_party/to_mindrecord/zhwiki/ -o create_pretraining_data_patched.py < ../../../third_party/patch/to_mindrecord/zhwiki/create_pretraining_data.patch patch -p0 -d ../../../../third_party/to_mindrecord/zhwiki/ -o create_pretraining_data_patched.py < ../../../../third_party/patch/to_mindrecord/zhwiki/create_pretraining_data.patch
if [ $? -ne 0 ]; then if [ $? -ne 0 ]; then
echo "Patch ../../../third_party/to_mindrecord/zhwiki/create_pretraining_data.py failed" echo "Patch ../../../../third_party/to_mindrecord/zhwiki/create_pretraining_data.py failed"
exit 1 exit 1
fi fi
@ -73,11 +73,11 @@ file_list_len=`expr ${#file_list[*]} - 1`
for index in $(seq 0 $file_list_len); do for index in $(seq 0 $file_list_len); do
echo "Begin preprocess input file: ${file_list[$index]}" echo "Begin preprocess input file: ${file_list[$index]}"
echo "Begin output file: ${output_filename[$index]}" echo "Begin output file: ${output_filename[$index]}"
python ../../../third_party/to_mindrecord/zhwiki/create_pretraining_data_patched.py \ python ../../../../third_party/to_mindrecord/zhwiki/create_pretraining_data_patched.py \
--input_file=${file_list[$index]} \ --input_file=${file_list[$index]} \
--output_file=output/${output_filename[$index]} \ --output_file=output/${output_filename[$index]} \
--partition_number=1 \ --partition_number=1 \
--vocab_file=../../../third_party/to_mindrecord/zhwiki/vocab.txt \ --vocab_file=../../../../third_party/to_mindrecord/zhwiki/vocab.txt \
--do_lower_case=True \ --do_lower_case=True \
--max_seq_length=128 \ --max_seq_length=128 \
--max_predictions_per_seq=20 \ --max_predictions_per_seq=20 \

View File

@ -15,4 +15,4 @@
# ============================================================================ # ============================================================================
# create dataset for train # create dataset for train
python create_dataset.py --input_file=output/simple.mindrecord0 python create_dataset.py --input_file=output/simple.mindrecord

View File

@ -16,29 +16,29 @@
rm -f output/simple.mindrecord* rm -f output/simple.mindrecord*
if [ ! -d "../../../third_party/to_mindrecord/zhwiki" ]; then if [ ! -d "../../../../third_party/to_mindrecord/zhwiki" ]; then
echo "The patch base dir ../../../third_party/to_mindrecord/zhwiki is not exist." echo "The patch base dir ../../../../third_party/to_mindrecord/zhwiki is not exist."
exit 1 exit 1
fi fi
if [ ! -f "../../../third_party/patch/to_mindrecord/zhwiki/create_pretraining_data.patch" ]; then if [ ! -f "../../../../third_party/patch/to_mindrecord/zhwiki/create_pretraining_data.patch" ]; then
echo "The patch file ../../../third_party/patch/to_mindrecord/zhwiki/create_pretraining_data.patch is not exist." echo "The patch file ../../../../third_party/patch/to_mindrecord/zhwiki/create_pretraining_data.patch is not exist."
exit 1 exit 1
fi fi
# patch for create_pretraining_data.py # patch for create_pretraining_data.py
patch -p0 -d ../../../third_party/to_mindrecord/zhwiki/ -o create_pretraining_data_patched.py < ../../../third_party/patch/to_mindrecord/zhwiki/create_pretraining_data.patch patch -p0 -d ../../../../third_party/to_mindrecord/zhwiki/ -o create_pretraining_data_patched.py < ../../../../third_party/patch/to_mindrecord/zhwiki/create_pretraining_data.patch
if [ $? -ne 0 ]; then if [ $? -ne 0 ]; then
echo "Patch ../../../third_party/to_mindrecord/zhwiki/create_pretraining_data.py failed" echo "Patch ../../../../third_party/to_mindrecord/zhwiki/create_pretraining_data.py failed"
exit 1 exit 1
fi fi
# using patched script to generate mindrecord # using patched script to generate mindrecord
python ../../../third_party/to_mindrecord/zhwiki/create_pretraining_data_patched.py \ python ../../../../third_party/to_mindrecord/zhwiki/create_pretraining_data_patched.py \
--input_file=../../../third_party/to_mindrecord/zhwiki/sample_text.txt \ --input_file=../../../../third_party/to_mindrecord/zhwiki/sample_text.txt \
--output_file=output/simple.mindrecord \ --output_file=output/simple.mindrecord \
--partition_number=4 \ --partition_number=1 \
--vocab_file=../../../third_party/to_mindrecord/zhwiki/vocab.txt \ --vocab_file=../../../../third_party/to_mindrecord/zhwiki/vocab.txt \
--do_lower_case=True \ --do_lower_case=True \
--max_seq_length=128 \ --max_seq_length=128 \
--max_predictions_per_seq=20 \ --max_predictions_per_seq=20 \