upload PaddleOCR code

2020-05-10 16:26:57 +08:00 · 2020-05-10 16:26:57 +08:00 · 338ba3ee4a
parent bc93c549fe
commit 338ba3ee4a
84 changed files with 15543 additions and 0 deletions
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@ -0,0 +1,35 @@
+-   repo: https://github.com/PaddlePaddle/mirrors-yapf.git
+    sha: 0d79c0c469bab64f7229c9aca2b1186ef47f0e37
+    hooks:
+    -   id: yapf
+        files: \.py$
+-   repo: https://github.com/pre-commit/pre-commit-hooks
+    sha: a11d9314b22d8f8c7556443875b731ef05965464
+    hooks:
+    -   id: check-merge-conflict
+    -   id: check-symlinks
+    -   id: detect-private-key
+        files: (?!.*paddle)^.*$
+    -   id: end-of-file-fixer
+        files: \.md$
+    -   id: trailing-whitespace
+        files: \.md$
+-   repo: https://github.com/Lucas-C/pre-commit-hooks
+    sha: v1.0.1
+    hooks:
+    -   id: forbid-crlf
+        files: \.md$
+    -   id: remove-crlf
+        files: \.md$
+    -   id: forbid-tabs
+        files: \.md$
+    -   id: remove-tabs
+        files: \.md$
+-   repo: local
+    hooks:
+    -   id: clang-format
+        name: clang-format
+        description: Format files with ClangFormat
+        entry: bash .clang_format.hook -i
+        language: system
+        files: \.(c|cc|cxx|cpp|cu|h|hpp|hxx|cuh|proto)$
--- a/.style.yapf
+++ b/.style.yapf
@ -0,0 +1,3 @@
+[style]
+based_on_style = pep8
+column_limit = 80
--- a/configs/det/det_db_icdar15_reader.yml
+++ b/configs/det/det_db_icdar15_reader.yml
@ -0,0 +1,22 @@
+TrainReader:
+  reader_function: ppocr.data.det.dataset_traversal,TrainReader
+  process_function: ppocr.data.det.db_process,DBProcessTrain
+  num_workers: 8
+  img_set_dir: ./train_data/icdar2015/text_localization/
+  label_file_path: ./train_data/icdar2015/text_localization/train_icdar2015_label.txt
+
+EvalReader:
+  reader_function: ppocr.data.det.dataset_traversal,EvalTestReader
+  process_function: ppocr.data.det.db_process,DBProcessTest
+  img_set_dir: ./train_data/icdar2015/text_localization/
+  label_file_path: ./train_data/icdar2015/text_localization/test_icdar2015_label.txt
+  test_image_shape: [736, 1280]
+  
+TestReader:
+  reader_function: ppocr.data.det.dataset_traversal,EvalTestReader
+  process_function: ppocr.data.det.db_process,DBProcessTest
+  single_img_path: 
+  img_set_dir: ./train_data/icdar2015/text_localization/
+  label_file_path: ./train_data/icdar2015/text_localization/test_icdar2015_label.txt
+  test_image_shape: [736, 1280]
+  do_eval: True
--- a/configs/det/det_db_mv3.yml
+++ b/configs/det/det_db_mv3.yml
@ -0,0 +1,51 @@
+Global:
+  algorithm: DB
+  use_gpu: true
+  epoch_num: 1200
+  log_smooth_window: 20
+  print_batch_step: 2
+  save_model_dir: output
+  save_epoch_step: 200
+  eval_batch_step: 5000
+  train_batch_size_per_card: 16
+  test_batch_size_per_card: 16
+  image_shape: [3, 640, 640]
+  reader_yml: ./configs/det/det_db_icdar15_reader.yml
+  pretrain_weights: ./pretrain_models/MobileNetV3_pretrained/MobileNetV3_large_x0_5_pretrained/
+  save_res_path: ./output/predicts_db.txt
+  
+Architecture:
+  function: ppocr.modeling.architectures.det_model,DetModel
+
+Backbone:
+  function: ppocr.modeling.backbones.det_mobilenet_v3,MobileNetV3
+  scale: 0.5
+  model_name: large
+
+Head:
+  function: ppocr.modeling.heads.det_db_head,DBHead
+  model_name: large
+  k: 50
+  inner_channels: 96
+  out_channels: 2
+
+Loss:
+  function: ppocr.modeling.losses.det_db_loss,DBLoss
+  balance_loss: true
+  main_loss_type: DiceLoss
+  alpha: 5
+  beta: 10
+  ohem_ratio: 3
+
+Optimizer:
+  function: ppocr.optimizer,AdamDecay
+  base_lr: 0.001
+  beta1: 0.9
+  beta2: 0.999
+
+PostProcess:
+  function: ppocr.postprocess.db_postprocess,DBPostProcess
+  thresh: 0.3
+  box_thresh: 0.7
+  max_candidates: 1000
+  unclip_ratio: 1.5
--- a/configs/det/det_db_r50_vd.yml
+++ b/configs/det/det_db_r50_vd.yml
@ -0,0 +1,51 @@
+Global:
+  algorithm: DB
+  use_gpu: true
+  epoch_num: 1200
+  log_smooth_window: 20
+  print_batch_step: 2
+  save_model_dir: output
+  save_epoch_step: 200
+  eval_batch_step: 5000
+  train_batch_size_per_card: 8
+  test_batch_size_per_card: 16
+  image_shape: [3, 640, 640]
+  reader_yml: ./configs/det/det_db_icdar15_reader.yml
+  pretrain_weights: ./pretrain_models/ResNet50_vd_pretrained/
+  save_res_path: ./output/predicts_db.txt
+  
+Architecture:
+  function: ppocr.modeling.architectures.det_model,DetModel
+
+Backbone:
+  function: ppocr.modeling.backbones.det_resnet_vd,ResNet
+  layers: 50
+
+Head:
+  function: ppocr.modeling.heads.det_db_head,DBHead
+  model_name: large
+  k: 50
+  inner_channels: 256
+  out_channels: 2
+
+Loss:
+  function: ppocr.modeling.losses.det_db_loss,DBLoss
+  balance_loss: true
+  main_loss_type: DiceLoss
+  alpha: 5
+  beta: 10
+  ohem_ratio: 3
+
+Optimizer:
+  function: ppocr.optimizer,AdamDecay
+  base_lr: 0.001
+  beta1: 0.9
+  beta2: 0.999
+
+PostProcess:
+  function: ppocr.postprocess.db_postprocess,DBPostProcess
+  thresh: 0.3
+  box_thresh: 0.7
+  max_candidates: 1000
+  unclip_ratio: 1.5
+  
--- a/configs/det/det_east_icdar15_reader.yml
+++ b/configs/det/det_east_icdar15_reader.yml
@ -0,0 +1,23 @@
+TrainReader:
+  reader_function: ppocr.data.det.dataset_traversal,TrainReader
+  process_function: ppocr.data.det.east_process,EASTProcessTrain
+  num_workers: 8
+  img_set_dir: ./train_data/icdar2015/text_localization/
+  label_file_path: ./train_data/icdar2015/text_localization/train_icdar2015_label.txt
+  background_ratio: 0.125
+  min_crop_side_ratio: 0.1
+  min_text_size: 10
+
+EvalReader:
+  reader_function: ppocr.data.det.dataset_traversal,EvalTestReader
+  process_function: ppocr.data.det.east_process,EASTProcessTest
+  img_set_dir: ./train_data/icdar2015/text_localization/
+  label_file_path: ./train_data/icdar2015/text_localization/test_icdar2015_label.txt
+  
+TestReader:
+  reader_function: ppocr.data.det.dataset_traversal,EvalTestReader
+  process_function: ppocr.data.det.east_process,EASTProcessTest
+  single_img_path: 
+  img_set_dir: ./train_data/icdar2015/text_localization/
+  label_file_path: ./train_data/icdar2015/text_localization/test_icdar2015_label.txt
+  do_eval: True
--- a/configs/det/det_east_mv3.yml
+++ b/configs/det/det_east_mv3.yml
@ -0,0 +1,43 @@
+Global:
+  algorithm: EAST
+  use_gpu: true
+  epoch_num: 100000
+  log_smooth_window: 20
+  print_batch_step: 5
+  save_model_dir: output
+  save_epoch_step: 200
+  eval_batch_step: 5000
+  train_batch_size_per_card: 16
+  test_batch_size_per_card: 16
+  image_shape: [3, 512, 512]
+  reader_yml: ./configs/det/det_east_icdar15_reader.yml
+  pretrain_weights: ./pretrain_models/MobileNetV3_pretrained/MobileNetV3_large_x0_5_pretrained/
+  save_res_path: ./output/predicts_east.txt
+  
+Architecture:
+  function: ppocr.modeling.architectures.det_model,DetModel
+
+Backbone:
+  function: ppocr.modeling.backbones.det_mobilenet_v3,MobileNetV3
+  scale: 0.5
+  model_name: large
+
+Head:
+  function: ppocr.modeling.heads.det_east_head,EASTHead
+  model_name: small
+  
+Loss:
+  function: ppocr.modeling.losses.det_east_loss,EASTLoss
+
+Optimizer:
+  function: ppocr.optimizer,AdamDecay
+  base_lr: 0.001
+  beta1: 0.9
+  beta2: 0.999
+
+PostProcess:
+  function: ppocr.postprocess.east_postprocess,EASTPostPocess
+  score_thresh: 0.8
+  cover_thresh: 0.1
+  nms_thresh: 0.2
+  
--- a/configs/det/det_east_r50_vd.yml
+++ b/configs/det/det_east_r50_vd.yml
@ -0,0 +1,42 @@
+Global:
+  algorithm: EAST
+  use_gpu: true
+  epoch_num: 100000
+  log_smooth_window: 20
+  print_batch_step: 5
+  save_model_dir: output
+  save_epoch_step: 200
+  eval_batch_step: 5000
+  train_batch_size_per_card: 8
+  test_batch_size_per_card: 16
+  image_shape: [3, 512, 512]
+  reader_yml: ./configs/det/det_east_icdar15_reader.yml
+  pretrain_weights: ./pretrain_models/ResNet50_vd_pretrained/
+  save_res_path: ./output/predicts_east.txt
+  
+Architecture:
+  function: ppocr.modeling.architectures.det_model,DetModel
+
+Backbone:
+  function: ppocr.modeling.backbones.det_resnet_vd,ResNet
+  layers: 50
+
+Head:
+  function: ppocr.modeling.heads.det_east_head,EASTHead
+  model_name: large
+  
+Loss:
+  function: ppocr.modeling.losses.det_east_loss,EASTLoss
+
+Optimizer:
+  function: ppocr.optimizer,AdamDecay
+  base_lr: 0.001
+  beta1: 0.9
+  beta2: 0.999
+
+PostProcess:
+  function: ppocr.postprocess.east_postprocess,EASTPostPocess
+  score_thresh: 0.8
+  cover_thresh: 0.1
+  nms_thresh: 0.2
+  
--- a/configs/rec/rec_benchmark_reader.yml
+++ b/configs/rec/rec_benchmark_reader.yml
@ -0,0 +1,12 @@
+TrainReader:
+  reader_function: ppocr.data.rec.dataset_traversal,LMDBReader
+  num_workers: 8
+  lmdb_sets_dir: ./train_data/data_lmdb_release/training/
+  
+EvalReader:
+  reader_function: ppocr.data.rec.dataset_traversal,LMDBReader
+  lmdb_sets_dir: ./train_data/data_lmdb_release/validation/
+
+TestReader:
+  reader_function: ppocr.data.rec.dataset_traversal,LMDBReader
+  lmdb_sets_dir: ./train_data/data_lmdb_release/evaluation/
--- a/configs/rec/rec_chinese_lite_train.yml
+++ b/configs/rec/rec_chinese_lite_train.yml
@ -0,0 +1,42 @@
+Global:
+  algorithm: CRNN
+  dataset: common
+  use_gpu: true
+  epoch_num: 300
+  log_smooth_window: 20
+  print_batch_step: 10
+  save_model_dir: output
+  save_epoch_step: 3
+  eval_batch_step: 2000
+  train_batch_size_per_card: 256
+  test_batch_size_per_card: 256
+  image_shape: [3, 32, 100]
+  max_text_length: 25
+  character_type: ch
+  character_dict_path: ./ppocr/utils/ppocr_keys_v1.txt
+  loss_type: ctc
+  reader_yml: ./configs/rec/rec_chinese_reader.yml
+  pretrain_weights:
+  
+Architecture:
+  function: ppocr.modeling.architectures.rec_model,RecModel
+
+Backbone:
+  function: ppocr.modeling.backbones.rec_mobilenet_v3,MobileNetV3
+  scale: 0.5
+  model_name: small
+
+Head:
+  function: ppocr.modeling.heads.rec_ctc_head,CTCPredict
+  encoder_type: rnn
+  SeqRNN:
+    hidden_size: 48
+    
+Loss:
+  function: ppocr.modeling.losses.rec_ctc_loss,CTCLoss
+
+Optimizer:
+  function: ppocr.optimizer,AdamDecay
+  base_lr: 0.001
+  beta1: 0.9
+  beta2: 0.999
--- a/configs/rec/rec_chinese_reader.yml
+++ b/configs/rec/rec_chinese_reader.yml
@ -0,0 +1,14 @@
+TrainReader:
+  reader_function: ppocr.data.rec.dataset_traversal,SimpleReader
+  num_workers: 8
+  img_set_dir: .
+  label_file_path: ./train_data/hard_label.txt
+  
+EvalReader:
+  reader_function: ppocr.data.rec.dataset_traversal,SimpleReader
+  img_set_dir: .
+  label_file_path: ./train_data/label_val_all.txt
+
+TestReader:
+  reader_function: ppocr.data.rec.dataset_traversal,SimpleReader
+  infer_img: ./infer_img
--- a/configs/rec/rec_mv3_none_bilstm_ctc.yml
+++ b/configs/rec/rec_mv3_none_bilstm_ctc.yml
@ -0,0 +1,40 @@
+Global:
+  algorithm: CRNN
+  use_gpu: true
+  epoch_num: 72
+  log_smooth_window: 20
+  print_batch_step: 10
+  save_model_dir: output
+  save_epoch_step: 3
+  eval_batch_step: 2000
+  train_batch_size_per_card: 256
+  test_batch_size_per_card: 256
+  image_shape: [3, 32, 100]
+  max_text_length: 25
+  character_type: en
+  loss_type: ctc
+  reader_yml: ./configs/rec/rec_benchmark_reader.yml
+  pretrain_weights: 
+  
+Architecture:
+  function: ppocr.modeling.architectures.rec_model,RecModel
+
+Backbone:
+  function: ppocr.modeling.backbones.rec_mobilenet_v3,MobileNetV3
+  scale: 0.5
+  model_name: large
+ 
+Head:
+  function: ppocr.modeling.heads.rec_ctc_head,CTCPredict
+  encoder_type: rnn
+  SeqRNN:
+    hidden_size: 96
+    
+Loss:
+  function: ppocr.modeling.losses.rec_ctc_loss,CTCLoss
+
+Optimizer:
+  function: ppocr.optimizer,AdamDecay
+  base_lr: 0.001
+  beta1: 0.9
+  beta2: 0.999
--- a/configs/rec/rec_mv3_none_none_ctc.yml
+++ b/configs/rec/rec_mv3_none_none_ctc.yml
@ -0,0 +1,38 @@
+Global:
+  algorithm: Rosetta
+  use_gpu: true
+  epoch_num: 72
+  log_smooth_window: 20
+  print_batch_step: 10
+  save_model_dir: output
+  save_epoch_step: 3
+  eval_batch_step: 2000
+  train_batch_size_per_card: 256
+  test_batch_size_per_card: 256
+  image_shape: [3, 32, 100]
+  max_text_length: 25
+  character_type: en
+  loss_type: ctc
+  reader_yml: ./configs/rec/rec_benchmark_reader.yml
+  pretrain_weights: 
+  
+Architecture:
+  function: ppocr.modeling.architectures.rec_model,RecModel
+
+Backbone:
+  function: ppocr.modeling.backbones.rec_mobilenet_v3,MobileNetV3
+  scale: 0.5
+  model_name: large
+
+Head:
+  function: ppocr.modeling.heads.rec_ctc_head,CTCPredict
+  encoder_type: reshape
+  
+Loss:
+  function: ppocr.modeling.losses.rec_ctc_loss,CTCLoss
+
+Optimizer:
+  function: ppocr.optimizer,AdamDecay
+  base_lr: 0.001
+  beta1: 0.9
+  beta2: 0.999
--- a/configs/rec/rec_mv3_tps_bilstm_attn.yml
+++ b/configs/rec/rec_mv3_tps_bilstm_attn.yml
@ -0,0 +1,49 @@
+Global:
+  algorithm: RARE
+  use_gpu: true
+  epoch_num: 72
+  log_smooth_window: 20
+  print_batch_step: 10
+  save_model_dir: output
+  save_epoch_step: 3
+  eval_batch_step: 2000
+  train_batch_size_per_card: 256
+  test_batch_size_per_card: 256
+  image_shape: [3, 32, 100]
+  max_text_length: 25
+  character_type: en
+  loss_type: attention
+  reader_yml: ./configs/rec/rec_benchmark_reader.yml
+  pretrain_weights: 
+  
+Architecture:
+  function: ppocr.modeling.architectures.rec_model,RecModel
+
+TPS:
+  function: ppocr.modeling.stns.tps,TPS
+  num_fiducial: 20
+  loc_lr: 0.1
+  model_name: small
+  
+Backbone:
+  function: ppocr.modeling.backbones.rec_mobilenet_v3,MobileNetV3
+  scale: 0.5
+  model_name: large
+ 
+Head:
+  function: ppocr.modeling.heads.rec_attention_head,AttentionPredict
+  encoder_type: rnn
+  SeqRNN:
+    hidden_size: 96
+  Attention:
+    decoder_size: 96
+    word_vector_dim: 96
+  
+Loss:
+  function: ppocr.modeling.losses.rec_attention_loss,AttentionLoss
+  
+Optimizer:
+  function: ppocr.optimizer,AdamDecay
+  base_lr: 0.001
+  beta1: 0.9
+  beta2: 0.999
--- a/configs/rec/rec_mv3_tps_bilstm_ctc.yml
+++ b/configs/rec/rec_mv3_tps_bilstm_ctc.yml
@ -0,0 +1,46 @@
+Global:
+  algorithm: STARNet
+  use_gpu: true
+  epoch_num: 72
+  log_smooth_window: 20
+  print_batch_step: 10
+  save_model_dir: output
+  save_epoch_step: 3
+  eval_batch_step: 2000
+  train_batch_size_per_card: 256
+  test_batch_size_per_card: 256
+  image_shape: [3, 32, 100]
+  max_text_length: 25
+  character_type: en
+  loss_type: ctc
+  reader_yml: ./configs/rec/rec_benchmark_reader.yml
+  pretrain_weights: 
+  
+Architecture:
+  function: ppocr.modeling.architectures.rec_model,RecModel
+
+TPS:
+  function: ppocr.modeling.stns.tps,TPS
+  num_fiducial: 20
+  loc_lr: 0.1
+  model_name: small
+  
+Backbone:
+  function: ppocr.modeling.backbones.rec_mobilenet_v3,MobileNetV3
+  scale: 0.5
+  model_name: large
+ 
+Head:
+  function: ppocr.modeling.heads.rec_ctc_head,CTCPredict
+  encoder_type: rnn
+  SeqRNN:
+    hidden_size: 96
+    
+Loss:
+  function: ppocr.modeling.losses.rec_ctc_loss,CTCLoss
+
+Optimizer:
+  function: ppocr.optimizer,AdamDecay
+  base_lr: 0.001
+  beta1: 0.9
+  beta2: 0.999
--- a/configs/rec/rec_r34_vd_none_bilstm_ctc.yml
+++ b/configs/rec/rec_r34_vd_none_bilstm_ctc.yml
@ -0,0 +1,39 @@
+Global:
+  algorithm: CRNN
+  use_gpu: true
+  epoch_num: 72
+  log_smooth_window: 20
+  print_batch_step: 10
+  save_model_dir: output
+  save_epoch_step: 3
+  eval_batch_step: 2000
+  train_batch_size_per_card: 256
+  test_batch_size_per_card: 256
+  image_shape: [3, 32, 100]
+  max_text_length: 25
+  character_type: en
+  loss_type: ctc
+  reader_yml: ./configs/rec/rec_benchmark_reader.yml
+  pretrain_weights: 
+  
+Architecture:
+  function: ppocr.modeling.architectures.rec_model,RecModel
+
+Backbone:
+  function: ppocr.modeling.backbones.rec_resnet_vd,ResNet
+  layers: 34
+ 
+Head:
+  function: ppocr.modeling.heads.rec_ctc_head,CTCPredict
+  encoder_type: rnn
+  SeqRNN:
+    hidden_size: 256
+    
+Loss:
+  function: ppocr.modeling.losses.rec_ctc_loss,CTCLoss
+
+Optimizer:
+  function: ppocr.optimizer,AdamDecay
+  base_lr: 0.001
+  beta1: 0.9
+  beta2: 0.999
--- a/configs/rec/rec_r34_vd_none_none_ctc.yml
+++ b/configs/rec/rec_r34_vd_none_none_ctc.yml
@ -0,0 +1,37 @@
+Global:
+  algorithm: Rosetta
+  use_gpu: true
+  epoch_num: 72
+  log_smooth_window: 20
+  print_batch_step: 10
+  save_model_dir: output
+  save_epoch_step: 3
+  eval_batch_step: 2000
+  train_batch_size_per_card: 256
+  test_batch_size_per_card: 256
+  image_shape: [3, 32, 100]
+  max_text_length: 25
+  character_type: en
+  loss_type: ctc
+  reader_yml: ./configs/rec/rec_benchmark_reader.yml
+  pretrain_weights: 
+  
+Architecture:
+  function: ppocr.modeling.architectures.rec_model,RecModel
+
+Backbone:
+  function: ppocr.modeling.backbones.rec_resnet_vd,ResNet
+  layers: 34
+
+Head:
+  function: ppocr.modeling.heads.rec_ctc_head,CTCPredict
+  encoder_type: reshape
+  
+Loss:
+  function: ppocr.modeling.losses.rec_ctc_loss,CTCLoss
+
+Optimizer:
+  function: ppocr.optimizer,AdamDecay
+  base_lr: 0.001
+  beta1: 0.9
+  beta2: 0.999
--- a/configs/rec/rec_r34_vd_tps_bilstm_attn.yml
+++ b/configs/rec/rec_r34_vd_tps_bilstm_attn.yml
@ -0,0 +1,48 @@
+Global:
+  algorithm: RARE
+  use_gpu: true
+  epoch_num: 72
+  log_smooth_window: 20
+  print_batch_step: 10
+  save_model_dir: output
+  save_epoch_step: 3
+  eval_batch_step: 2000
+  train_batch_size_per_card: 256
+  test_batch_size_per_card: 256
+  image_shape: [3, 32, 100]
+  max_text_length: 25
+  character_type: en
+  loss_type: attention
+  reader_yml: ./configs/rec/rec_benchmark_reader.yml
+  pretrain_weights: 
+
+Architecture:
+  function: ppocr.modeling.architectures.rec_model,RecModel
+
+TPS:
+  function: ppocr.modeling.stns.tps,TPS
+  num_fiducial: 20
+  loc_lr: 0.1
+  model_name: large
+
+Backbone:
+  function: ppocr.modeling.backbones.rec_resnet_vd,ResNet
+  layers: 34
+ 
+Head:
+  function: ppocr.modeling.heads.rec_attention_head,AttentionPredict
+  encoder_type: rnn
+  SeqRNN:
+    hidden_size: 256
+  Attention:
+    decoder_size: 128
+    word_vector_dim: 128
+  
+Loss:
+  function: ppocr.modeling.losses.rec_attention_loss,AttentionLoss
+  
+Optimizer:
+  function: ppocr.optimizer,AdamDecay
+  base_lr: 0.001
+  beta1: 0.9
+  beta2: 0.999
--- a/configs/rec/rec_r34_vd_tps_bilstm_ctc.yml
+++ b/configs/rec/rec_r34_vd_tps_bilstm_ctc.yml
@ -0,0 +1,45 @@
+Global:
+  algorithm: STARNet
+  use_gpu: true
+  epoch_num: 72
+  log_smooth_window: 20
+  print_batch_step: 10
+  save_model_dir: output
+  save_epoch_step: 3
+  eval_batch_step: 2000
+  train_batch_size_per_card: 256
+  test_batch_size_per_card: 256
+  image_shape: [3, 32, 100]
+  max_text_length: 25
+  character_type: en
+  loss_type: ctc
+  reader_yml: ./configs/rec/rec_benchmark_reader.yml
+  pretrain_weights:
+
+Architecture:
+  function: ppocr.modeling.architectures.rec_model,RecModel
+
+TPS:
+  function: ppocr.modeling.stns.tps,TPS
+  num_fiducial: 20
+  loc_lr: 0.1
+  model_name: large
+
+Backbone:
+  function: ppocr.modeling.backbones.rec_resnet_vd,ResNet
+  layers: 34
+ 
+Head:
+  function: ppocr.modeling.heads.rec_ctc_head,CTCPredict
+  encoder_type: rnn
+  SeqRNN:
+    hidden_size: 256
+    
+Loss:
+  function: ppocr.modeling.losses.rec_ctc_loss,CTCLoss
+
+Optimizer:
+  function: ppocr.optimizer,AdamDecay
+  base_lr: 0.001
+  beta1: 0.9
+  beta2: 0.999
--- a/ppocr/__init__.py
+++ b/ppocr/__init__.py
@ -0,0 +1,13 @@
+# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
--- a/ppocr/data/__init__.py
+++ b/ppocr/data/__init__.py
@ -0,0 +1,13 @@
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
--- a/ppocr/data/det/data_augment.py
+++ b/ppocr/data/det/data_augment.py
@ -0,0 +1,47 @@
+# -*- coding:utf-8 -*- 
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+from __future__ import unicode_literals
+
+import numpy as np
+import random
+import cv2
+import math
+
+import imgaug
+import imgaug.augmenters as iaa
+
+
+def AugmentData(data):
+    """Randomly flip, rotate and rescale a sample's image and polygons.
+
+    An imgaug pipeline made of ``Fliplr(0.5)``, ``Affine(rotate=(-10, 10))``
+    and ``Resize((0.5, 3))`` is built, frozen with ``to_deterministic()``,
+    and then applied first to ``data['image']`` and afterwards to every
+    polygon in ``data['polys']``.
+
+    Args:
+        data: dict holding ``'image'`` (an array exposing ``.shape``) and
+            ``'polys'`` (an iterable of polygons whose points can be indexed
+            as ``p[0]`` and ``p[1]``). The dict is modified in place.
+
+    Returns:
+        The same dict, with ``'image'`` replaced by the augmented image and
+        ``'polys'`` replaced by an ``np.ndarray`` of the transformed points.
+    """
+    src_img = data['image']
+    # Keypoints are registered against the shape of the un-augmented image.
+    src_shape = src_img.shape
+
+    pipeline = iaa.Sequential([
+        iaa.Fliplr(0.5),
+        iaa.Affine(rotate=(-10, 10)),
+        iaa.Resize((0.5, 3)),
+    ])
+    # NOTE(review): freezing is presumably what keeps the image and the
+    # keypoints on the same sampled transform -- confirm against imgaug docs.
+    frozen = pipeline.to_deterministic()
+
+    def transform_poly(poly):
+        # Wrap the polygon's points as imgaug keypoints, push them through
+        # the frozen pipeline and unwrap them into (x, y) tuples again.
+        on_image = imgaug.KeypointsOnImage(
+            [imgaug.Keypoint(pt[0], pt[1]) for pt in poly], shape=src_shape)
+        moved = frozen.augment_keypoints([on_image])[0]
+        return [(kp.x, kp.y) for kp in moved.keypoints]
+
+    data['image'] = frozen.augment_image(src_img)
+    data['polys'] = np.array([transform_poly(poly) for poly in data['polys']])
+    return data
--- a/ppocr/data/det/dataset_traversal.py
+++ b/ppocr/data/det/dataset_traversal.py
@ -0,0 +1,110 @@
+#Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
+
+import os
+import math
+import random
+import functools
+import numpy as np
+import cv2
+import string
+from ppocr.utils.utility import initial_logger
+logger = initial_logger()
+from ppocr.utils.utility import create_module
+import time
+
+
+class TrainReader(object):
+    """Batch reader over a detection label file for one training worker.
+
+    ``params`` must provide ``num_workers``, ``label_file_path``,
+    ``train_batch_size_per_card`` and ``process_function``; the latter is
+    resolved with ``create_module`` and instantiated with ``params``.
+    """
+
+    def __init__(self, params):
+        self.num_workers = params['num_workers']
+        self.label_file_path = params['label_file_path']
+        self.batch_size = params['train_batch_size_per_card']
+        assert 'process_function' in params,\
+            "absence process_function in Reader"
+        process_cls = create_module(params['process_function'])
+        self.process = process_cls(params)
+
+    def __call__(self, process_id):
+        """Return a generator function yielding batches for ``process_id``.
+
+        Each run of the returned generator re-reads the label file, shuffles
+        the line indices, and walks positions ``process_id``,
+        ``process_id + num_workers``, ... of that shuffled order. Samples
+        for which the process function returns ``None`` are dropped. The
+        final batch may hold fewer than ``batch_size`` samples.
+        """
+
+        def sample_iter_reader():
+            with open(self.label_file_path, "rb") as label_file:
+                records = label_file.readlines()
+            total = len(records)
+            order = list(range(total))
+            # NOTE(review): every call shuffles on its own; whether workers
+            # end up with disjoint shards depends on them sharing the same
+            # random state -- confirm against how the workers are launched.
+            random.shuffle(order)
+            for position in range(process_id, total, self.num_workers):
+                sample = self.process(records[order[position]])
+                if sample is not None:
+                    yield sample
+
+        def batch_iter_reader():
+            pending = []
+            for sample in sample_iter_reader():
+                pending.append(sample)
+                if len(pending) != self.batch_size:
+                    continue
+                yield pending
+                pending = []
+            # Flush whatever is left as a short trailing batch.
+            if pending:
+                yield pending
+
+        return batch_iter_reader
+
+
+class EvalTestReader(object):
+    """Batch reader used for detection evaluation and testing.
+
+    ``params`` must contain ``process_function``; the remaining keys
+    (``test_batch_size_per_card``, ``single_img_path``, ``img_set_dir``,
+    ``label_file_path``) are looked up lazily when the reader is called.
+    """
+
+    def __init__(self, params):
+        self.params = params
+        assert 'process_function' in params,\
+            "absence process_function in EvalTestReader"
+
+    def __call__(self, mode):
+        """Return a generator function that yields batches of samples.
+
+        When ``mode == "test"`` and ``single_img_path`` is not ``None``,
+        only that image is read. Otherwise image names are taken from the
+        first tab-separated field of each line of ``label_file_path`` and
+        joined to ``img_set_dir`` with ``"/"``.
+
+        Every sample is the list returned by the process function with the
+        image name appended. Unreadable images are logged and skipped.
+        """
+        cfg = self.params
+        process_function = create_module(cfg['process_function'])(cfg)
+        batch_size = cfg['test_batch_size_per_card']
+
+        # Only "test" mode consults the single-image override.
+        single_img_path = cfg['single_img_path'] if mode == "test" else None
+
+        if single_img_path is not None:
+            img_list = [[single_img_path, single_img_path]]
+        else:
+            img_set_dir = cfg['img_set_dir']
+            img_name_list_path = cfg['label_file_path']
+            with open(img_name_list_path, "rb") as fin:
+                img_names = [
+                    raw.decode().strip("\n").split("\t")[0]
+                    for raw in fin.readlines()
+                ]
+            img_list = [[img_set_dir + "/" + name, name]
+                        for name in img_names]
+
+        def batch_iter_reader():
+            pending = []
+            for img_path, img_name in img_list:
+                img = cv2.imread(img_path)
+                if img is None:
+                    logger.info("load image error:" + img_path)
+                    continue
+                sample = process_function(img)
+                sample.append(img_name)
+                pending.append(sample)
+                if len(pending) != batch_size:
+                    continue
+                yield pending
+                pending = []
+            # Flush whatever is left as a short trailing batch.
+            if pending:
+                yield pending
+
+        return batch_iter_reader
--- a/ppocr/data/det/db_process.py
+++ b/ppocr/data/det/db_process.py
@ -0,0 +1,192 @@
+#Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
+
+import math
+import cv2
+import numpy as np
+import json
+import sys
+
+from .data_augment import AugmentData
+from .random_crop_data import RandomCropData
+from .make_shrink_map import MakeShrinkMap
+from .make_border_map import MakeBorderMap
+
+
+class DBProcessTrain(object):
+    """Turn one label-file line into the training tensors of the DB detector.
+
+    ``params`` must provide ``img_set_dir`` (prefix prepended verbatim to
+    the image name) and ``image_shape`` (its ``[1:]`` slice is handed to
+    ``RandomCropData``).
+    """
+
+    def __init__(self, params):
+        self.img_set_dir = params['img_set_dir']
+        self.image_shape = params['image_shape']
+
+    def order_points_clockwise(self, pts):
+        """Pick four rows of ``pts`` into a fixed order as a float32 (4, 2).
+
+        Output rows are, in order: the row with the smallest column sum,
+        the smallest (col1 - col0), the largest column sum and the largest
+        (col1 - col0). Assuming the columns are (x, y) in image coordinates
+        this is top-left, top-right, bottom-right, bottom-left.
+        """
+        coord_sum = pts.sum(axis=1)
+        coord_diff = np.diff(pts, axis=1)
+        picks = (np.argmin(coord_sum), np.argmin(coord_diff),
+                 np.argmax(coord_sum), np.argmax(coord_diff))
+        ordered = np.zeros((4, 2), dtype="float32")
+        for slot, row in enumerate(picks):
+            ordered[slot] = pts[row]
+        return ordered
+
+    def make_data_dict(self, imgvalue, entry):
+        """Collect the valid boxes of one image into a sample dict.
+
+        Args:
+            imgvalue: image array; its first two shape entries are stored
+                under ``'shape'``.
+            entry: iterable of dicts with ``'points'`` and
+                ``'transcription'`` keys.
+
+        Returns:
+            dict with keys ``image``, ``shape``, ``polys``, ``texts`` and
+            ``ignore_tags``. Boxes with non-positive contour area are
+            dropped; a box that raises while being processed is reported
+            with a printed message and skipped.
+        """
+        kept_boxes, kept_texts, ignore_flags = [], [], []
+        for item in entry:
+            raw_points, text = item['points'], item['transcription']
+            try:
+                quad = self.order_points_clockwise(
+                    np.array(raw_points).reshape(-1, 2))
+                if not cv2.contourArea(quad) > 0:
+                    continue
+                kept_boxes.append(quad)
+                kept_texts.append(text)
+                # '*' and '###' transcriptions mark boxes to be ignored.
+                ignore_flags.append(text in ['*', '###'])
+            except:
+                print('load label failed!')
+        return {
+            'image': imgvalue,
+            'shape': [imgvalue.shape[0], imgvalue.shape[1]],
+            'polys': np.array(kept_boxes),
+            'texts': kept_texts,
+            'ignore_tags': ignore_flags,
+        }
+
+    def NormalizeImage(self, data):
+        """Scale ``data['image']`` to [0, 1], standardize, and go HWC -> CHW.
+
+        The per-channel mean/std constants are applied along the last axis
+        in the image's existing channel order. Returns ``data`` with the
+        ``'image'`` entry replaced.
+        """
+        scaled = data['image'].astype(np.float32, copy=False) / 255
+        # In-place ops keep the float32 dtype of ``scaled``.
+        scaled -= [0.485, 0.456, 0.406]
+        scaled /= [0.229, 0.224, 0.225]
+        data['image'] = scaled.transpose(2, 0, 1)
+        return data
+
+    def FilterKeys(self, data):
+        """Drop the annotation-only keys from ``data`` and return it."""
+        for unwanted in ('polys', 'texts', 'ignore_tags', 'shape'):
+            data.pop(unwanted, None)
+        return data
+
+    def convert_label_infor(self, label_infor):
+        """Split a raw label line into ``(image path, parsed JSON label)``.
+
+        ``label_infor`` is a bytes line of the form
+        ``<image name>\\t<json>``; a leading UTF-8 BOM is stripped.
+        """
+        # The encode / 'utf-8-sig' decode round trip removes a leading BOM.
+        text = label_infor.decode().encode('utf-8').decode('utf-8-sig')
+        fields = text.strip("\n").split("\t")
+        return self.img_set_dir + fields[0], json.loads(fields[1])
+
+    def __call__(self, label_infor):
+        """Load, augment and encode the sample described by ``label_infor``.
+
+        Returns ``None`` when the image cannot be read; otherwise the tuple
+        ``(image, shrink_map, shrink_mask, threshold_map, threshold_mask)``
+        (the map / mask entries are presumably added by ``MakeShrinkMap``
+        and ``MakeBorderMap`` -- confirm in those modules).
+        """
+        img_path, gt_label = self.convert_label_infor(label_infor)
+        imgvalue = cv2.imread(img_path)
+        if imgvalue is None:
+            return None
+        sample = self.make_data_dict(imgvalue, gt_label)
+        sample = AugmentData(sample)
+        sample = RandomCropData(sample, self.image_shape[1:])
+        sample = MakeShrinkMap(sample)
+        sample = MakeBorderMap(sample)
+        sample = self.NormalizeImage(sample)
+        sample = self.FilterKeys(sample)
+        output_keys = ('image', 'shrink_map', 'shrink_mask', 'threshold_map',
+                       'threshold_mask')
+        return tuple(sample[key] for key in output_keys)
+
+
+class DBProcessTest(object):
+    """Resize and normalize one image for DB detector evaluation / inference.
+
+    Two resize strategies are available:
+
+    * type 0 (default): cap the longer side at ``max_side_len`` and round
+      both sides up to a multiple of 32;
+    * type 1: resize to a fixed ``[height, width]`` taken from ``params``.
+    """
+
+    def __init__(self, params):
+        """
+        Args:
+            params: mapping of reader options. Recognised keys:
+                ``det_image_shape`` or ``test_image_shape`` -- optional
+                ``[height, width]`` selecting the fixed-size resize;
+                ``max_side_len`` -- optional cap on the longer side for the
+                default resize (2400 when absent).
+        """
+        super(DBProcessTest, self).__init__()
+        self.resize_type = 0
+        # ``test_image_shape`` is the key set by the detection reader
+        # configs (det_db_icdar15_reader.yml); ``det_image_shape`` is still
+        # honoured, and wins when both are given, for existing callers.
+        for shape_key in ('det_image_shape', 'test_image_shape'):
+            if shape_key in params and params[shape_key] is not None:
+                self.image_shape = params[shape_key]
+                self.resize_type = 1
+                break
+        if 'max_side_len' in params:
+            self.max_side_len = params['max_side_len']
+        else:
+            self.max_side_len = 2400
+
+    @staticmethod
+    def _round_up_to_32(side):
+        """Round ``side`` up to a multiple of 32, never below 32."""
+        return max(32, ((side + 31) // 32) * 32)
+
+    def resize_image_type0(self, im):
+        """
+        resize image to a size multiple of 32 which is required by the network
+        :param im: the input image, indexed as (h, w, c)
+        :return: the resized image and the (ratio_h, ratio_w) resize ratios,
+            or (None, (None, None)) when the input has an empty side
+
+        The longer side is limited to self.max_side_len to avoid running
+        out of memory on the gpu.
+        """
+        max_side_len = self.max_side_len
+        h, w, _ = im.shape
+        if h <= 0 or w <= 0:
+            return None, (None, None)
+
+        # limit the max side
+        longest = max(h, w)
+        if longest > max_side_len:
+            ratio = float(max_side_len) / longest
+        else:
+            ratio = 1.
+        # A very thin image can scale down to 0 px on its short side; the
+        # 32 px floor keeps the size valid instead of yielding no image.
+        resize_h = self._round_up_to_32(int(h * ratio))
+        resize_w = self._round_up_to_32(int(w * ratio))
+        try:
+            im = cv2.resize(im, (int(resize_w), int(resize_h)))
+        except Exception:
+            # NOTE(review): exits with status 0 although the resize failed;
+            # kept as-is for callers -- consider a non-zero status.
+            print(im.shape, resize_w, resize_h)
+            sys.exit(0)
+        ratio_h = resize_h / float(h)
+        ratio_w = resize_w / float(w)
+        return im, (ratio_h, ratio_w)
+
+    def resize_image_type1(self, im):
+        """Resize ``im`` to the fixed ``self.image_shape`` (height, width).
+
+        Returns the resized image and the ``(ratio_h, ratio_w)`` scale
+        factors relative to the original size.
+        """
+        resize_h, resize_w = self.image_shape
+        ori_h, ori_w = im.shape[:2]  # (h, w, c)
+        # cv2.resize takes the target size as (width, height).
+        im = cv2.resize(im, (int(resize_w), int(resize_h)))
+        ratio_h = float(resize_h) / ori_h
+        ratio_w = float(resize_w) / ori_w
+        return im, (ratio_h, ratio_w)
+
+    def normalize(self, im):
+        """Scale to [0, 1], standardize per channel, and go HWC -> CHW.
+
+        The mean/std constants are applied along the last axis in the
+        image's existing channel order. Returns a float32 array.
+        """
+        img_mean = [0.485, 0.456, 0.406]
+        img_std = [0.229, 0.224, 0.225]
+        im = im.astype(np.float32, copy=False)
+        im = im / 255
+        im -= img_mean
+        im /= img_std
+        channel_swap = (2, 0, 1)
+        im = im.transpose(channel_swap)
+        return im
+
+    def __call__(self, im):
+        """Preprocess one image.
+
+        Returns:
+            ``[image, (ratio_h, ratio_w)]`` where ``image`` has a leading
+            batch axis of size 1 in front of the CHW data.
+        """
+        if self.resize_type == 0:
+            im, (ratio_h, ratio_w) = self.resize_image_type0(im)
+        else:
+            im, (ratio_h, ratio_w) = self.resize_image_type1(im)
+        im = self.normalize(im)
+        im = im[np.newaxis, :]
+        return [im, (ratio_h, ratio_w)]
--- a/ppocr/data/det/east_process.py
+++ b/ppocr/data/det/east_process.py
@ -0,0 +1,509 @@
+#copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
+#
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
+
+import math
+import cv2
+import numpy as np
+import json
+
+
+class EASTProcessTrain(object):
+    def __init__(self, params):
+        self.img_set_dir = params['img_set_dir']
+        self.random_scale = np.array([0.5, 1, 2.0, 3.0])
+        self.background_ratio = params['background_ratio']
+        self.min_crop_side_ratio = params['min_crop_side_ratio']
+        image_shape = params['image_shape']
+        self.input_size = image_shape[1]
+        self.min_text_size = params['min_text_size']
+
+    def preprocess(self, im):
+        """
+        Resize, normalize and zero-pad an image to the square network input.
+        :param im: image array indexed as (h, w, channel)
+        :return: the padded array of shape (1, 3, input_size, input_size)
+            and the resize factor that was applied to both axes
+        """
+        input_size = self.input_size
+        im_shape = im.shape
+        im_size_min = np.min(im_shape[0:2])  # computed but not used below
+        im_size_max = np.max(im_shape[0:2])
+        # scale so that the longer side becomes input_size
+        im_scale = float(input_size) / float(im_size_max)
+        im = cv2.resize(im, None, None, fx=im_scale, fy=im_scale)
+        img_mean = [0.485, 0.456, 0.406]
+        img_std = [0.229, 0.224, 0.225]
+        # reverse the channel axis (presumably BGR -> RGB, since __call__
+        # loads the image with cv2.imread) and switch to float32
+        im = im[:, :, ::-1].astype(np.float32)
+        im = im / 255
+        im -= img_mean
+        im /= img_std
+        new_h, new_w, _ = im.shape
+        # paste the resized image into the top-left corner of a zero canvas
+        im_padded = np.zeros((input_size, input_size, 3), dtype=np.float32)
+        im_padded[:new_h, :new_w, :] = im
+        # (h, w, c) -> (c, h, w), then prepend a batch axis
+        im_padded = im_padded.transpose((2, 0, 1))
+        im_padded = im_padded[np.newaxis, :]
+        return im_padded, im_scale
+
+    def convert_label_infor(self, label_infor):
+        label_infor = label_infor.decode()
+        label_infor = label_infor.encode('utf-8').decode('utf-8-sig')
+        substr = label_infor.strip("\n").split("\t")
+        img_path = self.img_set_dir + substr[0]
+        label = json.loads(substr[1])
+        nBox = len(label)
+        wordBBs, txts, txt_tags = [], [], []
+        for bno in range(0, nBox):
+            wordBB = label[bno]['points']
+            txt = label[bno]['transcription']
+            wordBBs.append(wordBB)
+            txts.append(txt)
+            if txt == '###':
+                txt_tags.append(True)
+            else:
+                txt_tags.append(False)
+        wordBBs = np.array(wordBBs, dtype=np.float32)
+        txt_tags = np.array(txt_tags, dtype=np.bool)
+        return img_path, wordBBs, txt_tags, txts
+
+    def rotate_im_poly(self, im, text_polys):
+        """
+        Rotate the image by a random multiple of 90 degrees (one, two or
+        three np.rot90 steps) and transform the text boxes accordingly.
+        :param im: image array indexed as (h, w, ...)
+        :param text_polys: array of boxes; the first four points of every
+            box are read as (x, y)
+        :return: the rotated image and a float32 array of rotated boxes
+        """
+        im_w, im_h = im.shape[1], im.shape[0]
+        dst_im = im.copy()
+        dst_polys = []
+        # one uniform draw selects the number of quarter turns (1, 2 or 3)
+        rand_degree_ratio = np.random.rand()
+        rand_degree_cnt = 1
+        if rand_degree_ratio > 0.333 and rand_degree_ratio < 0.666:
+            rand_degree_cnt = 2
+        elif rand_degree_ratio > 0.666:
+            rand_degree_cnt = 3
+        for i in range(rand_degree_cnt):
+            dst_im = np.rot90(dst_im)
+        # angle used for the point transform that mirrors the np.rot90 steps
+        rot_degree = -90 * rand_degree_cnt
+        rot_angle = rot_degree * math.pi / 180.0
+        n_poly = text_polys.shape[0]
+        # centers of the source and of the rotated image
+        cx, cy = 0.5 * im_w, 0.5 * im_h
+        ncx, ncy = 0.5 * dst_im.shape[1], 0.5 * dst_im.shape[0]
+        for i in range(n_poly):
+            wordBB = text_polys[i]
+            poly = []
+            for j in range(4):
+                sx, sy = wordBB[j][0], wordBB[j][1]
+                # rotate around the old center, then shift to the new center
+                dx = math.cos(rot_angle) * (sx - cx)\
+                    - math.sin(rot_angle) * (sy - cy) + ncx
+                dy = math.sin(rot_angle) * (sx - cx)\
+                    + math.cos(rot_angle) * (sy - cy) + ncy
+                poly.append([dx, dy])
+            dst_polys.append(poly)
+        dst_polys = np.array(dst_polys, dtype=np.float32)
+        return dst_im, dst_polys
+
+    def polygon_area(self, poly):
+        """
+        compute area of a polygon
+        :param poly:
+        :return:
+        """
+        edge = [(poly[1][0] - poly[0][0]) * (poly[1][1] + poly[0][1]),
+                (poly[2][0] - poly[1][0]) * (poly[2][1] + poly[1][1]),
+                (poly[3][0] - poly[2][0]) * (poly[3][1] + poly[2][1]),
+                (poly[0][0] - poly[3][0]) * (poly[0][1] + poly[3][1])]
+        return np.sum(edge) / 2.
+
+    def check_and_validate_polys(self, polys, tags, img_height, img_width):
+        """
+        check so that the text poly is in the same direction,
+        and also filter some invalid polygons
+        :param polys:
+        :param tags:
+        :return:
+        """
+        h, w = img_height, img_width
+        if polys.shape[0] == 0:
+            return polys
+        polys[:, :, 0] = np.clip(polys[:, :, 0], 0, w - 1)
+        polys[:, :, 1] = np.clip(polys[:, :, 1], 0, h - 1)
+
+        validated_polys = []
+        validated_tags = []
+        for poly, tag in zip(polys, tags):
+            p_area = self.polygon_area(poly)
+            #invalid poly
+            if abs(p_area) < 1:
+                continue
+            if p_area > 0:
+                #'poly in wrong direction'
+                if tag == False:
+                    tag = True  #reversed cases should be ignore
+                poly = poly[(0, 3, 2, 1), :]
+            validated_polys.append(poly)
+            validated_tags.append(tag)
+        return np.array(validated_polys), np.array(validated_tags)
+
+    def draw_img_polys(self, img, polys):
+        """
+        Debug helper: draw the boxes on the image and write it to disk.
+        Writes "tmp.jpg" and "tmp_<random 0..100>.jpg" into the current
+        working directory.
+        :param img: image array, optionally with a leading batch axis and /
+            or in (3, h, w) layout
+        :param polys: iterable of arrays that can be reshaped to (-1, 1, 2)
+        :return: None
+        """
+        # drop the batch axis of a 4-D input
+        if len(img.shape) == 4:
+            img = np.squeeze(img, axis=0)
+        if img.shape[0] == 3:
+            # (c, h, w) -> (h, w, c), then add a per-channel offset
+            # NOTE(review): the in-place adds act on the transposed array,
+            # which presumably shares memory with the caller's array --
+            # confirm before reusing the input afterwards
+            img = img.transpose((1, 2, 0))
+            img[:, :, 2] += 123.68
+            img[:, :, 1] += 116.78
+            img[:, :, 0] += 103.94
+        # round-trip through a file before drawing
+        cv2.imwrite("tmp.jpg", img)
+        img = cv2.imread("tmp.jpg")
+        for box in polys:
+            box = box.astype(np.int32).reshape((-1, 1, 2))
+            cv2.polylines(img, [box], True, color=(255, 255, 0), thickness=2)
+        import random
+        # random suffix, so repeated calls may overwrite earlier outputs
+        ino = random.randint(0, 100)
+        cv2.imwrite("tmp_%d.jpg" % ino, img)
+        return
+
+    def shrink_poly(self, poly, r):
+        """
+        Fit a smaller quadrangle inside the given one; used to generate the
+        score map. The original author notes that this may contain bugs.
+        Every vertex is moved along its two adjacent edges by R * r[i].
+        The input array is modified in place and also returned.
+        :param poly: the text poly, four points indexable as poly[i][0/1]
+        :param r: per-vertex reference lengths ("r" in the paper);
+            generate_quad passes the shorter adjacent edge of each vertex
+        :return: the shrunk poly
+        """
+        # shrink ratio
+        R = 0.3
+        # find the longer pair
+        dist0 = np.linalg.norm(poly[0] - poly[1])
+        dist1 = np.linalg.norm(poly[2] - poly[3])
+        dist2 = np.linalg.norm(poly[0] - poly[3])
+        dist3 = np.linalg.norm(poly[1] - poly[2])
+        # the pair of opposite edges with the larger total length is moved
+        # first; each later step uses the already updated points
+        if dist0 + dist1 > dist2 + dist3:
+            # first move (p0, p1), (p2, p3), then (p0, p3), (p1, p2)
+            ## p0, p1
+            theta = np.arctan2((poly[1][1] - poly[0][1]),
+                               (poly[1][0] - poly[0][0]))
+            poly[0][0] += R * r[0] * np.cos(theta)
+            poly[0][1] += R * r[0] * np.sin(theta)
+            poly[1][0] -= R * r[1] * np.cos(theta)
+            poly[1][1] -= R * r[1] * np.sin(theta)
+            ## p2, p3
+            theta = np.arctan2((poly[2][1] - poly[3][1]),
+                               (poly[2][0] - poly[3][0]))
+            poly[3][0] += R * r[3] * np.cos(theta)
+            poly[3][1] += R * r[3] * np.sin(theta)
+            poly[2][0] -= R * r[2] * np.cos(theta)
+            poly[2][1] -= R * r[2] * np.sin(theta)
+            ## p0, p3
+            # here arctan2 receives (dx, dy), so sin / cos swap roles
+            theta = np.arctan2((poly[3][0] - poly[0][0]),
+                               (poly[3][1] - poly[0][1]))
+            poly[0][0] += R * r[0] * np.sin(theta)
+            poly[0][1] += R * r[0] * np.cos(theta)
+            poly[3][0] -= R * r[3] * np.sin(theta)
+            poly[3][1] -= R * r[3] * np.cos(theta)
+            ## p1, p2
+            theta = np.arctan2((poly[2][0] - poly[1][0]),
+                               (poly[2][1] - poly[1][1]))
+            poly[1][0] += R * r[1] * np.sin(theta)
+            poly[1][1] += R * r[1] * np.cos(theta)
+            poly[2][0] -= R * r[2] * np.sin(theta)
+            poly[2][1] -= R * r[2] * np.cos(theta)
+        else:
+            # first move (p0, p3), (p1, p2), then (p0, p1), (p2, p3)
+            ## p0, p3
+            # print poly
+            theta = np.arctan2((poly[3][0] - poly[0][0]),
+                               (poly[3][1] - poly[0][1]))
+            poly[0][0] += R * r[0] * np.sin(theta)
+            poly[0][1] += R * r[0] * np.cos(theta)
+            poly[3][0] -= R * r[3] * np.sin(theta)
+            poly[3][1] -= R * r[3] * np.cos(theta)
+            ## p1, p2
+            theta = np.arctan2((poly[2][0] - poly[1][0]),
+                               (poly[2][1] - poly[1][1]))
+            poly[1][0] += R * r[1] * np.sin(theta)
+            poly[1][1] += R * r[1] * np.cos(theta)
+            poly[2][0] -= R * r[2] * np.sin(theta)
+            poly[2][1] -= R * r[2] * np.cos(theta)
+            ## p0, p1
+            theta = np.arctan2((poly[1][1] - poly[0][1]),
+                               (poly[1][0] - poly[0][0]))
+            poly[0][0] += R * r[0] * np.cos(theta)
+            poly[0][1] += R * r[0] * np.sin(theta)
+            poly[1][0] -= R * r[1] * np.cos(theta)
+            poly[1][1] -= R * r[1] * np.sin(theta)
+            ## p2, p3
+            theta = np.arctan2((poly[2][1] - poly[3][1]),
+                               (poly[2][0] - poly[3][0]))
+            poly[3][0] += R * r[3] * np.cos(theta)
+            poly[3][1] += R * r[3] * np.sin(theta)
+            poly[2][0] -= R * r[2] * np.cos(theta)
+            poly[2][1] -= R * r[2] * np.sin(theta)
+        return poly
+
+    def generate_quad(self, im_size, polys, tags):
+        """
+        Generate the quadrangle training targets for one image.
+        :param im_size: (h, w) of the maps to create
+        :param polys: iterable of four-point boxes, each an array indexed
+            as poly[i] -> (x, y)
+        :param tags: per-box flags; boxes with a truthy flag are masked out
+        :return: score_map (h, w), geo_map (h, w, 9) and training_mask
+            (h, w)
+        """
+        h, w = im_size
+        # poly_mask stores poly index + 1 so pixels can be looked up per box
+        # NOTE(review): the mask is uint8, so this presumably only works
+        # for fewer than 255 boxes -- confirm
+        poly_mask = np.zeros((h, w), dtype=np.uint8)
+        score_map = np.zeros((h, w), dtype=np.uint8)
+        # (x1, y1, ..., x4, y4, short_edge_norm)
+        geo_map = np.zeros((h, w, 9), dtype=np.float32)
+        # mask used during training, to ignore some hard areas
+        training_mask = np.ones((h, w), dtype=np.uint8)
+        for poly_idx, poly_tag in enumerate(zip(polys, tags)):
+            poly = poly_tag[0]
+            tag = poly_tag[1]
+
+            # r[i]: length of the shorter of the two edges meeting at i
+            r = [None, None, None, None]
+            for i in range(4):
+                dist1 = np.linalg.norm(poly[i] - poly[(i + 1) % 4])
+                dist2 = np.linalg.norm(poly[i] - poly[(i - 1) % 4])
+                r[i] = min(dist1, dist2)
+            # score map
+            # shrink a copy, because shrink_poly modifies its argument
+            shrinked_poly = self.shrink_poly(
+                poly.copy(), r).astype(np.int32)[np.newaxis, :, :]
+            cv2.fillPoly(score_map, shrinked_poly, 1)
+            cv2.fillPoly(poly_mask, shrinked_poly, poly_idx + 1)
+            # if the poly is too small, then ignore it during training
+            poly_h = min(
+                np.linalg.norm(poly[0] - poly[3]),
+                np.linalg.norm(poly[1] - poly[2]))
+            poly_w = min(
+                np.linalg.norm(poly[0] - poly[1]),
+                np.linalg.norm(poly[2] - poly[3]))
+            if min(poly_h, poly_w) < self.min_text_size:
+                cv2.fillPoly(training_mask,
+                             poly.astype(np.int32)[np.newaxis, :, :], 0)
+
+            # flagged boxes are excluded from training as well
+            if tag:
+                cv2.fillPoly(training_mask,
+                             poly.astype(np.int32)[np.newaxis, :, :], 0)
+
+            # pixel positions (row, column) that belong to the shrunk box
+            xy_in_poly = np.argwhere(poly_mask == (poly_idx + 1))
+            # geo map.
+            y_in_poly = xy_in_poly[:, 0]
+            x_in_poly = xy_in_poly[:, 1]
+            # clamp the vertices to [0, w] / [0, h]; poly is changed in place
+            poly[:, 0] = np.minimum(np.maximum(poly[:, 0], 0), w)
+            poly[:, 1] = np.minimum(np.maximum(poly[:, 1], 0), h)
+            # channels 2k / 2k+1: x / y offset from the pixel to vertex k
+            for pno in range(4):
+                geo_channel_beg = pno * 2
+                geo_map[y_in_poly, x_in_poly, geo_channel_beg] =\
+                    x_in_poly - poly[pno, 0]
+                geo_map[y_in_poly, x_in_poly, geo_channel_beg+1] =\
+                    y_in_poly - poly[pno, 1]
+            # channel 8: inverse of the shorter box side, capped at 1.0
+            geo_map[y_in_poly, x_in_poly, 8] = \
+                1.0 / max(min(poly_h, poly_w), 1.0)
+        return score_map, geo_map, training_mask
+
+    def crop_area(self,
+                  im,
+                  polys,
+                  tags,
+                  txts,
+                  crop_background=False,
+                  max_tries=50):
+        """
+        Make a random crop from the input image whose borders are sampled
+        from rows / columns that are not covered by any text box.
+        :param im: image array of shape (h, w, c)
+        :param polys: array of boxes, indexed as polys[:, :, 0] (x) and
+            polys[:, :, 1] (y)
+        :param tags: per-box flags, indexed with an index array
+        :param txts: per-box transcriptions, indexed by integer position
+        :param crop_background: when True, a sampled area without any
+            complete box is returned as a background crop with empty lists
+        :param max_tries: number of sampling attempts
+        :return: (im, polys, tags, txts); the inputs are returned unchanged
+            when no suitable area was found
+        """
+        h, w, _ = im.shape
+        # the occupancy arrays are padded by 10% of the size on each side
+        pad_h = h // 10
+        pad_w = w // 10
+        h_array = np.zeros((h + pad_h * 2), dtype=np.int32)
+        w_array = np.zeros((w + pad_w * 2), dtype=np.int32)
+        # mark the x / y extent of every box as occupied
+        for poly in polys:
+            poly = np.round(poly, decimals=0).astype(np.int32)
+            minx = np.min(poly[:, 0])
+            maxx = np.max(poly[:, 0])
+            w_array[minx + pad_w:maxx + pad_w] = 1
+            miny = np.min(poly[:, 1])
+            maxy = np.max(poly[:, 1])
+            h_array[miny + pad_h:maxy + pad_h] = 1
+        # ensure the cropped area not across a text
+        h_axis = np.where(h_array == 0)[0]
+        w_axis = np.where(w_array == 0)[0]
+        if len(h_axis) == 0 or len(w_axis) == 0:
+            return im, polys, tags, txts
+
+        for i in range(max_tries):
+            # sample two free columns / rows, undo the padding offset and
+            # clip the result to the image
+            xx = np.random.choice(w_axis, size=2)
+            xmin = np.min(xx) - pad_w
+            xmax = np.max(xx) - pad_w
+            xmin = np.clip(xmin, 0, w - 1)
+            xmax = np.clip(xmax, 0, w - 1)
+            yy = np.random.choice(h_axis, size=2)
+            ymin = np.min(yy) - pad_h
+            ymax = np.max(yy) - pad_h
+            ymin = np.clip(ymin, 0, h - 1)
+            ymax = np.clip(ymax, 0, h - 1)
+            if xmax - xmin < self.min_crop_side_ratio * w or \
+               ymax - ymin < self.min_crop_side_ratio * h:
+                # area too small
+                continue
+            if polys.shape[0] != 0:
+                # a box is selected only if all four vertices are inside
+                poly_axis_in_area = (polys[:, :, 0] >= xmin)\
+                    & (polys[:, :, 0] <= xmax)\
+                    & (polys[:, :, 1] >= ymin)\
+                    & (polys[:, :, 1] <= ymax)
+                selected_polys = np.where(
+                    np.sum(poly_axis_in_area, axis=1) == 4)[0]
+            else:
+                selected_polys = []
+
+            if len(selected_polys) == 0:
+                # no text in this area
+                if crop_background:
+                    im = im[ymin:ymax + 1, xmin:xmax + 1, :]
+                    polys = []
+                    tags = []
+                    txts = []
+                    return im, polys, tags, txts
+                else:
+                    continue
+
+            im = im[ymin:ymax + 1, xmin:xmax + 1, :]
+            polys = polys[selected_polys]
+            tags = tags[selected_polys]
+            txts_tmp = []
+            for selected_poly in selected_polys:
+                txts_tmp.append(txts[selected_poly])
+            txts = txts_tmp
+            # express the kept boxes in the coordinates of the crop
+            polys[:, :, 0] -= xmin
+            polys[:, :, 1] -= ymin
+            return im, polys, tags, txts
+        return im, polys, tags, txts
+
+    def crop_background_infor(self, im, text_polys, text_tags, text_strs):
+        im, text_polys, text_tags, text_strs = self.crop_area(
+            im, text_polys, text_tags, text_strs, crop_background=True)
+        if len(text_polys) > 0:
+            return None
+        # pad and resize image
+        input_size = self.input_size
+        im, ratio = self.preprocess(im)
+        score_map = np.zeros((input_size, input_size), dtype=np.float32)
+        geo_map = np.zeros((input_size, input_size, 9), dtype=np.float32)
+        training_mask = np.ones((input_size, input_size), dtype=np.float32)
+        return im, score_map, geo_map, training_mask
+
+    def crop_foreground_infor(self, im, text_polys, text_tags, text_strs):
+        im, text_polys, text_tags, text_strs = self.crop_area(
+            im, text_polys, text_tags, text_strs, crop_background=False)
+        if text_polys.shape[0] == 0:
+            return None
+        #continue for all ignore case
+        if np.sum((text_tags * 1.0)) >= text_tags.size:
+            return None
+        # pad and resize image
+        input_size = self.input_size
+        im, ratio = self.preprocess(im)
+        text_polys[:, :, 0] *= ratio
+        text_polys[:, :, 1] *= ratio
+        _, _, new_h, new_w = im.shape
+        #         print(im.shape)
+        #         self.draw_img_polys(im, text_polys)
+        score_map, geo_map, training_mask = self.generate_quad(
+            (new_h, new_w), text_polys, text_tags)
+        return im, score_map, geo_map, training_mask
+
+    def __call__(self, label_infor):
+        """
+        Turn one label line into a training sample.
+        :param label_infor: encoded label line, see convert_label_infor
+        :return: (im, score_map, geo_map, training_mask), or None when the
+            sample has to be skipped (unreadable image, no usable box, or
+            the crop step rejected it)
+        """
+        infor = self.convert_label_infor(label_infor)
+        im_path, text_polys, text_tags, text_strs = infor
+        im = cv2.imread(im_path)
+        if im is None:
+            return None
+        if text_polys.shape[0] == 0:
+            return None
+        #add rotate cases
+        # applied with probability 0.5
+        if np.random.rand() < 0.5:
+            im, text_polys = self.rotate_im_poly(im, text_polys)
+        h, w, _ = im.shape
+        text_polys, text_tags = self.check_and_validate_polys(text_polys,
+                                                              text_tags, h, w)
+        if text_polys.shape[0] == 0:
+            return None
+
+        # random scale this image
+        rd_scale = np.random.choice(self.random_scale)
+        im = cv2.resize(im, dsize=None, fx=rd_scale, fy=rd_scale)
+        text_polys *= rd_scale
+        # with probability background_ratio build a background-only sample
+        if np.random.rand() < self.background_ratio:
+            outs = self.crop_background_infor(im, text_polys, text_tags,
+                                              text_strs)
+        else:
+            outs = self.crop_foreground_infor(im, text_polys, text_tags,
+                                              text_strs)
+
+        if outs is None:
+            return None
+        im, score_map, geo_map, training_mask = outs
+        # keep every 4th row / column of the maps and add a channel axis
+        score_map = score_map[np.newaxis, ::4, ::4].astype(np.float32)
+        # two axis swaps: (h, w, 9) -> (h, 9, w) -> (9, h, w)
+        geo_map = np.swapaxes(geo_map, 1, 2)
+        geo_map = np.swapaxes(geo_map, 1, 0)
+        geo_map = geo_map[:, ::4, ::4].astype(np.float32)
+        training_mask = training_mask[np.newaxis, ::4, ::4]
+        training_mask = training_mask.astype(np.float32)
+        return im, score_map, geo_map, training_mask
+
+
+class EASTProcessTest(object):
+    def __init__(self, params):
+        super(EASTProcessTest, self).__init__()
+        if 'max_side_len' in params:
+            self.max_side_len = params['max_side_len']
+        else:
+            self.max_side_len = 2400
+
+    def resize_image(self, im):
+        """
+        resize image to a size multiple of 32 which is required by the network
+        :param im: the resized image
+        :param max_side_len: limit of max image size to avoid out of memory in gpu
+        :return: the resized image and the resize ratio
+        """
+        max_side_len = self.max_side_len
+        h, w, _ = im.shape
+
+        resize_w = w
+        resize_h = h
+
+        # limit the max side
+        if max(resize_h, resize_w) > max_side_len:
+            if resize_h > resize_w:
+                ratio = float(max_side_len) / resize_h
+            else:
+                ratio = float(max_side_len) / resize_w
+        else:
+            ratio = 1.
+        resize_h = int(resize_h * ratio)
+        resize_w = int(resize_w * ratio)
+        if resize_h % 32 == 0:
+            resize_h = resize_h
+        else:
+            resize_h = (resize_h // 32 - 1) * 32
+        if resize_w % 32 == 0:
+            resize_w = resize_w
+        else:
+            resize_w = (resize_w // 32 - 1) * 32
+        im = cv2.resize(im, (int(resize_w), int(resize_h)))
+        ratio_h = resize_h / float(h)
+        ratio_w = resize_w / float(w)
+        return im, (ratio_h, ratio_w)
+
+    def __call__(self, im):
+        im, (ratio_h, ratio_w) = self.resize_image(im)
+        img_mean = [0.485, 0.456, 0.406]
+        img_std = [0.229, 0.224, 0.225]
+        im = im[:, :, ::-1].astype(np.float32)
+        im = im / 255
+        im -= img_mean
+        im /= img_std
+        im = im.transpose((2, 0, 1))
+        im = im[np.newaxis, :]
+        return [im, (ratio_h, ratio_w)]
--- a/ppocr/data/det/make_border_map.py
+++ b/ppocr/data/det/make_border_map.py
@ -0,0 +1,147 @@
+# -*- coding:utf-8 -*- 
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+from __future__ import unicode_literals
+
+import numpy as np
+import cv2
+np.seterr(divide='ignore', invalid='ignore')
+import pyclipper
+from shapely.geometry import Polygon
+import sys
+import warnings
+warnings.simplefilter("ignore")
+
+
+def draw_border_map(polygon, canvas, mask, shrink_ratio):
+    '''
+    Draw the border response of one polygon; canvas and mask are modified
+    in place and nothing is returned.
+    polygon: sequence of (x, y) points, converted to an (n, 2) array
+    canvas: 2-D array, updated with the element-wise maximum of its current
+        values and 1 - (distance to the nearest polygon edge / offset)
+    mask: 2-D array; the area of the dilated polygon is filled with 1.0
+    shrink_ratio: ratio from which the offset distance is derived
+    '''
+    polygon = np.array(polygon)
+    assert polygon.ndim == 2
+    assert polygon.shape[1] == 2
+
+    polygon_shape = Polygon(polygon)
+    if polygon_shape.area <= 0:
+        return
+    # offset distance: area * (1 - shrink_ratio^2) / perimeter
+    distance = polygon_shape.area * (
+        1 - np.power(shrink_ratio, 2)) / polygon_shape.length
+    subject = [tuple(l) for l in polygon]
+    padding = pyclipper.PyclipperOffset()
+    padding.AddPath(subject, pyclipper.JT_ROUND, pyclipper.ET_CLOSEDPOLYGON)
+
+    # dilate the polygon by the offset distance; only the first returned
+    # path is used
+    # NOTE(review): indexing [0] assumes Execute returns at least one path
+    padded_polygon = np.array(padding.Execute(distance)[0])
+    cv2.fillPoly(mask, [padded_polygon.astype(np.int32)], 1.0)
+
+    # bounding box of the dilated polygon
+    xmin = padded_polygon[:, 0].min()
+    xmax = padded_polygon[:, 0].max()
+    ymin = padded_polygon[:, 1].min()
+    ymax = padded_polygon[:, 1].max()
+    width = xmax - xmin + 1
+    height = ymax - ymin + 1
+
+    # express the polygon in the coordinates of that bounding box
+    polygon[:, 0] = polygon[:, 0] - xmin
+    polygon[:, 1] = polygon[:, 1] - ymin
+
+    # per-pixel x / y coordinate grids of the bounding box
+    xs = np.broadcast_to(
+        np.linspace(
+            0, width - 1, num=width).reshape(1, width), (height, width))
+    ys = np.broadcast_to(
+        np.linspace(
+            0, height - 1, num=height).reshape(height, 1), (height, width))
+
+    # one normalized, clipped distance map per polygon edge
+    distance_map = np.zeros((polygon.shape[0], height, width), dtype=np.float32)
+    for i in range(polygon.shape[0]):
+        j = (i + 1) % polygon.shape[0]
+        absolute_distance = _distance(xs, ys, polygon[i], polygon[j])
+        distance_map[i] = np.clip(absolute_distance / distance, 0, 1)
+    # keep the value of the closest edge for every pixel
+    distance_map = distance_map.min(axis=0)
+
+    # part of the bounding box that lies inside the canvas
+    xmin_valid = min(max(0, xmin), canvas.shape[1] - 1)
+    xmax_valid = min(max(0, xmax), canvas.shape[1] - 1)
+    ymin_valid = min(max(0, ymin), canvas.shape[0] - 1)
+    ymax_valid = min(max(0, ymax), canvas.shape[0] - 1)
+    canvas[ymin_valid:ymax_valid + 1, xmin_valid:xmax_valid + 1] = np.fmax(
+        1 - distance_map[ymin_valid - ymin:ymax_valid - ymax + height,
+                         xmin_valid - xmin:xmax_valid - xmax + width],
+        canvas[ymin_valid:ymax_valid + 1, xmin_valid:xmax_valid + 1])
+
+
+def _distance(xs, ys, point_1, point_2):
+    '''
+    compute the distance from every grid point to a line
+    ys: coordinates in the first axis
+    xs: coordinates in the second axis
+    point_1, point_2: (x, y), the end of the line
+    returns an array with one distance per grid point
+    '''
+    height, width = xs.shape[:2]  # not used below
+    # squared distances from each grid point to the two end points
+    square_distance_1 = np.square(xs - point_1[0]) + np.square(ys - point_1[1])
+    square_distance_2 = np.square(xs - point_2[0]) + np.square(ys - point_2[1])
+    # squared length of the segment itself
+    square_distance = np.square(point_1[0] - point_2[0]) + np.square(point_1[
+        1] - point_2[1])
+
+    # law-of-cosines term for the triangle (grid point, point_1, point_2)
+    cosin = (square_distance - square_distance_1 - square_distance_2) / (
+        2 * np.sqrt(square_distance_1 * square_distance_2))
+    square_sin = 1 - np.square(cosin)
+    # NaNs from 0 / 0 are replaced by 0
+    square_sin = np.nan_to_num(square_sin)
+    result = np.sqrt(square_distance_1 * square_distance_2 * square_sin /
+                     square_distance)
+
+    # where cosin is negative, use the distance to the nearer end point
+    result[cosin <
+           0] = np.sqrt(np.fmin(square_distance_1, square_distance_2))[cosin <
+                                                                       0]
+    # self.extend_line(point_1, point_2, result)
+    return result
+
+
+def extend_line(point_1, point_2, result, shrink_ratio):
+    ex_point_1 = (
+        int(
+            round(point_1[0] + (point_1[0] - point_2[0]) * (1 + shrink_ratio))),
+        int(
+            round(point_1[1] + (point_1[1] - point_2[1]) * (1 + shrink_ratio))))
+    cv2.line(
+        result,
+        tuple(ex_point_1),
+        tuple(point_1),
+        4096.0,
+        1,
+        lineType=cv2.LINE_AA,
+        shift=0)
+    ex_point_2 = (
+        int(
+            round(point_2[0] + (point_2[0] - point_1[0]) * (1 + shrink_ratio))),
+        int(
+            round(point_2[1] + (point_2[1] - point_1[1]) * (1 + shrink_ratio))))
+    cv2.line(
+        result,
+        tuple(ex_point_2),
+        tuple(point_2),
+        4096.0,
+        1,
+        lineType=cv2.LINE_AA,
+        shift=0)
+    return ex_point_1, ex_point_2
+
+
+def MakeBorderMap(data):
+    shrink_ratio = 0.4
+    thresh_min = 0.3
+    thresh_max = 0.7
+
+    im = data['image']
+    text_polys = data['polys']
+    ignore_tags = data['ignore_tags']
+
+    canvas = np.zeros(im.shape[:2], dtype=np.float32)
+    mask = np.zeros(im.shape[:2], dtype=np.float32)
+
+    for i in range(len(text_polys)):
+        if ignore_tags[i]:
+            continue
+        draw_border_map(
+            text_polys[i], canvas, mask=mask, shrink_ratio=shrink_ratio)
+    canvas = canvas * (thresh_max - thresh_min) + thresh_min
+
+    data['threshold_map'] = canvas
+    data['threshold_mask'] = mask
+    return data
--- a/ppocr/data/det/make_shrink_map.py
+++ b/ppocr/data/det/make_shrink_map.py
@ -0,0 +1,88 @@
+# -*- coding:utf-8 -*- 
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+from __future__ import unicode_literals
+
+import numpy as np
+import cv2
+from shapely.geometry import Polygon
+import pyclipper
+
+
+def validate_polygons(polygons, ignore_tags, h, w):
+    '''
+    polygons (numpy.array, required): of shape (num_instances, num_points, 2)
+    '''
+    if len(polygons) == 0:
+        return polygons, ignore_tags
+    assert len(polygons) == len(ignore_tags)
+    for polygon in polygons:
+        polygon[:, 0] = np.clip(polygon[:, 0], 0, w - 1)
+        polygon[:, 1] = np.clip(polygon[:, 1], 0, h - 1)
+
+    for i in range(len(polygons)):
+        area = polygon_area(polygons[i])
+        if abs(area) < 1:
+            ignore_tags[i] = True
+        if area > 0:
+            polygons[i] = polygons[i][::-1, :]
+    return polygons, ignore_tags
+
+
+def polygon_area(polygon):
+    edge = 0
+    for i in range(polygon.shape[0]):
+        next_index = (i + 1) % polygon.shape[0]
+        edge += (polygon[next_index, 0] - polygon[i, 0]) * (
+            polygon[next_index, 1] - polygon[i, 1])
+
+    return edge / 2.
+
+
+def MakeShrinkMap(data):
+    '''
+    Add the shrunk text map and its training mask to a sample.
+    data: dict with the keys 'image', 'polys' and 'ignore_tags'
+    returns the same dict with 'shrink_map' and 'shrink_mask' added; the
+    entries of ignore_tags are updated for boxes that get masked out
+    '''
+    min_text_size = 8
+    shrink_ratio = 0.4
+
+    image = data['image']
+    text_polys = data['polys']
+    ignore_tags = data['ignore_tags']
+
+    h, w = image.shape[:2]
+    text_polys, ignore_tags = validate_polygons(text_polys, ignore_tags, h, w)
+    gt = np.zeros((h, w), dtype=np.float32)
+    # gt = np.zeros((1, h, w), dtype=np.float32)
+    mask = np.ones((h, w), dtype=np.float32)
+    for i in range(len(text_polys)):
+        polygon = text_polys[i]
+        # extent of the axis-aligned bounding box of the polygon
+        height = max(polygon[:, 1]) - min(polygon[:, 1])
+        width = max(polygon[:, 0]) - min(polygon[:, 0])
+        # height = min(np.linalg.norm(polygon[0] - polygon[3]),
+        #             np.linalg.norm(polygon[1] - polygon[2]))
+        # width = min(np.linalg.norm(polygon[0] - polygon[1]),
+        #             np.linalg.norm(polygon[2] - polygon[3]))
+        # ignored or too small boxes are masked out of training
+        if ignore_tags[i] or min(height, width) < min_text_size:
+            cv2.fillPoly(mask, polygon.astype(np.int32)[np.newaxis, :, :], 0)
+            ignore_tags[i] = True
+        else:
+            polygon_shape = Polygon(polygon)
+            # offset distance: area * (1 - shrink_ratio^2) / perimeter
+            distance = polygon_shape.area * (
+                1 - np.power(shrink_ratio, 2)) / polygon_shape.length
+            subject = [tuple(l) for l in text_polys[i]]
+            padding = pyclipper.PyclipperOffset()
+            padding.AddPath(subject, pyclipper.JT_ROUND,
+                            pyclipper.ET_CLOSEDPOLYGON)
+            # a negative offset shrinks the polygon
+            shrinked = padding.Execute(-distance)
+            # nothing left after shrinking: treat the box as ignored
+            if shrinked == []:
+                cv2.fillPoly(mask,
+                             polygon.astype(np.int32)[np.newaxis, :, :], 0)
+                ignore_tags[i] = True
+                continue
+            # only the first returned path is drawn
+            shrinked = np.array(shrinked[0]).reshape(-1, 2)
+            cv2.fillPoly(gt, [shrinked.astype(np.int32)], 1)
+            # cv2.fillPoly(gt[0], [shrinked.astype(np.int32)], 1)
+
+    data['shrink_map'] = gt
+    data['shrink_mask'] = mask
+    return data
--- a/ppocr/data/det/random_crop_data.py
+++ b/ppocr/data/det/random_crop_data.py
@ -0,0 +1,155 @@
+# -*- coding:utf-8 -*- 
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+from __future__ import unicode_literals
+
+import numpy as np
+import cv2
+import random
+
+
+def is_poly_in_rect(poly, x, y, w, h):
+    poly = np.array(poly)
+    if poly[:, 0].min() < x or poly[:, 0].max() > x + w:
+        return False
+    if poly[:, 1].min() < y or poly[:, 1].max() > y + h:
+        return False
+    return True
+
+
+def is_poly_outside_rect(poly, x, y, w, h):
+    poly = np.array(poly)
+    if poly[:, 0].max() < x or poly[:, 0].min() > x + w:
+        return True
+    if poly[:, 1].max() < y or poly[:, 1].min() > y + h:
+        return True
+    return False
+
+
+def split_regions(axis):
+    regions = []
+    min_axis = 0
+    for i in range(1, axis.shape[0]):
+        if axis[i] != axis[i - 1] + 1:
+            region = axis[min_axis:i]
+            min_axis = i
+            regions.append(region)
+    return regions
+
+
+def random_select(axis, max_size):
+    xx = np.random.choice(axis, size=2)
+    xmin = np.min(xx)
+    xmax = np.max(xx)
+    xmin = np.clip(xmin, 0, max_size - 1)
+    xmax = np.clip(xmax, 0, max_size - 1)
+    return xmin, xmax
+
+
+def region_wise_random_select(regions, max_size):
+    selected_index = list(np.random.choice(len(regions), 2))
+    selected_values = []
+    for index in selected_index:
+        axis = regions[index]
+        xx = int(np.random.choice(axis, size=1))
+        selected_values.append(xx)
+    xmin = min(selected_values)
+    xmax = max(selected_values)
+    return xmin, xmax
+
+
+def crop_area(im, text_polys, min_crop_side_ratio, max_tries):
+    '''
+    Sample a crop rectangle whose borders lie on rows / columns that are
+    not covered by any text box.
+    im: image array of shape (h, w, c)
+    text_polys: iterable of boxes, each convertible to an (n, 2) array
+    min_crop_side_ratio: minimal crop side as a fraction of the image side
+    max_tries: number of sampling attempts
+    returns (x, y, w, h) of the crop; the full image (0, 0, w, h) is
+    returned when no suitable rectangle was found
+    '''
+    h, w, _ = im.shape
+    h_array = np.zeros(h, dtype=np.int32)
+    w_array = np.zeros(w, dtype=np.int32)
+    # mark the x / y extent of every box as occupied
+    for points in text_polys:
+        points = np.round(points, decimals=0).astype(np.int32)
+        minx = np.min(points[:, 0])
+        maxx = np.max(points[:, 0])
+        w_array[minx:maxx] = 1
+        miny = np.min(points[:, 1])
+        maxy = np.max(points[:, 1])
+        h_array[miny:maxy] = 1
+    # ensure the cropped area not across a text
+    h_axis = np.where(h_array == 0)[0]
+    w_axis = np.where(w_array == 0)[0]
+
+    if len(h_axis) == 0 or len(w_axis) == 0:
+        return 0, 0, w, h
+
+    # runs of consecutive free rows / columns
+    h_regions = split_regions(h_axis)
+    w_regions = split_regions(w_axis)
+
+    for i in range(max_tries):
+        # with several free regions, the borders are drawn per region
+        if len(w_regions) > 1:
+            xmin, xmax = region_wise_random_select(w_regions, w)
+        else:
+            xmin, xmax = random_select(w_axis, w)
+        if len(h_regions) > 1:
+            ymin, ymax = region_wise_random_select(h_regions, h)
+        else:
+            ymin, ymax = random_select(h_axis, h)
+
+        if xmax - xmin < min_crop_side_ratio * w or ymax - ymin < min_crop_side_ratio * h:
+            # area too small
+            continue
+        # accept the crop as soon as one box is not entirely outside of it
+        num_poly_in_rect = 0
+        for poly in text_polys:
+            if not is_poly_outside_rect(poly, xmin, ymin, xmax - xmin,
+                                        ymax - ymin):
+                num_poly_in_rect += 1
+                break
+
+        if num_poly_in_rect > 0:
+            return xmin, ymin, xmax - xmin, ymax - ymin
+
+    return 0, 0, w, h
+
+
+def RandomCropData(data, size):
+    '''
+    Randomly crop a sample and resize the crop to a fixed output size.
+    data: dict with the keys 'image', 'polys', 'ignore_tags' and 'texts'
+    size: output size, read as (width, height)
+    returns the same dict with 'image', 'polys', 'ignore_tags' and 'texts'
+    replaced by the cropped versions
+    '''
+    max_tries = 10
+    min_crop_side_ratio = 0.1
+    require_original_image = False  # not used below
+    keep_ratio = True
+
+    im = data['image']
+    text_polys = data['polys']
+    ignore_tags = data['ignore_tags']
+    texts = data['texts']
+    # only boxes that are not ignored steer the choice of the crop
+    all_care_polys = [
+        text_polys[i] for i, tag in enumerate(ignore_tags) if not tag
+    ]
+    # compute the crop region
+    crop_x, crop_y, crop_w, crop_h = crop_area(im, all_care_polys,
+                                               min_crop_side_ratio, max_tries)
+    # crop the image, keep the aspect ratio and pad the rest
+    scale_w = size[0] / crop_w
+    scale_h = size[1] / crop_h
+    scale = min(scale_w, scale_h)
+    h = int(crop_h * scale)
+    w = int(crop_w * scale)
+    if keep_ratio:
+        # paste the resized crop into the top-left corner of a zero canvas
+        padimg = np.zeros((size[1], size[0], im.shape[2]), im.dtype)
+        padimg[:h, :w] = cv2.resize(
+            im[crop_y:crop_y + crop_h, crop_x:crop_x + crop_w], (w, h))
+        img = padimg
+    else:
+        # keep_ratio is fixed to True above, so this branch is not taken
+        img = cv2.resize(im[crop_y:crop_y + crop_h, crop_x:crop_x + crop_w],
+                         tuple(size))
+    # crop the text boxes
+    text_polys_crop = []
+    ignore_tags_crop = []
+    texts_crop = []
+    for poly, text, tag in zip(text_polys, texts, ignore_tags):
+        # move the box into crop coordinates and apply the resize factor
+        poly = ((poly - (crop_x, crop_y)) * scale).tolist()
+        # keep every box that is not entirely outside of the resized crop
+        if not is_poly_outside_rect(poly, 0, 0, w, h):
+            text_polys_crop.append(poly)
+            ignore_tags_crop.append(tag)
+            texts_crop.append(text)
+    data['image'] = img
+    data['polys'] = np.array(text_polys_crop)
+    data['ignore_tags'] = ignore_tags_crop
+    data['texts'] = texts_crop
+    return data
--- a/ppocr/data/reader_main.py
+++ b/ppocr/data/reader_main.py
@ -0,0 +1,81 @@
+#Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
+
+import os
+import random
+import numpy as np
+
+import paddle
+from ppocr.utils.utility import create_module
+from copy import deepcopy
+
+from .rec.img_tools import process_image
+import cv2
+
+import sys
+import signal
+
+
+# Signal handler for a terminating reader process: leave quietly instead
+# of printing a stack frame.
+def _reader_quit(signum, frame):
+    """Print a short notice and exit the current process.
+
+    ``signum`` and ``frame`` are the standard signal-handler arguments;
+    neither is used.
+    """
+    print("Reader process exit.")
+    raise SystemExit
+
+
+def _term_group(sig_num, frame):
+    """Report the current pid and SIGKILL its whole process group.
+
+    ``sig_num`` and ``frame`` are the standard signal-handler arguments;
+    neither is used.
+    """
+    current_pid = os.getpid()
+    print('pid {} terminated, terminate group '
+          '{}...'.format(current_pid, os.getpgrp()))
+    group_id = os.getpgid(current_pid)
+    os.killpg(group_id, signal.SIGKILL)
+
+
+# Installed at import time: SIGTERM makes the process exit quietly via
+# _reader_quit, SIGINT kills the whole process group via _term_group.
+signal.signal(signal.SIGTERM, _reader_quit)
+signal.signal(signal.SIGINT, _term_group)
+
+
+def reader_main(config=None, mode=None):
+    """Build the data reader described by ``config`` for ``mode``.
+
+    Args:
+        config (dict): configuration containing a 'Global' section and the
+            reader section for the mode ('TrainReader', 'EvalReader' or
+            'TestReader'). The reader section must name its
+            'reader_function'; 'Global' entries override reader entries
+            with the same key.
+        mode (str): one of "train", "eval" or "test".
+
+    Returns:
+        For "train": ``paddle.reader.multiprocess_reader`` over one reader
+        per worker ('num_workers' of them). Otherwise: the single reader
+        obtained by calling the reader object with ``mode``.
+    """
+    assert mode in ["train", "eval", "test"],\
+        "Nonsupport mode:{}".format(mode)
+    global_params = config['Global']
+    section_by_mode = {
+        "train": 'TrainReader',
+        "eval": 'EvalReader',
+        "test": 'TestReader',
+    }
+    params = deepcopy(config[section_by_mode[mode]])
+    params['mode'] = mode
+    params.update(global_params)
+    reader_factory = create_module(params['reader_function'])(params)
+    if mode != "train":
+        return reader_factory(mode)
+    # One reader per worker; each receives its own process id.
+    worker_readers = [
+        reader_factory(process_id)
+        for process_id in range(params['num_workers'])
+    ]
+    return paddle.reader.multiprocess_reader(worker_readers, False)
+
+
+def test_reader(image_shape, img_path):
+    """Read the image at ``img_path`` and return ``process_image`` of it.
+
+    The result of ``cv2.imread`` is handed to ``process_image`` unchecked.
+    """
+    return process_image(cv2.imread(img_path), image_shape)
--- a/ppocr/data/rec/__init__.py
+++ b/ppocr/data/rec/__init__.py
@ -0,0 +1,13 @@
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
--- a/ppocr/data/rec/dataset_traversal.py
+++ b/ppocr/data/rec/dataset_traversal.py
@ -0,0 +1,201 @@
+#Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
+
+import os
+import math
+import random
+import numpy as np
+import cv2
+
+import string
+import lmdb
+
+from ppocr.utils.utility import initial_logger
+logger = initial_logger()
+
+from .img_tools import process_image, get_img_data
+
+
+class LMDBReader(object):
+    """Batch reader over every LMDB dataset found under a directory tree.
+
+    Calling an instance with a process id returns a generator function
+    that yields lists of processed samples, ``batch_size`` at a time.
+    """
+
+    def __init__(self, params):
+        """Read the reader settings from ``params``.
+
+        Keys read: 'mode', 'num_workers' (train mode only),
+        'lmdb_sets_dir', 'char_ops', 'image_shape', 'loss_type',
+        'max_text_length', and 'train_batch_size_per_card' or
+        'test_batch_size_per_card' depending on the mode.
+        """
+        if params['mode'] != 'train':
+            self.num_workers = 1
+        else:
+            self.num_workers = params['num_workers']
+        self.lmdb_sets_dir = params['lmdb_sets_dir']
+        self.char_ops = params['char_ops']
+        self.image_shape = params['image_shape']
+        self.loss_type = params['loss_type']
+        self.max_text_length = params['max_text_length']
+        self.mode = params['mode']
+        if params['mode'] == 'train':
+            self.batch_size = params['train_batch_size_per_card']
+        else:
+            self.batch_size = params['test_batch_size_per_card']
+
+    def load_hierarchical_lmdb_dataset(self):
+        """Open every leaf directory under ``lmdb_sets_dir`` as an LMDB env.
+
+        Returns:
+            dict: dataset index -> {'dirpath', 'env', 'txn', 'num_samples'}.
+        """
+        lmdb_sets = {}
+        dataset_idx = 0
+        for dirpath, dirnames, filenames in os.walk(self.lmdb_sets_dir + '/'):
+            # Only directories without sub-directories are opened.
+            if not dirnames:
+                env = lmdb.open(
+                    dirpath,
+                    max_readers=32,
+                    readonly=True,
+                    lock=False,
+                    readahead=False,
+                    meminit=False)
+                txn = env.begin(write=False)
+                num_samples = int(txn.get('num-samples'.encode()))
+                lmdb_sets[dataset_idx] = {"dirpath":dirpath, "env":env, \
+                    "txn":txn, "num_samples":num_samples}
+                dataset_idx += 1
+        return lmdb_sets
+
+    def print_lmdb_sets_info(self, lmdb_sets):
+        """Log one 'DataSummary' line with each dataset path and size."""
+        lmdb_info_strs = []
+        for dataset_idx in range(len(lmdb_sets)):
+            tmp_str = " %s:%d," % (lmdb_sets[dataset_idx]['dirpath'],
+                                   lmdb_sets[dataset_idx]['num_samples'])
+            lmdb_info_strs.append(tmp_str)
+        lmdb_info_strs = ''.join(lmdb_info_strs)
+        logger.info("DataSummary:" + lmdb_info_strs)
+        return
+
+    def close_lmdb_dataset(self, lmdb_sets):
+        """Close the LMDB environment of every dataset in ``lmdb_sets``."""
+        for dataset_idx in lmdb_sets:
+            lmdb_sets[dataset_idx]['env'].close()
+        return
+
+    def get_lmdb_sample_info(self, txn, index):
+        """Fetch and decode sample ``index`` from an LMDB transaction.
+
+        Reads the keys 'label-%09d' and 'image-%09d' for ``index``.
+
+        Returns:
+            tuple or None: (image, label string), or None when the label
+            is missing or ``get_img_data`` returns None for the image.
+        """
+        label_key = 'label-%09d'.encode() % index
+        label = txn.get(label_key)
+        if label is None:
+            return None
+        label = label.decode('utf-8')
+        img_key = 'image-%09d'.encode() % index
+        imgbuf = txn.get(img_key)
+        img = get_img_data(imgbuf)
+        if img is None:
+            return None
+        return img, label
+
+    def __call__(self, process_id):
+        """Return a generator function yielding batches for one worker.
+
+        Args:
+            process_id: worker index; forced to 0 outside train mode.
+                Worker ``p`` reads indices ``1 + p``, ``1 + p +
+                num_workers``, ... of every dataset.
+        """
+        if self.mode != 'train':
+            process_id = 0
+
+        def sample_iter_reader():
+            lmdb_sets = self.load_hierarchical_lmdb_dataset()
+            # try/finally so the environments are closed even when the
+            # consumer stops iterating early or a sample raises.
+            try:
+                if process_id == 0:
+                    self.print_lmdb_sets_info(lmdb_sets)
+                cur_index_sets = [1 + process_id] * len(lmdb_sets)
+                while True:
+                    finish_read_num = 0
+                    # Round-robin: at most one sample per dataset per pass.
+                    for dataset_idx in range(len(lmdb_sets)):
+                        cur_index = cur_index_sets[dataset_idx]
+                        if cur_index > lmdb_sets[dataset_idx]['num_samples']:
+                            finish_read_num += 1
+                        else:
+                            sample_info = self.get_lmdb_sample_info(
+                                lmdb_sets[dataset_idx]['txn'], cur_index)
+                            cur_index_sets[dataset_idx] += self.num_workers
+                            if sample_info is None:
+                                continue
+                            img, label = sample_info
+                            outs = process_image(img, self.image_shape, label,
+                                                 self.char_ops, self.loss_type,
+                                                 self.max_text_length)
+                            if outs is None:
+                                continue
+                            yield outs
+
+                    # Stop once every dataset has been read to its end.
+                    if finish_read_num == len(lmdb_sets):
+                        break
+            finally:
+                self.close_lmdb_dataset(lmdb_sets)
+
+        def batch_iter_reader():
+            batch_outs = []
+            for outs in sample_iter_reader():
+                batch_outs.append(outs)
+                if len(batch_outs) == self.batch_size:
+                    yield batch_outs
+                    batch_outs = []
+            # Emit the final, possibly smaller, batch.
+            if len(batch_outs) != 0:
+                yield batch_outs
+
+        return batch_iter_reader
+
+
+class SimpleReader(object):
+    """Batch reader over images listed in a tab-separated label file.
+
+    Each label line is ``<relative image path>\\t<label>``. In 'test' mode
+    the reader instead yields the single image named by 'infer_img'.
+    """
+
+    def __init__(self, params):
+        """Read the reader settings from ``params``.
+
+        Keys read: 'mode', 'num_workers' (train mode only), 'img_set_dir',
+        'label_file_path', 'char_ops', 'image_shape', 'loss_type',
+        'max_text_length', the batch-size key for the mode, and
+        'infer_img' for modes other than 'train' and 'eval'.
+        """
+        self.mode = params['mode']
+        if self.mode != 'train':
+            self.num_workers = 1
+        else:
+            self.num_workers = params['num_workers']
+        if self.mode == 'test':
+            # Test mode only reads `infer_img`, so the dataset location
+            # is optional there.
+            self.img_set_dir = params.get('img_set_dir')
+            self.label_file_path = params.get('label_file_path')
+        else:
+            self.img_set_dir = params['img_set_dir']
+            self.label_file_path = params['label_file_path']
+        self.char_ops = params['char_ops']
+        self.image_shape = params['image_shape']
+        self.loss_type = params['loss_type']
+        self.max_text_length = params['max_text_length']
+        if self.mode == 'train':
+            self.batch_size = params['train_batch_size_per_card']
+        elif self.mode == 'eval':
+            self.batch_size = params['test_batch_size_per_card']
+        else:
+            self.batch_size = 1
+            self.infer_img = params['infer_img']
+
+    def __call__(self, process_id):
+        """Return a generator function yielding batches for one worker.
+
+        Args:
+            process_id: worker index; forced to 0 outside train mode.
+                Worker ``p`` takes positions ``p``, ``p + num_workers``,
+                ... of the shuffled sample order.
+        """
+        if self.mode != 'train':
+            process_id = 0
+
+        def sample_iter_reader():
+            if self.mode == 'test':
+                print("infer_img:", self.infer_img)
+                img = cv2.imread(self.infer_img)
+                norm_img = process_image(img, self.image_shape)
+                yield norm_img
+                # Inference yields exactly this one image; without the
+                # return the generator fell through to the label file.
+                return
+            with open(self.label_file_path, "rb") as fin:
+                label_infor_list = fin.readlines()
+            img_num = len(label_infor_list)
+            img_id_list = list(range(img_num))
+            # NOTE(review): each call shuffles on its own; workers split
+            # the data cleanly only if they all get the same order --
+            # confirm.
+            random.shuffle(img_id_list)
+            for img_id in range(process_id, img_num, self.num_workers):
+                label_infor = label_infor_list[img_id_list[img_id]]
+                substr = label_infor.decode('utf-8').strip("\n").split("\t")
+                img_path = self.img_set_dir + "/" + substr[0]
+                img = cv2.imread(img_path)
+                # Unreadable images are skipped.
+                if img is None:
+                    continue
+                label = substr[1]
+                outs = process_image(img, self.image_shape, label,
+                                     self.char_ops, self.loss_type,
+                                     self.max_text_length)
+                # process_image returned None: skip this sample.
+                if outs is None:
+                    continue
+                yield outs
+
+        def batch_iter_reader():
+            batch_outs = []
+            for outs in sample_iter_reader():
+                batch_outs.append(outs)
+                if len(batch_outs) == self.batch_size:
+                    yield batch_outs
+                    batch_outs = []
+            # Emit the final, possibly smaller, batch.
+            if len(batch_outs) != 0:
+                yield batch_outs
+
+        return batch_iter_reader
--- a/ppocr/data/rec/img_tools.py
+++ b/ppocr/data/rec/img_tools.py
@ -0,0 +1,92 @@
+#Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
+
+import math
+import cv2
+import numpy as np
+
+
+def get_bounding_box_rect(pos):
+    """Return ``[left, top, right, bottom]`` for ``pos``.
+
+    ``left``/``right`` are the min/max of ``pos[0]`` and ``top``/``bottom``
+    the min/max of ``pos[1]``.
+    """
+    first_axis, second_axis = pos[0], pos[1]
+    return [
+        min(first_axis), min(second_axis), max(first_axis), max(second_axis)
+    ]
+
+
+def resize_norm_img(img, image_shape):
+    """Resize ``img`` to the target height, normalise it and right-pad it.
+
+    Args:
+        img: image array; ``shape[0]`` is its height, ``shape[1]`` its width.
+        image_shape: ``(imgC, imgH, imgW)`` target shape.
+
+    Returns:
+        float32 array of shape ``(imgC, imgH, imgW)``. The image is resized
+        to height ``imgH`` with its aspect ratio kept (width capped at
+        ``imgW``), mapped from [0, 255] to [-1, 1] and written into the
+        left part of a zero-filled canvas.
+    """
+    imgC, imgH, imgW = image_shape
+    src_h, src_w = img.shape[0], img.shape[1]
+    aspect = src_w / float(src_h)
+    wanted_w = math.ceil(imgH * aspect)
+    resized_w = imgW if wanted_w > imgW else int(wanted_w)
+    resized = cv2.resize(img, (resized_w, imgH)).astype('float32')
+    if image_shape[0] == 1:
+        # Single-channel target: only a leading axis is added.
+        # NOTE(review): this looks like it expects an already 2-D
+        # (grayscale) input -- confirm.
+        resized = (resized / 255)[np.newaxis, :]
+    else:
+        # Move the channel axis to the front.
+        resized = resized.transpose((2, 0, 1)) / 255
+    resized -= 0.5
+    resized /= 0.5
+    canvas = np.zeros((imgC, imgH, imgW), dtype=np.float32)
+    canvas[:, :, 0:resized_w] = resized
+    return canvas
+
+
+def get_img_data(value):
+    """Decode an encoded image buffer.
+
+    Args:
+        value: bytes-like buffer holding an encoded image.
+
+    Returns:
+        The result of ``cv2.imdecode(..., 1)`` on the buffer, or None when
+        ``value`` is empty/None. A None from ``cv2.imdecode`` is passed
+        through unchanged.
+    """
+    if not value:
+        return None
+    # np.frombuffer returns an array or raises, never None, so the result
+    # needs no None check.
+    imgdata = np.frombuffer(value, dtype='uint8')
+    return cv2.imdecode(imgdata, 1)
+
+
+def process_image(img,
+                  image_shape,
+                  label=None,
+                  char_ops=None,
+                  loss_type=None,
+                  max_text_length=None):
+    """Normalise an image and, when a label is given, encode the label.
+
+    Args:
+        img: input image, passed to ``resize_norm_img``.
+        image_shape: target shape, passed to ``resize_norm_img``.
+        label: text label, or None when only the image is needed.
+        char_ops: object providing ``get_char_num``, ``encode`` and
+            ``get_beg_end_flag_idx``; needed when ``label`` is given.
+        loss_type: "ctc" or "attention"; needed when ``label`` is given.
+        max_text_length: longest encoded label that is accepted.
+
+    Returns:
+        - the normalised image with a leading axis when ``label`` is None;
+        - None when the encoded label is empty or too long;
+        - ``(image, text)`` for "ctc";
+        - ``(image, beg_text, end_text)`` for "attention", where the begin
+          flag is prepended to / the end flag appended to the text.
+        Any other ``loss_type`` fails an assertion.
+    """
+    norm_img = resize_norm_img(img, image_shape)[np.newaxis, :]
+    if label is None:
+        return norm_img
+
+    # The result is unused; the call is kept to match the original code.
+    char_ops.get_char_num()
+    text = char_ops.encode(label)
+    if len(text) == 0 or len(text) > max_text_length:
+        return None
+
+    if loss_type == "ctc":
+        return (norm_img, text.reshape(-1, 1))
+
+    if loss_type == "attention":
+        beg_flag_idx = char_ops.get_beg_end_flag_idx("beg")
+        end_flag_idx = char_ops.get_beg_end_flag_idx("end")
+        beg_text = np.append(beg_flag_idx, text).reshape(-1, 1)
+        end_text = np.append(text, end_flag_idx).reshape(-1, 1)
+        return (norm_img, beg_text, end_text)
+
+    assert False, "Unsupport loss_type %s in process_image"\
+        % loss_type
+    # Reached only when assertions are disabled.
+    return norm_img
--- a/ppocr/modeling/__init__.py
+++ b/ppocr/modeling/__init__.py
@ -0,0 +1,13 @@
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
--- a/ppocr/modeling/architectures/det_model.py
+++ b/ppocr/modeling/architectures/det_model.py
@ -0,0 +1,119 @@
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+from paddle import fluid
+
+from ppocr.utils.utility import create_module
+from ppocr.utils.utility import initial_logger
+logger = initial_logger()
+from copy import deepcopy
+
+
+class DetModel(object):
+    def __init__(self, params):
+        """
+        Detection module for OCR text detection.
+        args:
+            params (dict): configuration with a 'Global' section (providing
+                'algorithm' and 'image_shape') and 'Backbone', 'Head' and
+                'Loss' sections, each naming its implementation under
+                'function'. 'Global' entries override section entries with
+                the same key.
+        """
+        global_params = params['Global']
+        self.algorithm = global_params['algorithm']
+
+        backbone_params = deepcopy(params["Backbone"])
+        backbone_params.update(global_params)
+        self.backbone = create_module(backbone_params['function'])\
+                (params=backbone_params)
+
+        head_params = deepcopy(params["Head"])
+        head_params.update(global_params)
+        self.head = create_module(head_params['function'])\
+                (params=head_params)
+
+        loss_params = deepcopy(params["Loss"])
+        loss_params.update(global_params)
+        self.loss = create_module(loss_params['function'])\
+                (params=loss_params)
+
+        self.image_shape = global_params['image_shape']
+
+    def create_feed(self, mode):
+        """
+        create Dataloader feeds
+        args:
+            mode (str): 'train' for training  or else for evaluation
+        return: (image, corresponding label, dataloader); label and
+            dataloader are None unless mode is 'train'
+        raises:
+            NotImplementedError: in 'train' mode when the configured
+                algorithm is neither "EAST" nor "DB"
+        """
+        image_shape = deepcopy(self.image_shape)
+        image = fluid.layers.data(
+            name='image', shape=image_shape, dtype='float32')
+        if mode == "train":
+            if self.algorithm == "EAST":
+                score = fluid.layers.data(
+                    name='score', shape=[1, 128, 128], dtype='float32')
+                geo = fluid.layers.data(
+                    name='geo', shape=[9, 128, 128], dtype='float32')
+                mask = fluid.layers.data(
+                    name='mask', shape=[1, 128, 128], dtype='float32')
+                feed_list = [image, score, geo, mask]
+                labels = {'score': score, 'geo': geo, 'mask': mask}
+            elif self.algorithm == "DB":
+                # All four DB maps use image_shape without its first entry.
+                shrink_map = fluid.layers.data(
+                    name='shrink_map', shape=image_shape[1:], dtype='float32')
+                shrink_mask = fluid.layers.data(
+                    name='shrink_mask', shape=image_shape[1:], dtype='float32')
+                threshold_map = fluid.layers.data(
+                    name='threshold_map',
+                    shape=image_shape[1:],
+                    dtype='float32')
+                threshold_mask = fluid.layers.data(
+                    name='threshold_mask',
+                    shape=image_shape[1:],
+                    dtype='float32')
+                feed_list=[image, shrink_map, shrink_mask,\
+                    threshold_map, threshold_mask]
+                labels = {'shrink_map':shrink_map,\
+                    'shrink_mask':shrink_mask,\
+                    'threshold_map':threshold_map,\
+                    'threshold_mask':threshold_mask}
+            else:
+                # Without this branch feed_list/labels stay unbound and
+                # the failure surfaces as an UnboundLocalError below.
+                raise NotImplementedError(
+                    "Unsupported detection algorithm for training: "
+                    "{}".format(self.algorithm))
+            loader = fluid.io.DataLoader.from_generator(
+                feed_list=feed_list,
+                capacity=64,
+                use_double_buffer=True,
+                iterable=False)
+        else:
+            labels = None
+            loader = None
+        return image, labels, loader
+
+    def __call__(self, mode):
+        """
+        run forward of defined module
+        args:
+            mode (str): 'train' for training; 'export'  for inference,
+                others for evaluation
+        return: (loader, losses) for 'train', [image, predicts] for
+            'export', (loader, predicts) otherwise
+        """
+        image, labels, loader = self.create_feed(mode)
+        conv_feas = self.backbone(image)
+        predicts = self.head(conv_feas)
+        if mode == "train":
+            losses = self.loss(predicts, labels)
+            return loader, losses
+        elif mode == "export":
+            return [image, predicts]
+        else:
+            return loader, predicts
--- a/ppocr/modeling/architectures/rec_model.py
+++ b/ppocr/modeling/architectures/rec_model.py
@ -0,0 +1,114 @@
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+from paddle import fluid
+
+from ppocr.utils.utility import create_module
+from ppocr.utils.utility import initial_logger
+logger = initial_logger()
+from copy import deepcopy
+
+
+class RecModel(object):
+    """Text recognition model: optional TPS, backbone, head and loss."""
+
+    def __init__(self, params):
+        """Create the sub-modules described by ``params``.
+
+        ``params`` needs a 'Global' section plus 'Backbone', 'Head' and
+        'Loss' sections; a 'TPS' section is optional. Each section names
+        its implementation under 'function', and 'Global' entries
+        override section entries with the same key. The character count
+        reported by ``char_ops`` is written back into ``params['Global']``
+        as 'char_num'.
+        """
+        super(RecModel, self).__init__()
+        global_params = params['Global']
+        global_params['char_num'] = global_params['char_ops'].get_char_num()
+
+        def build(section):
+            # Section settings overlaid with the global ones.
+            merged = deepcopy(params[section])
+            merged.update(global_params)
+            return create_module(merged['function'])(params=merged)
+
+        self.tps = build("TPS") if "TPS" in params else None
+        self.backbone = build("Backbone")
+        self.head = build("Head")
+        self.loss = build("Loss")
+
+        self.loss_type = global_params['loss_type']
+        self.image_shape = global_params['image_shape']
+        self.max_text_length = global_params['max_text_length']
+
+    def create_feed(self, mode):
+        """Declare the input variables and, for training, the data loader.
+
+        Returns:
+            (image, labels, loader); ``labels`` and ``loader`` are None
+            unless ``mode`` is "train".
+        """
+        image_shape = deepcopy(self.image_shape)
+        # Leading -1 entry in front of the configured image shape.
+        image_shape.insert(0, -1)
+        image = fluid.data(name='image', shape=image_shape, dtype='float32')
+        if mode != "train":
+            return image, None, None
+
+        if self.loss_type == "attention":
+            label_in = fluid.data(
+                name='label_in', shape=[None, 1], dtype='int32', lod_level=1)
+            label_out = fluid.data(
+                name='label_out', shape=[None, 1], dtype='int32', lod_level=1)
+            feed_list = [image, label_in, label_out]
+            labels = {'label_in': label_in, 'label_out': label_out}
+        else:
+            label = fluid.data(
+                name='label', shape=[None, 1], dtype='int32', lod_level=1)
+            feed_list = [image, label]
+            labels = {'label': label}
+        loader = fluid.io.DataLoader.from_generator(
+            feed_list=feed_list,
+            capacity=64,
+            use_double_buffer=True,
+            iterable=False)
+        return image, labels, loader
+
+    def __call__(self, mode):
+        """Build the network for ``mode``.
+
+        Returns:
+            - "train": (loader, {'total_loss', 'decoded_out', 'label'})
+            - "export": [image, {'decoded_out'}]
+            - otherwise: (loader, {'decoded_out'})
+        """
+        image, labels, loader = self.create_feed(mode)
+        inputs = image if self.tps is None else self.tps(image)
+        conv_feas = self.backbone(inputs)
+        predicts = self.head(conv_feas, labels, mode)
+        decoded_out = predicts['decoded_out']
+
+        if mode == "export":
+            return [image, {'decoded_out': decoded_out}]
+        if mode != "train":
+            return loader, {'decoded_out': decoded_out}
+
+        loss = self.loss(predicts, labels)
+        # Attention training reports the 'label_out' sequence as label.
+        label_key = 'label_out' if self.loss_type == "attention" else 'label'
+        outputs = {
+            'total_loss': loss,
+            'decoded_out': decoded_out,
+            'label': labels[label_key],
+        }
+        return loader, outputs
--- a/ppocr/modeling/backbones/det_mobilenet_v3.py
+++ b/ppocr/modeling/backbones/det_mobilenet_v3.py
@ -0,0 +1,251 @@
+#Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import paddle.fluid as fluid
+from paddle.fluid.initializer import MSRA
+from paddle.fluid.param_attr import ParamAttr
+
+__all__ = ['MobileNetV3']
+
+
+class MobileNetV3():
+    def __init__(self, params):
+        """
+        The MobileNetV3 backbone network for the detection module.
+        Args:
+            params(dict): the hyper-parameters for building the network;
+                'scale' (channel multiplier, must be one of the supported
+                scales) and 'model_name' ("large" or "small") are read.
+        Raises:
+            NotImplementedError: if 'model_name' is neither "large" nor
+                "small".
+        """
+        self.scale = params['scale']
+        model_name = params['model_name']
+        self.inplanes = 16
+        # Each cfg row describes one residual unit:
+        #   k   - depthwise kernel size
+        #   exp - expansion (mid) channels
+        #   c   - output channels
+        #   se  - whether an SE block is used
+        #   nl  - activation ('relu' or 'hard_swish')
+        #   s   - stride of the depthwise conv
+        if model_name == "large":
+            self.cfg = [
+                # k, exp, c,  se,     nl,  s,
+                [3, 16, 16, False, 'relu', 1],
+                [3, 64, 24, False, 'relu', 2],
+                [3, 72, 24, False, 'relu', 1],
+                [5, 72, 40, True, 'relu', 2],
+                [5, 120, 40, True, 'relu', 1],
+                [5, 120, 40, True, 'relu', 1],
+                [3, 240, 80, False, 'hard_swish', 2],
+                [3, 200, 80, False, 'hard_swish', 1],
+                [3, 184, 80, False, 'hard_swish', 1],
+                [3, 184, 80, False, 'hard_swish', 1],
+                [3, 480, 112, True, 'hard_swish', 1],
+                [3, 672, 112, True, 'hard_swish', 1],
+                [5, 672, 160, True, 'hard_swish', 2],
+                [5, 960, 160, True, 'hard_swish', 1],
+                [5, 960, 160, True, 'hard_swish', 1],
+            ]
+            self.cls_ch_squeeze = 960
+            self.cls_ch_expand = 1280
+        elif model_name == "small":
+            self.cfg = [
+                # k, exp, c,  se,     nl,  s,
+                [3, 16, 16, True, 'relu', 2],
+                [3, 72, 24, False, 'relu', 2],
+                [3, 88, 24, False, 'relu', 1],
+                [5, 96, 40, True, 'hard_swish', 2],
+                [5, 240, 40, True, 'hard_swish', 1],
+                [5, 240, 40, True, 'hard_swish', 1],
+                [5, 120, 48, True, 'hard_swish', 1],
+                [5, 144, 48, True, 'hard_swish', 1],
+                [5, 288, 96, True, 'hard_swish', 2],
+                [5, 576, 96, True, 'hard_swish', 1],
+                [5, 576, 96, True, 'hard_swish', 1],
+            ]
+            self.cls_ch_squeeze = 576
+            self.cls_ch_expand = 1280
+        else:
+            raise NotImplementedError("mode[" + model_name +
+                                      "_model] is not implemented!")
+
+        supported_scale = [0.35, 0.5, 0.75, 1.0, 1.25]
+        assert self.scale in supported_scale, \
+            "supported scale are {} but input scale is {}".format(supported_scale, self.scale)
+
+    def __call__(self, input):
+        """
+        Build the backbone on ``input`` and return a list of feature maps:
+        the running feature map captured before each stride-2 unit whose
+        index is greater than 2, followed by the output of 'conv_last'.
+        """
+        scale = self.scale
+        inplanes = self.inplanes
+        cfg = self.cfg
+        cls_ch_squeeze = self.cls_ch_squeeze
+        # cls_ch_expand is read here but not used in this method.
+        cls_ch_expand = self.cls_ch_expand
+        #conv1
+        conv = self.conv_bn_layer(
+            input,
+            filter_size=3,
+            num_filters=self.make_divisible(inplanes * scale),
+            stride=2,
+            padding=1,
+            num_groups=1,
+            if_act=True,
+            act='hard_swish',
+            name='conv1')
+        i = 0
+        inplanes = self.make_divisible(inplanes * scale)
+        outs = []
+        for layer_cfg in cfg:
+            # Capture the feature map just before a stride-2 unit (only
+            # for unit indices above 2).
+            if layer_cfg[5] == 2 and i > 2:
+                outs.append(conv)
+            conv = self.residual_unit(
+                input=conv,
+                num_in_filter=inplanes,
+                num_mid_filter=self.make_divisible(scale * layer_cfg[1]),
+                num_out_filter=self.make_divisible(scale * layer_cfg[2]),
+                act=layer_cfg[4],
+                stride=layer_cfg[5],
+                filter_size=layer_cfg[0],
+                use_se=layer_cfg[3],
+                name='conv' + str(i + 2))
+            inplanes = self.make_divisible(scale * layer_cfg[2])
+            i += 1
+
+        conv = self.conv_bn_layer(
+            input=conv,
+            filter_size=1,
+            num_filters=self.make_divisible(scale * cls_ch_squeeze),
+            stride=1,
+            padding=0,
+            num_groups=1,
+            if_act=True,
+            act='hard_swish',
+            name='conv_last')
+        outs.append(conv)
+        return outs
+
+    def conv_bn_layer(self,
+                      input,
+                      filter_size,
+                      num_filters,
+                      stride,
+                      padding,
+                      num_groups=1,
+                      if_act=True,
+                      act=None,
+                      name=None,
+                      use_cudnn=True,
+                      res_last_bn_init=False):
+        """
+        Bias-free conv2d followed by batch norm and an optional activation.
+
+        Parameters are named from ``name``: '<name>_weights' for the conv
+        and '<name>_bn_scale' / '_offset' / '_mean' / '_variance' for the
+        batch norm. ``act`` is applied only when ``if_act`` is True and it
+        is 'relu' or 'hard_swish'. ``res_last_bn_init`` is accepted but
+        not used here.
+        """
+        conv = fluid.layers.conv2d(
+            input=input,
+            num_filters=num_filters,
+            filter_size=filter_size,
+            stride=stride,
+            padding=padding,
+            groups=num_groups,
+            act=None,
+            use_cudnn=use_cudnn,
+            param_attr=ParamAttr(name=name + '_weights'),
+            bias_attr=False)
+        bn_name = name + '_bn'
+        # Batch-norm scale and offset carry an L2 regularizer with
+        # coefficient 0.0.
+        bn = fluid.layers.batch_norm(
+            input=conv,
+            param_attr=ParamAttr(
+                name=bn_name + "_scale",
+                regularizer=fluid.regularizer.L2DecayRegularizer(
+                    regularization_coeff=0.0)),
+            bias_attr=ParamAttr(
+                name=bn_name + "_offset",
+                regularizer=fluid.regularizer.L2DecayRegularizer(
+                    regularization_coeff=0.0)),
+            moving_mean_name=bn_name + '_mean',
+            moving_variance_name=bn_name + '_variance')
+        if if_act:
+            if act == 'relu':
+                bn = fluid.layers.relu(bn)
+            elif act == 'hard_swish':
+                bn = fluid.layers.hard_swish(bn)
+        return bn
+
+    def make_divisible(self, v, divisor=8, min_value=None):
+        """
+        Round ``v`` to the nearest multiple of ``divisor``, no smaller than
+        ``min_value`` (which defaults to ``divisor``). If rounding would
+        drop the value below 90% of ``v``, one more ``divisor`` is added.
+        """
+        if min_value is None:
+            min_value = divisor
+        new_v = max(min_value, int(v + divisor / 2) // divisor * divisor)
+        if new_v < 0.9 * v:
+            new_v += divisor
+        return new_v
+
+    def se_block(self, input, num_out_filter, ratio=4, name=None):
+        """
+        SE block: global average pool, a 1x1 conv with relu down to
+        ``num_out_filter // ratio`` channels, a 1x1 conv with hard_sigmoid
+        back to ``num_out_filter`` channels, then an element-wise multiply
+        of ``input`` with the result.
+        """
+        num_mid_filter = num_out_filter // ratio
+        pool = fluid.layers.pool2d(
+            input=input, pool_type='avg', global_pooling=True, use_cudnn=False)
+        conv1 = fluid.layers.conv2d(
+            input=pool,
+            filter_size=1,
+            num_filters=num_mid_filter,
+            act='relu',
+            param_attr=ParamAttr(name=name + '_1_weights'),
+            bias_attr=ParamAttr(name=name + '_1_offset'))
+        conv2 = fluid.layers.conv2d(
+            input=conv1,
+            filter_size=1,
+            num_filters=num_out_filter,
+            act='hard_sigmoid',
+            param_attr=ParamAttr(name=name + '_2_weights'),
+            bias_attr=ParamAttr(name=name + '_2_offset'))
+        scale = fluid.layers.elementwise_mul(x=input, y=conv2, axis=0)
+        return scale
+
+    def residual_unit(self,
+                      input,
+                      num_in_filter,
+                      num_mid_filter,
+                      num_out_filter,
+                      stride,
+                      filter_size,
+                      act=None,
+                      use_se=False,
+                      name=None):
+        """
+        One residual unit: 1x1 expand conv, depthwise conv (groups equal
+        to ``num_mid_filter``), optional SE block, then a 1x1 linear conv
+        without activation. The input is added back only when the channel
+        count is unchanged and ``stride`` is 1.
+        """
+
+        conv0 = self.conv_bn_layer(
+            input=input,
+            filter_size=1,
+            num_filters=num_mid_filter,
+            stride=1,
+            padding=0,
+            if_act=True,
+            act=act,
+            name=name + '_expand')
+
+        conv1 = self.conv_bn_layer(
+            input=conv0,
+            filter_size=filter_size,
+            num_filters=num_mid_filter,
+            stride=stride,
+            padding=int((filter_size - 1) // 2),
+            if_act=True,
+            act=act,
+            num_groups=num_mid_filter,
+            use_cudnn=False,
+            name=name + '_depthwise')
+        if use_se:
+            conv1 = self.se_block(
+                input=conv1, num_out_filter=num_mid_filter, name=name + '_se')
+
+        conv2 = self.conv_bn_layer(
+            input=conv1,
+            filter_size=1,
+            num_filters=num_out_filter,
+            stride=1,
+            padding=0,
+            if_act=False,
+            name=name + '_linear',
+            res_last_bn_init=True)
+        # No shortcut when the unit changes channel count or stride.
+        if num_in_filter != num_out_filter or stride != 1:
+            return conv2
+        else:
+            return fluid.layers.elementwise_add(x=input, y=conv2, act=None)
--- a/ppocr/modeling/backbones/det_resnet_vd.py
+++ b/ppocr/modeling/backbones/det_resnet_vd.py
@ -0,0 +1,252 @@
+#Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import paddle.fluid as fluid
+from paddle.fluid.param_attr import ParamAttr
+
+__all__ = ["ResNet"]
+
+
+class ResNet(object):
+    """
+    ResNet-vd style backbone for the text detection module.
+
+    The graph is a stem (three 3x3 convs by default, or a single 7x7 conv
+    when `is_3x3` is False), a 3x3 max pool and four residual stages.
+    Calling the object returns the output of every stage.
+    """
+
+    # Number of residual units in each of the four stages, keyed by depth.
+    # 200 is kept because the builder knows its layout, although the
+    # constructor's assertion does not accept it.
+    _STAGE_DEPTHS = {
+        18: [2, 2, 2, 2],
+        34: [3, 4, 6, 3],
+        50: [3, 4, 6, 3],
+        101: [3, 4, 23, 3],
+        152: [3, 8, 36, 3],
+        200: [3, 12, 48, 3],
+    }
+    # Depths whose third stage names its units "a, b1, b2, ..." instead of
+    # "a, b, c, ...".
+    _NUMBERED_UNIT_LAYERS = (101, 152, 200)
+
+    def __init__(self, params):
+        """
+        the Resnet backbone network for detection module.
+        Args:
+            params(dict): hyperparameters for building the network; must
+                contain 'layers', one of 18, 34, 50, 101, 152
+        Raises:
+            AssertionError: if params['layers'] is not a supported depth
+        """
+        self.layers = params['layers']
+        supported_layers = [18, 34, 50, 101, 152]
+        assert self.layers in supported_layers, \
+            "supported layers are {} but input layer is {}".format(supported_layers, self.layers)
+        self.is_3x3 = True
+
+    def __call__(self, input):
+        """
+        Build the backbone on top of `input`.
+        Args:
+            input: image variable; assumed to be NCHW because shortcut()
+                reads the channel count from shape[1] (TODO confirm)
+        Returns:
+            list: the outputs of the four residual stages, in stage order
+        """
+        layers = self.layers
+        depth = self._STAGE_DEPTHS[layers]
+        num_filters = [64, 128, 256, 512]
+
+        conv = self._stem(input)
+        conv = fluid.layers.pool2d(
+            input=conv,
+            pool_size=3,
+            pool_stride=2,
+            pool_padding=1,
+            pool_type='max')
+
+        # Bottleneck units from 50 layers up, basic units below
+        if layers >= 50:
+            build_unit = self.bottleneck_block
+        else:
+            build_unit = self.basic_block
+
+        outs = []
+        for block in range(len(depth)):
+            for i in range(depth[block]):
+                conv = build_unit(
+                    input=conv,
+                    num_filters=num_filters[block],
+                    # every stage but the first halves the resolution in
+                    # its first unit
+                    stride=2 if i == 0 and block != 0 else 1,
+                    if_first=block == i == 0,
+                    name=self._unit_name(block, i))
+            outs.append(conv)
+        return outs
+
+    def _stem(self, input):
+        """Build the stem convs that precede the max pool."""
+        if not self.is_3x3:
+            # name='conv1' is required here: conv_bn_layer derives every
+            # parameter name from `name` and raises TypeError on None.
+            return self.conv_bn_layer(
+                input=input,
+                num_filters=64,
+                filter_size=7,
+                stride=2,
+                act='relu',
+                name='conv1')
+        conv = input
+        for suffix, width, stride in (('1', 32, 2), ('2', 32, 1),
+                                      ('3', 64, 1)):
+            conv = self.conv_bn_layer(
+                input=conv,
+                num_filters=width,
+                filter_size=3,
+                stride=stride,
+                act='relu',
+                name='conv1_' + suffix)
+        return conv
+
+    def _unit_name(self, block, i):
+        """Return the parameter-name prefix of unit `i` in stage `block`."""
+        stage = "res" + str(block + 2)
+        if self.layers in self._NUMBERED_UNIT_LAYERS and block == 2:
+            return stage + "a" if i == 0 else stage + "b" + str(i)
+        return stage + chr(97 + i)
+
+    def _batch_norm(self, conv, act, name):
+        """Apply batch norm (plus optional `act`), naming params from `name`."""
+        # "conv1" maps to "bn_conv1"; any other name has its first three
+        # characters replaced by "bn" ("res2a_branch1" -> "bn2a_branch1").
+        if name == "conv1":
+            bn_name = "bn_" + name
+        else:
+            bn_name = "bn" + name[3:]
+        return fluid.layers.batch_norm(
+            input=conv,
+            act=act,
+            param_attr=ParamAttr(name=bn_name + '_scale'),
+            bias_attr=ParamAttr(bn_name + '_offset'),
+            moving_mean_name=bn_name + '_mean',
+            moving_variance_name=bn_name + '_variance')
+
+    def conv_bn_layer(self,
+                      input,
+                      num_filters,
+                      filter_size,
+                      stride=1,
+                      groups=1,
+                      act=None,
+                      name=None):
+        """
+        Bias-free conv2d followed by batch norm.
+        Args:
+            input: input variable
+            num_filters(int): output channels
+            filter_size(int): kernel size; padding is (filter_size - 1) // 2
+            stride: conv stride
+            groups(int): conv groups
+            act(str|None): activation applied by the batch norm layer
+            name(str): parameter-name prefix; despite the None default a
+                string must be supplied, names are built by concatenation
+        Returns:
+            the batch-normalised output variable
+        """
+        conv = fluid.layers.conv2d(
+            input=input,
+            num_filters=num_filters,
+            filter_size=filter_size,
+            stride=stride,
+            padding=(filter_size - 1) // 2,
+            groups=groups,
+            act=None,
+            param_attr=ParamAttr(name=name + "_weights"),
+            bias_attr=False)
+        return self._batch_norm(conv, act, name)
+
+    def conv_bn_layer_new(self,
+                          input,
+                          num_filters,
+                          filter_size,
+                          stride=1,
+                          groups=1,
+                          act=None,
+                          name=None):
+        """
+        2x2 average pool (stride 2, ceil mode) followed by a stride-1
+        bias-free conv2d and batch norm.
+
+        `stride` is accepted for signature parity with conv_bn_layer but is
+        not used: the down-sampling is always done by the 2x2 pool.
+        The remaining arguments match conv_bn_layer.
+        """
+        pool = fluid.layers.pool2d(
+            input=input,
+            pool_size=2,
+            pool_stride=2,
+            pool_padding=0,
+            pool_type='avg',
+            ceil_mode=True)
+        conv = fluid.layers.conv2d(
+            input=pool,
+            num_filters=num_filters,
+            filter_size=filter_size,
+            stride=1,
+            padding=(filter_size - 1) // 2,
+            groups=groups,
+            act=None,
+            param_attr=ParamAttr(name=name + "_weights"),
+            bias_attr=False)
+        return self._batch_norm(conv, act, name)
+
+    def shortcut(self, input, ch_out, stride, name, if_first=False):
+        """
+        Build the skip branch of a residual unit.
+        Returns:
+            a 1x1 conv_bn_layer projection for the first unit of the
+            network, a conv_bn_layer_new projection when channels or
+            resolution change, otherwise `input` itself
+        """
+        ch_in = input.shape[1]
+        if if_first:
+            return self.conv_bn_layer(input, ch_out, 1, stride, name=name)
+        if ch_in != ch_out or stride != 1:
+            return self.conv_bn_layer_new(input, ch_out, 1, stride, name=name)
+        return input
+
+    def bottleneck_block(self, input, num_filters, stride, name, if_first):
+        """
+        Bottleneck residual unit: 1x1 -> 3x3 (strided) -> 1x1 convs whose
+        output (4 * num_filters channels) is added to the shortcut and
+        passed through relu.
+        """
+        conv0 = self.conv_bn_layer(
+            input=input,
+            num_filters=num_filters,
+            filter_size=1,
+            act='relu',
+            name=name + "_branch2a")
+        conv1 = self.conv_bn_layer(
+            input=conv0,
+            num_filters=num_filters,
+            filter_size=3,
+            stride=stride,
+            act='relu',
+            name=name + "_branch2b")
+        conv2 = self.conv_bn_layer(
+            input=conv1,
+            num_filters=num_filters * 4,
+            filter_size=1,
+            act=None,
+            name=name + "_branch2c")
+
+        short = self.shortcut(
+            input,
+            num_filters * 4,
+            stride,
+            if_first=if_first,
+            name=name + "_branch1")
+
+        return fluid.layers.elementwise_add(x=short, y=conv2, act='relu')
+
+    def basic_block(self, input, num_filters, stride, name, if_first):
+        """
+        Basic residual unit: two 3x3 convs (the first one strided) whose
+        output is added to the shortcut and passed through relu.
+        """
+        conv0 = self.conv_bn_layer(
+            input=input,
+            num_filters=num_filters,
+            filter_size=3,
+            act='relu',
+            stride=stride,
+            name=name + "_branch2a")
+        conv1 = self.conv_bn_layer(
+            input=conv0,
+            num_filters=num_filters,
+            filter_size=3,
+            act=None,
+            name=name + "_branch2b")
+        short = self.shortcut(
+            input,
+            num_filters,
+            stride,
+            if_first=if_first,
+            name=name + "_branch1")
+        return fluid.layers.elementwise_add(x=short, y=conv1, act='relu')
--- a/ppocr/modeling/backbones/rec_mobilenet_v3.py
+++ b/ppocr/modeling/backbones/rec_mobilenet_v3.py
@ -0,0 +1,255 @@
+#Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import paddle.fluid as fluid
+from paddle.fluid.initializer import MSRA
+from paddle.fluid.param_attr import ParamAttr
+
+# Export only names that this module defines: listing undefined names makes
+# `from <module> import *` fail with AttributeError.
+__all__ = ['MobileNetV3']
+
+
+class MobileNetV3():
+    """
+    MobileNetV3 backbone for the text recognition module.
+
+    Each row of `self.cfg` describes one residual unit as
+    [kernel size, expand channels, output channels, use SE, activation,
+    stride]. Down-sampling rows use a (2, 1) stride tuple -- presumably
+    (height, width) per conv2d's stride convention; TODO confirm.
+    """
+
+    def __init__(self, params):
+        """
+        Args:
+            params(dict): must contain 'scale' (one of 0.35, 0.5, 0.75,
+                1.0, 1.25) and 'model_name' ("large" or "small")
+        Raises:
+            NotImplementedError: if model_name is neither "large" nor "small"
+            AssertionError: if scale is not a supported value
+        """
+        self.scale = params['scale']
+        model_name = params['model_name']
+        self.inplanes = 16
+        if model_name == "large":
+            self.cfg = [
+                # k, exp, c,  se,     nl,  s,
+                [3, 16, 16, False, 'relu', 1],
+                [3, 64, 24, False, 'relu', (2, 1)],
+                [3, 72, 24, False, 'relu', 1],
+                [5, 72, 40, True, 'relu', (2, 1)],
+                [5, 120, 40, True, 'relu', 1],
+                [5, 120, 40, True, 'relu', 1],
+                [3, 240, 80, False, 'hard_swish', 1],
+                [3, 200, 80, False, 'hard_swish', 1],
+                [3, 184, 80, False, 'hard_swish', 1],
+                [3, 184, 80, False, 'hard_swish', 1],
+                [3, 480, 112, True, 'hard_swish', 1],
+                [3, 672, 112, True, 'hard_swish', 1],
+                [5, 672, 160, True, 'hard_swish', (2, 1)],
+                [5, 960, 160, True, 'hard_swish', 1],
+                [5, 960, 160, True, 'hard_swish', 1],
+            ]
+            self.cls_ch_squeeze = 960
+            self.cls_ch_expand = 1280
+        elif model_name == "small":
+            self.cfg = [
+                # k, exp, c,  se,     nl,  s,
+                [3, 16, 16, True, 'relu', (2, 1)],
+                [3, 72, 24, False, 'relu', (2, 1)],
+                [3, 88, 24, False, 'relu', 1],
+                [5, 96, 40, True, 'hard_swish', (2, 1)],
+                [5, 240, 40, True, 'hard_swish', 1],
+                [5, 240, 40, True, 'hard_swish', 1],
+                [5, 120, 48, True, 'hard_swish', 1],
+                [5, 144, 48, True, 'hard_swish', 1],
+                [5, 288, 96, True, 'hard_swish', (2, 1)],
+                [5, 576, 96, True, 'hard_swish', 1],
+                [5, 576, 96, True, 'hard_swish', 1],
+            ]
+            self.cls_ch_squeeze = 576
+            self.cls_ch_expand = 1280
+        else:
+            raise NotImplementedError("mode[" + model_name +
+                                      "_model] is not implemented!")
+
+        supported_scale = [0.35, 0.5, 0.75, 1.0, 1.25]
+        # The message must use self.scale: a bare `scale` is undefined in
+        # this scope and would turn the assertion failure into a NameError.
+        assert self.scale in supported_scale, \
+            "supported scale are {} but input scale is {}".format(supported_scale, self.scale)
+
+    def __call__(self, input):
+        """
+        Build the backbone on top of `input`.
+        Returns:
+            the feature map after the stem conv, every residual unit in
+            self.cfg, the 1x1 'conv_last' layer and a final 2x2 max pool
+        """
+        scale = self.scale
+        # stem ("conv1")
+        stem_width = self.make_divisible(self.inplanes * scale)
+        conv = self.conv_bn_layer(
+            input,
+            filter_size=3,
+            num_filters=stem_width,
+            stride=2,
+            padding=1,
+            num_groups=1,
+            if_act=True,
+            act='hard_swish',
+            name='conv1')
+
+        inplanes = stem_width
+        for i, layer_cfg in enumerate(self.cfg):
+            kernel, exp_ch, out_ch, use_se, nl, stride = layer_cfg[:6]
+            out_width = self.make_divisible(scale * out_ch)
+            conv = self.residual_unit(
+                input=conv,
+                num_in_filter=inplanes,
+                num_mid_filter=self.make_divisible(scale * exp_ch),
+                num_out_filter=out_width,
+                act=nl,
+                stride=stride,
+                filter_size=kernel,
+                use_se=use_se,
+                # units are numbered from conv2; conv1 is the stem
+                name='conv' + str(i + 2))
+            inplanes = out_width
+
+        conv = self.conv_bn_layer(
+            input=conv,
+            filter_size=1,
+            num_filters=self.make_divisible(scale * self.cls_ch_squeeze),
+            stride=1,
+            padding=0,
+            num_groups=1,
+            if_act=True,
+            act='hard_swish',
+            name='conv_last')
+
+        conv = fluid.layers.pool2d(
+            input=conv,
+            pool_size=2,
+            pool_stride=2,
+            pool_padding=0,
+            pool_type='max')
+        return conv
+
+    def conv_bn_layer(self,
+                      input,
+                      filter_size,
+                      num_filters,
+                      stride,
+                      padding,
+                      num_groups=1,
+                      if_act=True,
+                      act=None,
+                      name=None,
+                      use_cudnn=True,
+                      res_last_bn_init=False):
+        """
+        Bias-free conv2d followed by batch norm and an optional activation.
+        Args:
+            input: input variable
+            filter_size, num_filters, stride, padding: forwarded to conv2d
+            num_groups(int): conv groups
+            if_act(bool): apply `act` after the batch norm
+            act(str|None): 'relu' or 'hard_swish'; any other value applies
+                no activation
+            name(str): parameter-name prefix (a string is required even
+                though the default is None)
+            use_cudnn(bool): forwarded to conv2d
+            res_last_bn_init(bool): accepted but not used by this method
+        Returns:
+            the output variable
+        """
+        conv = fluid.layers.conv2d(
+            input=input,
+            num_filters=num_filters,
+            filter_size=filter_size,
+            stride=stride,
+            padding=padding,
+            groups=num_groups,
+            act=None,
+            use_cudnn=use_cudnn,
+            param_attr=ParamAttr(name=name + '_weights'),
+            bias_attr=False)
+        bn_name = name + '_bn'
+        # BN scale/offset carry an L2 regularizer with coefficient 0.0
+        bn = fluid.layers.batch_norm(
+            input=conv,
+            param_attr=ParamAttr(
+                name=bn_name + "_scale",
+                regularizer=fluid.regularizer.L2DecayRegularizer(
+                    regularization_coeff=0.0)),
+            bias_attr=ParamAttr(
+                name=bn_name + "_offset",
+                regularizer=fluid.regularizer.L2DecayRegularizer(
+                    regularization_coeff=0.0)),
+            moving_mean_name=bn_name + '_mean',
+            moving_variance_name=bn_name + '_variance')
+        if not if_act:
+            return bn
+        if act == 'relu':
+            return fluid.layers.relu(bn)
+        if act == 'hard_swish':
+            return fluid.layers.hard_swish(bn)
+        return bn
+
+    def make_divisible(self, v, divisor=8, min_value=None):
+        """
+        Round `v` to the nearest multiple of `divisor`, never going below
+        `min_value` (defaults to `divisor`) nor below 90% of `v`.
+        """
+        if min_value is None:
+            min_value = divisor
+        new_v = max(min_value, int(v + divisor / 2) // divisor * divisor)
+        # rounding down must not remove more than 10% of the channels
+        if new_v < 0.9 * v:
+            new_v += divisor
+        return new_v
+
+    def se_block(self, input, num_out_filter, ratio=4, name=None):
+        """
+        Squeeze-and-excitation: global average pool -> 1x1 conv (relu,
+        num_out_filter // ratio channels) -> 1x1 conv (hard_sigmoid,
+        num_out_filter channels), then multiply `input` by the result.
+        """
+        num_mid_filter = num_out_filter // ratio
+        pool = fluid.layers.pool2d(
+            input=input, pool_type='avg', global_pooling=True, use_cudnn=False)
+        conv1 = fluid.layers.conv2d(
+            input=pool,
+            filter_size=1,
+            num_filters=num_mid_filter,
+            act='relu',
+            param_attr=ParamAttr(name=name + '_1_weights'),
+            bias_attr=ParamAttr(name=name + '_1_offset'))
+        conv2 = fluid.layers.conv2d(
+            input=conv1,
+            filter_size=1,
+            num_filters=num_out_filter,
+            act='hard_sigmoid',
+            param_attr=ParamAttr(name=name + '_2_weights'),
+            bias_attr=ParamAttr(name=name + '_2_offset'))
+        scale = fluid.layers.elementwise_mul(x=input, y=conv2, axis=0)
+        return scale
+
+    def residual_unit(self,
+                      input,
+                      num_in_filter,
+                      num_mid_filter,
+                      num_out_filter,
+                      stride,
+                      filter_size,
+                      act=None,
+                      use_se=False,
+                      name=None):
+        """
+        Inverted-residual unit: 1x1 expand -> depthwise conv -> optional
+        SE -> 1x1 linear projection.
+
+        `input` is added to the result only when the channel count is
+        unchanged and `stride` equals 1; a tuple stride such as (2, 1)
+        never equals 1, so down-sampling units have no skip connection.
+        """
+        conv0 = self.conv_bn_layer(
+            input=input,
+            filter_size=1,
+            num_filters=num_mid_filter,
+            stride=1,
+            padding=0,
+            if_act=True,
+            act=act,
+            name=name + '_expand')
+
+        # depthwise: one group per channel
+        conv1 = self.conv_bn_layer(
+            input=conv0,
+            filter_size=filter_size,
+            num_filters=num_mid_filter,
+            stride=stride,
+            padding=int((filter_size - 1) // 2),
+            if_act=True,
+            act=act,
+            num_groups=num_mid_filter,
+            use_cudnn=False,
+            name=name + '_depthwise')
+        if use_se:
+            conv1 = self.se_block(
+                input=conv1, num_out_filter=num_mid_filter, name=name + '_se')
+
+        conv2 = self.conv_bn_layer(
+            input=conv1,
+            filter_size=1,
+            num_filters=num_out_filter,
+            stride=1,
+            padding=0,
+            if_act=False,
+            name=name + '_linear',
+            res_last_bn_init=True)
+        if num_in_filter != num_out_filter or stride != 1:
+            return conv2
+        return fluid.layers.elementwise_add(x=input, y=conv2, act=None)
--- a/ppocr/modeling/backbones/rec_resnet_vd.py
+++ b/ppocr/modeling/backbones/rec_resnet_vd.py
@ -0,0 +1,271 @@
+#Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import math
+
+import paddle
+import paddle.fluid as fluid
+from paddle.fluid.param_attr import ParamAttr
+
+# Export only names that this module defines: listing undefined names makes
+# `from <module> import *` fail with AttributeError.
+__all__ = ["ResNet"]
+
+
+class ResNet():
+    """
+    ResNet-vd style backbone for the text recognition module.
+
+    The graph is a stride-1 stem (three 3x3 convs by default, or a single
+    7x7 conv when `is_3x3` is False), a 3x3 max pool, four residual stages
+    whose down-sampling units use a (2, 1) stride, and a final 2x2 max
+    pool. The (2, 1) stride is presumably (height, width) per conv2d's
+    stride convention -- TODO confirm.
+    """
+
+    # Number of residual units in each of the four stages, keyed by depth.
+    _STAGE_DEPTHS = {
+        18: [2, 2, 2, 2],
+        34: [3, 4, 6, 3],
+        50: [3, 4, 6, 3],
+        101: [3, 4, 23, 3],
+        152: [3, 8, 36, 3],
+        200: [3, 12, 48, 3],
+    }
+    # Depths whose third stage names its units "a, b1, b2, ..." instead of
+    # "a, b, c, ...".
+    _NUMBERED_UNIT_LAYERS = (101, 152, 200)
+
+    def __init__(self, params):
+        """
+        Args:
+            params(dict): must contain 'layers', one of
+                18, 34, 50, 101, 152, 200
+        Raises:
+            AssertionError: if params['layers'] is not a supported depth
+        """
+        self.layers = params['layers']
+        self.is_3x3 = True
+        supported_layers = [18, 34, 50, 101, 152, 200]
+        assert self.layers in supported_layers, \
+            "supported layers are {} but input layer is {}".format(supported_layers, self.layers)
+
+    def __call__(self, input):
+        """
+        Build the backbone on top of `input`.
+        Args:
+            input: image variable; assumed to be NCHW because shortcut()
+                reads the channel count from shape[1] (TODO confirm)
+        Returns:
+            the feature map after the last stage and the final max pool
+        """
+        layers = self.layers
+        depth = self._STAGE_DEPTHS[layers]
+        num_filters = [64, 128, 256, 512]
+
+        conv = self._stem(input)
+        conv = fluid.layers.pool2d(
+            input=conv,
+            pool_size=3,
+            pool_stride=2,
+            pool_padding=1,
+            pool_type='max')
+
+        # Bottleneck units from 50 layers up, basic units below
+        if layers >= 50:
+            build_unit = self.bottleneck_block
+        else:
+            build_unit = self.basic_block
+
+        for block in range(len(depth)):
+            for i in range(depth[block]):
+                # every stage but the first down-samples in its first unit
+                if i == 0 and block != 0:
+                    stride = (2, 1)
+                else:
+                    stride = (1, 1)
+                conv = build_unit(
+                    input=conv,
+                    num_filters=num_filters[block],
+                    stride=stride,
+                    if_first=block == i == 0,
+                    name=self._unit_name(block, i))
+
+        conv = fluid.layers.pool2d(
+            input=conv,
+            pool_size=2,
+            pool_stride=2,
+            pool_padding=0,
+            pool_type='max')
+
+        return conv
+
+    def _stem(self, input):
+        """Build the stride-1 stem convs that precede the first max pool."""
+        if not self.is_3x3:
+            # name='conv1' is required here: conv_bn_layer derives every
+            # parameter name from `name` and raises TypeError on None.
+            return self.conv_bn_layer(
+                input=input,
+                num_filters=64,
+                filter_size=7,
+                stride=1,
+                act='relu',
+                name='conv1')
+        conv = input
+        for suffix, width in (('1', 32), ('2', 32), ('3', 64)):
+            conv = self.conv_bn_layer(
+                input=conv,
+                num_filters=width,
+                filter_size=3,
+                stride=1,
+                act='relu',
+                name='conv1_' + suffix)
+        return conv
+
+    def _unit_name(self, block, i):
+        """Return the parameter-name prefix of unit `i` in stage `block`."""
+        stage = "res" + str(block + 2)
+        if self.layers in self._NUMBERED_UNIT_LAYERS and block == 2:
+            return stage + "a" if i == 0 else stage + "b" + str(i)
+        return stage + chr(97 + i)
+
+    def _batch_norm(self, conv, act, name):
+        """Apply batch norm (plus optional `act`), naming params from `name`."""
+        # "conv1" maps to "bn_conv1"; any other name has its first three
+        # characters replaced by "bn" ("res2a_branch1" -> "bn2a_branch1").
+        if name == "conv1":
+            bn_name = "bn_" + name
+        else:
+            bn_name = "bn" + name[3:]
+        return fluid.layers.batch_norm(
+            input=conv,
+            act=act,
+            param_attr=ParamAttr(name=bn_name + '_scale'),
+            bias_attr=ParamAttr(bn_name + '_offset'),
+            moving_mean_name=bn_name + '_mean',
+            moving_variance_name=bn_name + '_variance')
+
+    def conv_bn_layer(self,
+                      input,
+                      num_filters,
+                      filter_size,
+                      stride=1,
+                      groups=1,
+                      act=None,
+                      name=None):
+        """
+        Bias-free conv2d followed by batch norm.
+        Args:
+            input: input variable
+            num_filters(int): output channels
+            filter_size(int): kernel size; padding is (filter_size - 1) // 2
+            stride: conv stride (int or 2-item tuple)
+            groups(int): conv groups
+            act(str|None): activation applied by the batch norm layer
+            name(str): parameter-name prefix; despite the None default a
+                string must be supplied, names are built by concatenation
+        Returns:
+            the batch-normalised output variable
+        """
+        conv = fluid.layers.conv2d(
+            input=input,
+            num_filters=num_filters,
+            filter_size=filter_size,
+            stride=stride,
+            padding=(filter_size - 1) // 2,
+            groups=groups,
+            act=None,
+            param_attr=ParamAttr(name=name + "_weights"),
+            bias_attr=False)
+        return self._batch_norm(conv, act, name)
+
+    def conv_bn_layer_new(self,
+                          input,
+                          num_filters,
+                          filter_size,
+                          stride=1,
+                          groups=1,
+                          act=None,
+                          name=None):
+        """
+        Average pool followed by a stride-1 bias-free conv2d and batch norm.
+
+        `stride` is used as both the pool window and the pool stride (ceil
+        mode), so the pool does the down-sampling and the conv keeps
+        stride 1. The remaining arguments match conv_bn_layer.
+        """
+        pool = fluid.layers.pool2d(
+            input=input,
+            pool_size=stride,
+            pool_stride=stride,
+            pool_padding=0,
+            pool_type='avg',
+            ceil_mode=True)
+
+        conv = fluid.layers.conv2d(
+            input=pool,
+            num_filters=num_filters,
+            filter_size=filter_size,
+            stride=1,
+            padding=(filter_size - 1) // 2,
+            groups=groups,
+            act=None,
+            param_attr=ParamAttr(name=name + "_weights"),
+            bias_attr=False)
+        return self._batch_norm(conv, act, name)
+
+    def shortcut(self, input, ch_out, stride, name, if_first=False):
+        """
+        Build the skip branch of a residual unit.
+        Args:
+            stride: 2-item sequence; only stride[0] is compared with 1
+        Returns:
+            a 1x1 conv_bn_layer projection for the first unit of the
+            network, a conv_bn_layer_new projection when channels or
+            stride[0] change, otherwise `input` itself
+        """
+        ch_in = input.shape[1]
+        if if_first:
+            return self.conv_bn_layer(input, ch_out, 1, stride, name=name)
+        if ch_in != ch_out or stride[0] != 1:
+            return self.conv_bn_layer_new(input, ch_out, 1, stride, name=name)
+        return input
+
+    def bottleneck_block(self, input, num_filters, stride, name, if_first):
+        """
+        Bottleneck residual unit: 1x1 -> 3x3 (strided) -> 1x1 convs whose
+        output (4 * num_filters channels) is added to the shortcut and
+        passed through relu.
+        """
+        conv0 = self.conv_bn_layer(
+            input=input,
+            num_filters=num_filters,
+            filter_size=1,
+            act='relu',
+            name=name + "_branch2a")
+        conv1 = self.conv_bn_layer(
+            input=conv0,
+            num_filters=num_filters,
+            filter_size=3,
+            stride=stride,
+            act='relu',
+            name=name + "_branch2b")
+        conv2 = self.conv_bn_layer(
+            input=conv1,
+            num_filters=num_filters * 4,
+            filter_size=1,
+            act=None,
+            name=name + "_branch2c")
+
+        short = self.shortcut(
+            input,
+            num_filters * 4,
+            stride,
+            if_first=if_first,
+            name=name + "_branch1")
+
+        return fluid.layers.elementwise_add(x=short, y=conv2, act='relu')
+
+    def basic_block(self, input, num_filters, stride, name, if_first):
+        """
+        Basic residual unit: two 3x3 convs (the first one strided) whose
+        output is added to the shortcut and passed through relu.
+        """
+        conv0 = self.conv_bn_layer(
+            input=input,
+            num_filters=num_filters,
+            filter_size=3,
+            act='relu',
+            stride=stride,
+            name=name + "_branch2a")
+        conv1 = self.conv_bn_layer(
+            input=conv0,
+            num_filters=num_filters,
+            filter_size=3,
+            act=None,
+            name=name + "_branch2b")
+        short = self.shortcut(
+            input,
+            num_filters,
+            stride,
+            if_first=if_first,
+            name=name + "_branch1")
+        return fluid.layers.elementwise_add(x=short, y=conv1, act='relu')
--- a/ppocr/modeling/common_functions.py
+++ b/ppocr/modeling/common_functions.py
@ -0,0 +1,95 @@
+#Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import paddle
+import paddle.fluid as fluid
+from paddle.fluid.param_attr import ParamAttr
+import math
+
+
+def get_para_bias_attr(l2_decay, k, name):
+    """
+    Create the weight and bias ParamAttr pair for a layer.
+
+    Both attributes share one L2Decay regularizer and one Uniform
+    initializer bounded by +/- 1 / sqrt(k).
+    Args:
+        l2_decay: coefficient handed to fluid.regularizer.L2Decay
+        k: value whose inverse square root bounds the uniform init
+            (presumably the layer's fan-in; verify against callers)
+        name(str): prefix for the two parameter names
+    Returns:
+        list: [weight attr named name + "_w_attr",
+               bias attr named name + "_b_attr"]
+    """
+    weight_decay = fluid.regularizer.L2Decay(l2_decay)
+    bound = 1.0 / math.sqrt(k * 1.0)
+    uniform_init = fluid.initializer.Uniform(-bound, bound)
+    return [
+        fluid.ParamAttr(
+            regularizer=weight_decay,
+            initializer=uniform_init,
+            name=name + suffix) for suffix in ("_w_attr", "_b_attr")
+    ]
+
+
+def conv_bn_layer(input,
+                  num_filters,
+                  filter_size,
+                  stride=1,
+                  groups=1,
+                  act=None,
+                  name=None):
+    """
+    Bias-free conv2d followed by batch norm.
+    Args:
+        input: input variable
+        num_filters(int): output channels
+        filter_size(int): kernel size; padding is (filter_size - 1) // 2
+        stride: conv stride
+        groups(int): conv groups
+        act(str|None): activation applied by the batch norm layer
+        name(str): prefix for layer and parameter names; a string is
+            required even though the default is None
+    Returns:
+        the batch-normalised output variable
+    """
+    conv_kwargs = dict(
+        input=input,
+        num_filters=num_filters,
+        filter_size=filter_size,
+        stride=stride,
+        padding=(filter_size - 1) // 2,
+        groups=groups,
+        act=None,
+        param_attr=ParamAttr(name=name + "_weights"),
+        bias_attr=False,
+        name=name + '.conv2d')
+    features = fluid.layers.conv2d(**conv_kwargs)
+
+    # every batch-norm name hangs off "bn_<name>"
+    prefix = "bn_" + name
+    bn_kwargs = dict(
+        input=features,
+        act=act,
+        name=prefix + '.output',
+        param_attr=ParamAttr(name=prefix + '_scale'),
+        bias_attr=ParamAttr(prefix + '_offset'),
+        moving_mean_name=prefix + '_mean',
+        moving_variance_name=prefix + '_variance')
+    return fluid.layers.batch_norm(**bn_kwargs)
+
+
+def deconv_bn_layer(input,
+                    num_filters,
+                    filter_size=4,
+                    stride=2,
+                    act='relu',
+                    name=None):
+    """
+    Bias-free conv2d_transpose (padding 1) followed by batch norm.
+    Args:
+        input: input variable
+        num_filters(int): output channels
+        filter_size(int): kernel size of the transposed conv
+        stride: stride of the transposed conv
+        act(str|None): activation applied by the batch norm layer
+        name(str): prefix for layer and parameter names; a string is
+            required even though the default is None
+    Returns:
+        the batch-normalised output variable
+    """
+    deconv_kwargs = dict(
+        input=input,
+        num_filters=num_filters,
+        filter_size=filter_size,
+        stride=stride,
+        padding=1,
+        act=None,
+        param_attr=ParamAttr(name=name + "_weights"),
+        bias_attr=False,
+        name=name + '.deconv2d')
+    upsampled = fluid.layers.conv2d_transpose(**deconv_kwargs)
+
+    # every batch-norm name hangs off "bn_<name>"
+    prefix = "bn_" + name
+    bn_kwargs = dict(
+        input=upsampled,
+        act=act,
+        name=prefix + '.output',
+        param_attr=ParamAttr(name=prefix + '_scale'),
+        bias_attr=ParamAttr(prefix + '_offset'),
+        moving_mean_name=prefix + '_mean',
+        moving_variance_name=prefix + '_variance')
+    return fluid.layers.batch_norm(**bn_kwargs)
+
+
+def create_tmp_var(program, name, dtype, shape, lod_level=0):
+    """
+    Create a variable in the current block of `program`.
+    Args:
+        program: object exposing current_block()
+        name, dtype, shape, lod_level: forwarded unchanged to the
+            block's create_var()
+    Returns:
+        whatever create_var() returns
+    """
+    block = program.current_block()
+    var_spec = dict(name=name, dtype=dtype, shape=shape, lod_level=lod_level)
+    return block.create_var(**var_spec)
--- a/ppocr/modeling/heads/det_db_head.py
+++ b/ppocr/modeling/heads/det_db_head.py
@ -0,0 +1,206 @@
+#Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+#
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import math
+
+import paddle.fluid as fluid
+
+
+class DBHead(object):
+    """
+    Differentiable Binarization (DB) for text detection:
+        see https://arxiv.org/abs/1911.08947
+    args:
+        params(dict): hyper-parameters for building the DB network; the keys
+            'k', 'inner_channels' and 'image_shape' are read.
+    """
+
+    def __init__(self, params):
+        self.k = params['k']
+        self.inner_channels = params['inner_channels']
+        self.C, self.H, self.W = params['image_shape']
+
+    def _build_map_branch(self, x):
+        """
+        Build the sub-network shared by binarize() and thresh(): a 3x3
+        convolution + batch norm + ReLU, a stride-2 transposed convolution
+        + batch norm + ReLU, a second stride-2 transposed convolution down
+        to one channel, and a final sigmoid.
+
+        NOTE(review): the transposed-convolution biases are named
+        "conv2_b_attr" and "conv3_b_attr" for every caller, so the two
+        branches appear to share those bias parameters. Kept as is because
+        renaming would change parameter names in existing checkpoints -
+        confirm whether the sharing is intended.
+        """
+        conv1 = fluid.layers.conv2d(
+            input=x,
+            num_filters=self.inner_channels // 4,
+            filter_size=3,
+            padding=1,
+            param_attr=fluid.initializer.MSRAInitializer(uniform=False),
+            bias_attr=False)
+        conv_bn1 = fluid.layers.batch_norm(
+            input=conv1,
+            param_attr=fluid.initializer.ConstantInitializer(value=1.0),
+            bias_attr=fluid.initializer.ConstantInitializer(value=1e-4),
+            act="relu")
+        conv2 = fluid.layers.conv2d_transpose(
+            input=conv_bn1,
+            num_filters=self.inner_channels // 4,
+            filter_size=2,
+            stride=2,
+            param_attr=fluid.initializer.MSRAInitializer(uniform=False),
+            bias_attr=self._get_bias_attr(0.0004, conv_bn1.shape[1], "conv2"),
+            act=None)
+        conv_bn2 = fluid.layers.batch_norm(
+            input=conv2,
+            param_attr=fluid.initializer.ConstantInitializer(value=1.0),
+            bias_attr=fluid.initializer.ConstantInitializer(value=1e-4),
+            act="relu")
+        conv3 = fluid.layers.conv2d_transpose(
+            input=conv_bn2,
+            num_filters=1,
+            filter_size=2,
+            stride=2,
+            param_attr=fluid.initializer.MSRAInitializer(uniform=False),
+            bias_attr=self._get_bias_attr(0.0004, conv_bn2.shape[1], "conv3"),
+            act=None)
+        return fluid.layers.sigmoid(conv3)
+
+    def binarize(self, x):
+        """Build the branch whose output __call__ uses as the shrink map."""
+        return self._build_map_branch(x)
+
+    def thresh(self, x):
+        """Build the branch whose output __call__ uses as the threshold map."""
+        return self._build_map_branch(x)
+
+    def _get_bias_attr(self, l2_decay, k, name, gradient_clip=None):
+        """
+        Create a bias ParamAttr named `name + "_b_attr"` with L2 decay and a
+        uniform initializer in [-1/sqrt(k), 1/sqrt(k)].
+        """
+        regularizer = fluid.regularizer.L2Decay(l2_decay)
+        stdv = 1.0 / math.sqrt(k * 1.0)
+        initializer = fluid.initializer.Uniform(-stdv, stdv)
+        bias_attr = fluid.ParamAttr(
+            regularizer=regularizer,
+            gradient_clip=gradient_clip,
+            initializer=initializer,
+            name=name + "_b_attr")
+        return bias_attr
+
+    def step_function(self, x, y):
+        """Return 1 / (1 + exp(-k * (x - y))) with k taken from self.k."""
+        return fluid.layers.reciprocal(1 + fluid.layers.exp(-self.k * (x - y)))
+
+    def __call__(self, conv_features, mode="train"):
+        """
+        Fuse four backbone feature maps and predict the DB maps.
+
+        args:
+            conv_features: sequence unpacked as (c2, c3, c4, c5).
+            mode (str): when "train", the shrink, threshold and binary maps
+                are concatenated on axis 1 and returned as
+                {'maps': variable}; for any other value only the shrink map
+                variable is returned (not wrapped in a dict).
+        """
+        c2, c3, c4, c5 = conv_features
+        param_attr = fluid.initializer.MSRAInitializer(uniform=False)
+
+        def lateral(feature):
+            # 1x1 convolution bringing a backbone level to inner_channels.
+            return fluid.layers.conv2d(
+                input=feature,
+                num_filters=self.inner_channels,
+                filter_size=1,
+                param_attr=param_attr,
+                bias_attr=False)
+
+        def upsample_add(top, skip):
+            # Double the spatial size of `top` and add the lateral `skip`.
+            return fluid.layers.elementwise_add(
+                x=fluid.layers.resize_nearest(
+                    input=top, scale=2), y=skip)
+
+        def smooth(feature, scale=None):
+            # 3x3 convolution to inner_channels // 4, optionally upsampled.
+            out = fluid.layers.conv2d(
+                input=feature,
+                num_filters=self.inner_channels // 4,
+                filter_size=3,
+                padding=1,
+                param_attr=param_attr,
+                bias_attr=False)
+            if scale is not None:
+                out = fluid.layers.resize_nearest(input=out, scale=scale)
+            return out
+
+        # Layers are created in the same order as before so that
+        # auto-generated parameter names stay unchanged.
+        in5 = lateral(c5)
+        in4 = lateral(c4)
+        in3 = lateral(c3)
+        in2 = lateral(c2)
+
+        out4 = upsample_add(in5, in4)  # 1/16
+        out3 = upsample_add(out4, in3)  # 1/8
+        out2 = upsample_add(out3, in2)  # 1/4
+
+        p5 = smooth(in5, scale=8)
+        p4 = smooth(out4, scale=4)
+        p3 = smooth(out3, scale=2)
+        p2 = smooth(out2)
+
+        fuse = fluid.layers.concat(input=[p5, p4, p3, p2], axis=1)
+        shrink_maps = self.binarize(fuse)
+        if mode != "train":
+            return shrink_maps
+        threshold_maps = self.thresh(fuse)
+        binary_maps = self.step_function(shrink_maps, threshold_maps)
+        y = fluid.layers.concat(
+            input=[shrink_maps, threshold_maps, binary_maps], axis=1)
+        predicts = {}
+        predicts['maps'] = y
+        return predicts
--- a/ppocr/modeling/heads/det_east_head.py
+++ b/ppocr/modeling/heads/det_east_head.py
@ -0,0 +1,116 @@
+#Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import paddle.fluid as fluid
+from ..common_functions import conv_bn_layer, deconv_bn_layer
+
+
+class EASTHead(object):
+    """
+    EAST: An Efficient and Accurate Scene Text Detector
+        see arxiv: https://arxiv.org/abs/1704.03155
+    args:
+        params(dict): hyper-parameters for building the network; only
+            'model_name' is read here.
+    """
+
+    def __init__(self, params):
+        self.model_name = params['model_name']
+
+    def unet_fusion(self, inputs):
+        """
+        Merge the first four entries of the reversed `inputs` sequence.
+
+        Level 0 is used as is; each later level is concatenated (axis 1)
+        with the previous merged result and passed through a 3x3 conv-bn
+        layer. Levels 0..2 are then passed through deconv_bn_layer and
+        level 3 through a final 3x3 conv-bn layer, whose output is returned.
+        """
+        feats = inputs[::-1]
+        width = 128 if self.model_name == "large" else 64
+        channels = [width] * 4
+        merged = None
+        for level in range(4):
+            fused = feats[level]
+            if level > 0:
+                fused = conv_bn_layer(
+                    input=fluid.layers.concat([merged, fused], axis=1),
+                    num_filters=channels[level],
+                    filter_size=3,
+                    stride=1,
+                    act='relu',
+                    name="unet_h_%d" % (level))
+            if level <= 2:
+                #can be replaced with unpool
+                merged = deconv_bn_layer(
+                    input=fused,
+                    num_filters=channels[level],
+                    name="unet_g_%d" % (level))
+            else:
+                merged = conv_bn_layer(
+                    input=fused,
+                    num_filters=channels[level],
+                    filter_size=3,
+                    stride=1,
+                    act='relu',
+                    name="unet_g_%d" % (level))
+        return merged
+
+    def detector_header(self, f_common):
+        """
+        Build the prediction layers on top of the fused feature map.
+
+        return: (f_score, f_geo) where f_score is a sigmoid over a
+            one-channel 1x1 conv-bn output and f_geo is an eight-channel
+            1x1 conv-bn output mapped through (sigmoid - 0.5) * 2 * 800.
+        """
+        if self.model_name == "large":
+            trunk_channels = (128, 64)
+        else:
+            trunk_channels = (64, 32)
+        score_channels, geo_channels = 1, 8
+
+        trunk = conv_bn_layer(
+            input=f_common,
+            num_filters=trunk_channels[0],
+            filter_size=3,
+            stride=1,
+            act='relu',
+            name="det_head1")
+        trunk = conv_bn_layer(
+            input=trunk,
+            num_filters=trunk_channels[1],
+            filter_size=3,
+            stride=1,
+            act='relu',
+            name="det_head2")
+
+        # score map
+        score_logits = conv_bn_layer(
+            input=trunk,
+            num_filters=score_channels,
+            filter_size=1,
+            stride=1,
+            act=None,
+            name="f_score")
+        f_score = fluid.layers.sigmoid(score_logits)
+
+        # geometry map
+        geo_logits = conv_bn_layer(
+            input=trunk,
+            num_filters=geo_channels,
+            filter_size=1,
+            stride=1,
+            act=None,
+            name="f_geo")
+        f_geo = (fluid.layers.sigmoid(geo_logits) - 0.5) * 2 * 800
+        return f_score, f_geo
+
+    def __call__(self, inputs):
+        """Run the fusion and the detector header; return a dict with the
+        keys 'f_score' and 'f_geo'."""
+        fused = self.unet_fusion(inputs)
+        f_score, f_geo = self.detector_header(fused)
+        return {'f_score': f_score, 'f_geo': f_geo}
--- a/ppocr/modeling/heads/rec_attention_head.py
+++ b/ppocr/modeling/heads/rec_attention_head.py
@ -0,0 +1,232 @@
+#Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import math
+
+import paddle
+import paddle.fluid as fluid
+import paddle.fluid.layers as layers
+from .rec_seq_encoder import SequenceEncoder
+import numpy as np
+
+
+class AttentionPredict(object):
+    """
+    Attention-based recognition head: a SequenceEncoder followed by a GRU
+    decoder with additive attention.
+    args:
+        params(dict): hyper-parameters; the keys 'char_num', 'encoder_type',
+            'max_text_length' and the nested 'Attention' entries
+            'decoder_size' and 'word_vector_dim' are read here, and the whole
+            dict is forwarded to SequenceEncoder.
+    """
+
+    def __init__(self, params):
+        super(AttentionPredict, self).__init__()
+        self.char_num = params['char_num']
+        self.encoder = SequenceEncoder(params)
+        self.decoder_size = params['Attention']['decoder_size']
+        self.word_vector_dim = params['Attention']['word_vector_dim']
+        self.encoder_type = params['encoder_type']
+        self.max_length = params['max_text_length']
+
+    def simple_attention(self, encoder_vec, encoder_proj, decoder_state,
+                         decoder_size):
+        """
+        Compute an attention context from the encoder outputs.
+
+        The decoder state is projected to `decoder_size`, expanded to the
+        sequence layout of `encoder_proj`, added to it and passed through
+        tanh; a size-1 fc plus sequence_softmax yields the weights, which
+        scale `encoder_vec` before a per-sequence sum pooling.
+        return: (variable) the pooled context
+        """
+        decoder_state_proj = layers.fc(input=decoder_state,
+                                       size=decoder_size,
+                                       bias_attr=False,
+                                       name="decoder_state_proj_fc")
+        decoder_state_expand = layers.sequence_expand(
+            x=decoder_state_proj, y=encoder_proj)
+        # Despite the variable name this is an element-wise sum, not a concat.
+        concated = layers.elementwise_add(encoder_proj, decoder_state_expand)
+        concated = layers.tanh(x=concated)
+        attention_weights = layers.fc(input=concated,
+                                      size=1,
+                                      act=None,
+                                      bias_attr=False,
+                                      name="attention_weights_fc")
+        attention_weights = layers.sequence_softmax(input=attention_weights)
+        # Flatten the weights so they can scale encoder_vec along axis 0.
+        weigths_reshape = layers.reshape(x=attention_weights, shape=[-1])
+        scaled = layers.elementwise_mul(
+            x=encoder_vec, y=weigths_reshape, axis=0)
+        context = layers.sequence_pool(input=scaled, pool_type='sum')
+        return context
+
+    def gru_decoder_with_attention(self, target_embedding, encoder_vec,
+                                   encoder_proj, decoder_boot, decoder_size,
+                                   char_num):
+        """
+        Build the training-time decoder with a DynamicRNN.
+
+        Each step consumes one entry of `target_embedding`, attends over the
+        encoder outputs using the previous hidden state, updates the hidden
+        state with a GRU unit and emits a softmax over `char_num` classes.
+        The fc layer names ("rnn_fc1", "rnn_fc2", "rnn_out_fc") are the same
+        ones used in gru_attention_infer.
+        return: (variable) the DynamicRNN output
+        """
+        rnn = layers.DynamicRNN()
+        with rnn.block():
+            current_word = rnn.step_input(target_embedding)
+            encoder_vec = rnn.static_input(encoder_vec)
+            encoder_proj = rnn.static_input(encoder_proj)
+            # Hidden state is initialized from decoder_boot.
+            hidden_mem = rnn.memory(init=decoder_boot, need_reorder=True)
+            context = self.simple_attention(encoder_vec, encoder_proj,
+                                            hidden_mem, decoder_size)
+            fc_1 = layers.fc(input=context,
+                             size=decoder_size * 3,
+                             bias_attr=False,
+                             name="rnn_fc1")
+            fc_2 = layers.fc(input=current_word,
+                             size=decoder_size * 3,
+                             bias_attr=False,
+                             name="rnn_fc2")
+            # GRU input is the sum of the projected context and current word.
+            decoder_inputs = fc_1 + fc_2
+            h, _, _ = layers.gru_unit(
+                input=decoder_inputs, hidden=hidden_mem, size=decoder_size * 3)
+            rnn.update_memory(hidden_mem, h)
+            out = layers.fc(input=h,
+                            size=char_num,
+                            bias_attr=True,
+                            act='softmax',
+                            name="rnn_out_fc")
+            rnn.output(out)
+        return rnn()
+
+    def gru_attention_infer(self, decoder_boot, max_length, char_num,
+                            word_vector_dim, encoded_vector, encoded_proj,
+                            decoder_size):
+        """
+        Build the inference-time decoding loop (top-1 at every step).
+
+        A While loop runs until `counter` reaches `max_length` or the
+        selected ids are empty. Each iteration embeds the previous ids,
+        attends over the encoder outputs, advances the GRU state and appends
+        the top-1 index to `full_ids` along axis 1.
+        return: (variable) `full_ids`, which starts as a [-1, 1] int64
+            column filled with 1 and grows by one column per iteration
+        """
+        init_state = decoder_boot
+        beam_size = 1
+        array_len = layers.fill_constant(
+            shape=[1], dtype='int64', value=max_length)
+        counter = layers.zeros(shape=[1], dtype='int64', force_cpu=True)
+
+        # fill the first element with init_state
+        state_array = layers.create_array('float32')
+        layers.array_write(init_state, array=state_array, i=counter)
+
+        # ids, scores as memory
+        ids_array = layers.create_array('int64')
+        scores_array = layers.create_array('float32')
+        rois_shape = layers.shape(init_state)
+        # NOTE(review): first dimension of init_state plus one; presumably so
+        # that range() below yields batch + 1 LoD offsets - confirm.
+        batch_size = layers.slice(
+            rois_shape, axes=[0], starts=[0], ends=[1]) + 1
+        lod_level = layers.range(
+            start=0, end=batch_size, step=1, dtype=batch_size.dtype)
+
+        # Initial ids are all 0 and receive the LoD built above twice
+        # (lod_reset followed by lod_append).
+        init_ids = layers.fill_constant_batch_size_like(
+            input=init_state, shape=[-1, 1], value=0, dtype='int64')
+        init_ids = layers.lod_reset(init_ids, lod_level)
+        init_ids = layers.lod_append(init_ids, lod_level)
+
+        # Initial scores are all 1 and reuse the LoD of init_ids.
+        init_scores = layers.fill_constant_batch_size_like(
+            input=init_state, shape=[-1, 1], value=1, dtype='float32')
+        init_scores = layers.lod_reset(init_scores, init_ids)
+        layers.array_write(init_ids, array=ids_array, i=counter)
+        layers.array_write(init_scores, array=scores_array, i=counter)
+
+        # Accumulator for the decoded ids; its first column is the constant 1.
+        full_ids = fluid.layers.fill_constant_batch_size_like(
+            input=init_state, shape=[-1, 1], dtype='int64', value=1)
+
+        cond = layers.less_than(x=counter, y=array_len)
+        while_op = layers.While(cond=cond)
+        with while_op.block():
+            pre_ids = layers.array_read(array=ids_array, i=counter)
+            pre_state = layers.array_read(array=state_array, i=counter)
+            pre_score = layers.array_read(array=scores_array, i=counter)
+            pre_ids_emb = layers.embedding(
+                input=pre_ids,
+                size=[char_num, word_vector_dim],
+                dtype='float32')
+
+            context = self.simple_attention(encoded_vector, encoded_proj,
+                                            pre_state, decoder_size)
+
+            # expand the recursive_sequence_lengths of pre_state
+            # to be the same with pre_score
+            pre_state_expanded = layers.sequence_expand(pre_state, pre_score)
+            context_expanded = layers.sequence_expand(context, pre_score)
+
+            fc_1 = layers.fc(input=context_expanded,
+                             size=decoder_size * 3,
+                             bias_attr=False,
+                             name="rnn_fc1")
+
+            fc_2 = layers.fc(input=pre_ids_emb,
+                             size=decoder_size * 3,
+                             bias_attr=False,
+                             name="rnn_fc2")
+
+            decoder_inputs = fc_1 + fc_2
+            current_state, _, _ = layers.gru_unit(
+                input=decoder_inputs,
+                hidden=pre_state_expanded,
+                size=decoder_size * 3)
+            current_state_with_lod = layers.lod_reset(
+                x=current_state, y=pre_score)
+            # use score to do beam search
+            current_score = layers.fc(input=current_state_with_lod,
+                                      size=char_num,
+                                      bias_attr=True,
+                                      act='softmax',
+                                      name="rnn_out_fc")
+            topk_scores, topk_indices = layers.topk(current_score, k=beam_size)
+
+            # Append this step's ids and write the result back into full_ids
+            # so the value is visible outside the loop.
+            new_ids = fluid.layers.concat([full_ids, topk_indices], axis=1)
+            fluid.layers.assign(new_ids, full_ids)
+
+            # The counter is advanced before the writes below, so they land
+            # in the slot read by the next iteration.
+            layers.increment(x=counter, value=1, in_place=True)
+
+            # update the memories
+            layers.array_write(current_state, array=state_array, i=counter)
+            layers.array_write(topk_indices, array=ids_array, i=counter)
+            layers.array_write(topk_scores, array=scores_array, i=counter)
+
+            # update the break condition:
+            # up to the max length or all candidates of
+            # source sentences have ended.
+            length_cond = layers.less_than(x=counter, y=array_len)
+            finish_cond = layers.logical_not(layers.is_empty(x=topk_indices))
+            layers.logical_and(x=length_cond, y=finish_cond, out=cond)
+        return full_ids
+
+    def __call__(self, inputs, labels=None, mode=None):
+        """
+        Build the recognition head.
+
+        args:
+            inputs: features forwarded to the SequenceEncoder.
+            labels (dict|None): read only when mode == "train"; must then
+                provide 'label_in' and 'label_out'.
+            mode (str|None): "train" builds the training decoder; any other
+                value builds the inference loop.
+        return: (dict) {'predict', 'decoded_out'} in train mode, otherwise
+            {'decoded_out'} only
+        """
+        encoder_features = self.encoder(inputs)
+        char_num = self.char_num
+        word_vector_dim = self.word_vector_dim
+        decoder_size = self.decoder_size
+
+        if self.encoder_type == "reshape":
+            encoder_input = encoder_features
+            encoded_vector = encoder_features
+        else:
+            # Otherwise the encoder output is indexed and concatenated, i.e.
+            # treated as a list; entry 1 feeds the decoder boot state.
+            encoder_input = encoder_features[1]
+            encoded_vector = layers.concat(encoder_features, axis=1)
+        encoded_proj = layers.fc(input=encoded_vector,
+                                 size=decoder_size,
+                                 bias_attr=False,
+                                 name="encoded_proj_fc")
+        # The first step of encoder_input seeds the decoder's initial state.
+        backward_first = layers.sequence_pool(
+            input=encoder_input, pool_type='first')
+        decoder_boot = layers.fc(input=backward_first,
+                                 size=decoder_size,
+                                 bias_attr=False,
+                                 act="relu",
+                                 name='decoder_boot')
+
+        if mode == "train":
+            label_in = labels['label_in']
+            label_out = labels['label_out']
+            label_in = layers.cast(x=label_in, dtype='int64')
+            trg_embedding = layers.embedding(
+                input=label_in,
+                size=[char_num, word_vector_dim],
+                dtype='float32')
+            predict = self.gru_decoder_with_attention(
+                trg_embedding, encoded_vector, encoded_proj, decoder_boot,
+                decoder_size, char_num)
+            # Top-1 indices, given the LoD of label_out.
+            _, decoded_out = layers.topk(input=predict, k=1)
+            decoded_out = layers.lod_reset(decoded_out, y=label_out)
+            predicts = {'predict': predict, 'decoded_out': decoded_out}
+        else:
+            ids = self.gru_attention_infer(
+                decoder_boot, self.max_length, char_num, word_vector_dim,
+                encoded_vector, encoded_proj, decoder_size)
+            predicts = {'decoded_out': ids}
+        return predicts
--- a/ppocr/modeling/heads/rec_ctc_head.py
+++ b/ppocr/modeling/heads/rec_ctc_head.py
@ -0,0 +1,51 @@
+#Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+#
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import math
+
+import paddle
+import paddle.fluid as fluid
+from paddle.fluid.param_attr import ParamAttr
+from .rec_seq_encoder import SequenceEncoder
+from ..common_functions import get_para_bias_attr
+import numpy as np
+
+
+class CTCPredict(object):
+    """
+    CTC recognition head: a SequenceEncoder followed by one fc layer and a
+    greedy CTC decoder.
+    args:
+        params(dict): hyper-parameters; 'char_num' and 'encoder_type' are
+            read here and the whole dict is forwarded to SequenceEncoder.
+    """
+
+    def __init__(self, params):
+        super(CTCPredict, self).__init__()
+        self.char_num = params['char_num']
+        self.encoder = SequenceEncoder(params)
+        self.encoder_type = params['encoder_type']
+
+    def __call__(self, inputs, labels=None, mode=None):
+        """
+        Build the head; `labels` and `mode` are accepted but not used.
+        return: (dict) 'predict' is the fc output with char_num + 1 units,
+            'decoded_out' is the greedy CTC decoding of it with
+            blank=char_num
+        """
+        features = self.encoder(inputs)
+        if not self.encoder_type == "reshape":
+            # Any non-"reshape" encoder output is concatenated on axis 1.
+            features = fluid.layers.concat(features, axis=1)
+
+        fc_name = "ctc_fc"
+        weight_attr, bias_attr = get_para_bias_attr(
+            l2_decay=0.0004, k=features.shape[1], name=fc_name)
+        logits = fluid.layers.fc(input=features,
+                                 size=self.char_num + 1,
+                                 param_attr=weight_attr,
+                                 bias_attr=bias_attr,
+                                 name=fc_name)
+        decoded = fluid.layers.ctc_greedy_decoder(
+            input=logits, blank=self.char_num)
+        return {'predict': logits, 'decoded_out': decoded}
--- a/ppocr/modeling/heads/rec_seq_encoder.py
+++ b/ppocr/modeling/heads/rec_seq_encoder.py
@ -0,0 +1,100 @@
+#Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+#
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import math
+import paddle.fluid as fluid
+import paddle.fluid.layers as layers
+
+
+class EncoderWithReshape(object):
+    """
+    Turn a feature map into a sequence with im2sequence.
+    args:
+        params(dict): accepted for interface symmetry; not read here.
+    """
+
+    def __init__(self, params):
+        super(EncoderWithReshape, self).__init__()
+
+    def __call__(self, inputs):
+        # The window covers all of dimension 2 of the input and is one unit
+        # wide, moving with stride 1.
+        window = [inputs.shape[2], 1]
+        return layers.im2sequence(
+            input=inputs,
+            filter_size=window,
+            stride=[1, 1],
+            name="sliced_feature")
+
+
+class EncoderWithRNN(object):
+    """
+    Two stacked fc + dynamic_lstm stages, built once in the forward
+    direction and once reversed.
+    args:
+        params(dict): hyper-parameters; params['SeqRNN']['hidden_size'] is
+            read.
+    """
+
+    def __init__(self, params):
+        super(EncoderWithRNN, self).__init__()
+        self.rnn_hidden_size = params['SeqRNN']['hidden_size']
+
+    def __call__(self, inputs):
+        """
+        return: (list) two variables: the second-stage LSTM output of the
+            forward pass, then that of the reversed pass
+        """
+        name_prefix = "lstm"
+        gate_size = self.rnn_hidden_size * 4
+        outputs = []
+        # Pass 1 runs forward, pass 2 runs reversed.
+        for no, is_reverse in ((1, False), (2, True)):
+            feature = inputs
+            for stage in (1, 2):
+                fc_name = "%s_st%d_fc%d" % (name_prefix, stage, no)
+                projected = layers.fc(
+                    input=feature,
+                    size=gate_size,
+                    param_attr=fluid.ParamAttr(name=fc_name + "_w"),
+                    bias_attr=fluid.ParamAttr(name=fc_name + "_b"),
+                    name=fc_name)
+                lstm_name = "%s_st%d_out%d" % (name_prefix, stage, no)
+                feature, _ = layers.dynamic_lstm(
+                    input=projected,
+                    size=gate_size,
+                    is_reverse=is_reverse,
+                    param_attr=fluid.ParamAttr(name=lstm_name + "_w"),
+                    bias_attr=fluid.ParamAttr(name=lstm_name + "_b"),
+                    use_peepholes=False)
+            outputs.append(feature)
+        return outputs
+
+
+class SequenceEncoder(object):
+    """
+    Dispatch to the encoder selected by params['encoder_type'].
+    args:
+        params(dict): hyper-parameters; 'encoder_type' is read here
+            ("reshape" or "rnn") and the whole dict is forwarded to the
+            underlying encoders.
+    """
+
+    def __init__(self, params):
+        super(SequenceEncoder, self).__init__()
+        self.encoder_type = params['encoder_type']
+        self.encoder_reshape = EncoderWithReshape(params)
+        if self.encoder_type == "rnn":
+            self.encoder_rnn = EncoderWithRNN(params)
+
+    def __call__(self, inputs):
+        """
+        return: the EncoderWithReshape output for "reshape"; for "rnn" the
+            EncoderWithRNN output computed on the reshaped features
+        raises: AssertionError for any other encoder_type
+        """
+        if self.encoder_type == "reshape":
+            return self.encoder_reshape(inputs)
+        if self.encoder_type == "rnn":
+            return self.encoder_rnn(self.encoder_reshape(inputs))
+        # Raised explicitly (instead of `assert False`) so the check also
+        # fires under `python -O`; the exception type is kept for callers.
+        raise AssertionError("Unsupport encoder_type:%s" % self.encoder_type)
--- a/ppocr/modeling/losses/det_basic_loss.py
+++ b/ppocr/modeling/losses/det_basic_loss.py
@ -0,0 +1,116 @@
+#Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+#
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import numpy as np
+
+import paddle.fluid as fluid
+
+
+def BalanceLoss(pred,
+                gt,
+                mask,
+                balance_loss=True,
+                main_loss_type="DiceLoss",
+                negative_ratio=3,
+                return_origin=False,
+                eps=1e-6):
+    """
+    The BalanceLoss for Differentiable Binarization text detection
+    args:
+        pred (variable): predicted feature maps.
+        gt (variable): ground truth feature maps.
+        mask (variable): masked maps.
+        balance_loss (bool): whether balance loss or not, default is True
+        main_loss_type (str): can only be one of ['CrossEntropy','DiceLoss',
+            'Euclidean','BCELoss', 'MaskL1Loss'], default is  'DiceLoss'.
+        negative_ratio (int|float): upper bound on the number of negatives
+            kept, as a multiple of the positive count; default is 3.
+        return_origin (bool): whether to also return the unbalanced loss,
+            default is False.
+        eps (float): default is 1e-6.
+    return: (variable) the unbalanced loss when balance_loss is False;
+        otherwise the balanced loss, or the tuple
+        (balanced loss, unbalanced loss) when return_origin is True
+    raises: ValueError if main_loss_type is not one of the supported names
+    """
+    if main_loss_type == "CrossEntropy":
+        loss = fluid.layers.cross_entropy(input=pred, label=gt, soft_label=True)
+        loss = fluid.layers.reduce_mean(loss)
+    elif main_loss_type == "Euclidean":
+        loss = fluid.layers.square(pred - gt)
+        loss = fluid.layers.reduce_mean(loss)
+    elif main_loss_type == "DiceLoss":
+        loss = DiceLoss(pred, gt, mask)
+    elif main_loss_type == "BCELoss":
+        loss = fluid.layers.sigmoid_cross_entropy_with_logits(pred, label=gt)
+    elif main_loss_type == "MaskL1Loss":
+        loss = MaskL1Loss(pred, gt, mask)
+    else:
+        loss_type = [
+            'CrossEntropy', 'DiceLoss', 'Euclidean', 'BCELoss', 'MaskL1Loss'
+        ]
+        # ValueError is a subclass of Exception, so existing handlers still
+        # catch it.
+        raise ValueError("main_loss_type in BalanceLoss() can only be one of {}".
+                         format(loss_type))
+
+    if not balance_loss:
+        return loss
+
+    # The counting ops are only added to the graph when balancing is on.
+    positive = gt * mask
+    negative = (1 - gt) * mask
+
+    positive_count = fluid.layers.reduce_sum(positive)
+    # Python's builtin min() cannot compare the runtime values of graph
+    # variables, so the clamp is built as a graph op instead.
+    negative_count = fluid.layers.elementwise_min(
+        fluid.layers.reduce_sum(negative), positive_count * negative_ratio)
+    negative_count_int = fluid.layers.cast(negative_count, dtype=np.int32)
+
+    positive_loss = positive * loss
+    negative_loss = negative * loss
+    # Keep only the largest negative losses (hard-negative selection).
+    negative_loss = fluid.layers.reshape(negative_loss, shape=[-1])
+    negative_loss, _ = fluid.layers.topk(negative_loss, k=negative_count_int)
+    balanced = (fluid.layers.reduce_sum(positive_loss) +
+                fluid.layers.reduce_sum(negative_loss)) / (
+                    positive_count + negative_count + eps)
+
+    if return_origin:
+        return balanced, loss
+    return balanced
+
+
+def DiceLoss(pred, gt, mask, weights=None, eps=1e-6):
+    """
+    DiceLoss function.
+    args:
+        pred (variable): predicted maps.
+        gt (variable): ground truth maps, same shape as pred.
+        mask (variable): mask maps, same shape as pred.
+        weights (variable|None): optional weights, same shape as mask; when
+            given they are multiplied into the mask.
+        eps (float): added to the denominator, default is 1e-6.
+    return: (variable) 1 - 2 * sum(pred * gt * mask) /
+        (sum(pred * mask) + sum(gt * mask) + eps)
+    """
+
+    assert pred.shape == gt.shape
+    assert pred.shape == mask.shape
+    if weights is not None:
+        assert weights.shape == mask.shape
+        mask = weights * mask
+    intersection = fluid.layers.reduce_sum(pred * gt * mask)
+
+    union = fluid.layers.reduce_sum(pred * mask) + fluid.layers.reduce_sum(
+        gt * mask) + eps
+    # The former `assert loss <= 1` was dropped: the loss is a graph
+    # variable with no value at graph-construction time, so the assert
+    # could not check anything.
+    return 1 - 2.0 * intersection / union
+
+
+def MaskL1Loss(pred, gt, mask, eps=1e-6):
+    """
+    Mask L1 Loss: sum(|pred - gt| * mask) / (sum(mask) + eps), passed
+    through reduce_mean.
+    """
+    masked_abs_error = fluid.layers.abs(pred - gt) * mask
+    error_sum = fluid.layers.reduce_sum(masked_abs_error)
+    mask_sum = fluid.layers.reduce_sum(mask) + eps
+    return fluid.layers.reduce_mean(error_sum / mask_sum)
--- a/ppocr/modeling/losses/det_db_loss.py
+++ b/ppocr/modeling/losses/det_db_loss.py
@ -0,0 +1,68 @@
+#copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve.
+#
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+from .det_basic_loss import BalanceLoss, MaskL1Loss, DiceLoss
+
+
+class DBLoss(object):
+    """
+    Differentiable Binarization (DB) Loss Function
+    args:
+        param (dict): the super paramter for DB Loss
+    """
+
+    def __init__(self, params):
+        super(DBLoss, self).__init__()
+        self.balance_loss = params['balance_loss']
+        self.main_loss_type = params['main_loss_type']
+
+        self.alpha = params['alpha']
+        self.beta = params['beta']
+        self.ohem_ratio = params['ohem_ratio']
+
+    def __call__(self, predicts, labels):
+        label_shrink_map = labels['shrink_map']
+        label_shrink_mask = labels['shrink_mask']
+        label_threshold_map = labels['threshold_map']
+        label_threshold_mask = labels['threshold_mask']
+        pred = predicts['maps']
+        shrink_maps = pred[:, 0, :, :]
+        threshold_maps = pred[:, 1, :, :]
+        binary_maps = pred[:, 2, :, :]
+
+        loss_shrink_maps = BalanceLoss(
+            shrink_maps,
+            label_shrink_map,
+            label_shrink_mask,
+            balance_loss=self.balance_loss,
+            main_loss_type=self.main_loss_type,
+            negative_ratio=self.ohem_ratio)
+        loss_threshold_maps = MaskL1Loss(threshold_maps, label_threshold_map,
+                                         label_threshold_mask)
+        loss_binary_maps = DiceLoss(binary_maps, label_shrink_map,
+                                    label_shrink_mask)
+        loss_shrink_maps = self.alpha * loss_shrink_maps
+        loss_threshold_maps = self.beta * loss_threshold_maps
+
+        loss_all = loss_shrink_maps + loss_threshold_maps\
+            + loss_binary_maps
+        losses = {'total_loss':loss_all,\
+            "loss_shrink_maps":loss_shrink_maps,\
+            "loss_threshold_maps":loss_threshold_maps,\
+            "loss_binary_maps":loss_binary_maps}
+        return losses
--- a/ppocr/modeling/losses/det_east_loss.py
+++ b/ppocr/modeling/losses/det_east_loss.py
@ -0,0 +1,61 @@
+#copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
+#
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import paddle.fluid as fluid
+
+
+class EASTLoss(object):
+    """
+    EAST Loss function
+    """
+
+    def __init__(self, params=None):
+        super(EASTLoss, self).__init__()
+
+    def __call__(self, predicts, labels):
+        f_score = predicts['f_score']
+        f_geo = predicts['f_geo']
+        l_score = labels['score']
+        l_geo = labels['geo']
+        l_mask = labels['mask']
+        ##dice_loss
+        intersection = fluid.layers.reduce_sum(f_score * l_score * l_mask)
+        union = fluid.layers.reduce_sum(f_score * l_mask)\
+            + fluid.layers.reduce_sum(l_score * l_mask)
+        dice_loss = 1 - 2 * intersection / (union + 1e-5)
+        #smoooth_l1_loss
+        channels = 8
+        l_geo_split = fluid.layers.split(
+            l_geo, num_or_sections=channels + 1, dim=1)
+        f_geo_split = fluid.layers.split(f_geo, num_or_sections=channels, dim=1)
+        smooth_l1 = 0
+        for i in range(0, channels):
+            geo_diff = l_geo_split[i] - f_geo_split[i]
+            abs_geo_diff = fluid.layers.abs(geo_diff)
+            smooth_l1_sign = fluid.layers.less_than(abs_geo_diff, l_score)
+            smooth_l1_sign = fluid.layers.cast(smooth_l1_sign, dtype='float32')
+            in_loss = abs_geo_diff * abs_geo_diff * smooth_l1_sign + \
+                (abs_geo_diff - 0.5) * (1.0 - smooth_l1_sign)
+            out_loss = l_geo_split[-1] / channels * in_loss * l_score
+            smooth_l1 += out_loss
+        smooth_l1_loss = fluid.layers.reduce_mean(smooth_l1 * l_score)
+        dice_loss = dice_loss * 0.01
+        total_loss = dice_loss + smooth_l1_loss
+        losses = {'total_loss':total_loss, "dice_loss":dice_loss,\
+            "smooth_l1_loss":smooth_l1_loss}
+        return losses
--- a/ppocr/modeling/losses/rec_attention_loss.py
+++ b/ppocr/modeling/losses/rec_attention_loss.py
@ -0,0 +1,38 @@
+#copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve.
+#
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import math
+
+import paddle
+import paddle.fluid as fluid
+from paddle.fluid.param_attr import ParamAttr
+import numpy as np
+
+
+class AttentionLoss(object):
+    def __init__(self, params):
+        super(AttentionLoss, self).__init__()
+        self.char_num = params['char_num']
+
+    def __call__(self, predicts, labels):
+        predict = predicts['predict']
+        label_out = labels['label_out']
+        label_out = fluid.layers.cast(x=label_out, dtype='int64')
+        cost = fluid.layers.cross_entropy(input=predict, label=label_out)
+        sum_cost = fluid.layers.reduce_sum(cost)
+        return sum_cost
--- a/ppocr/modeling/losses/rec_ctc_loss.py
+++ b/ppocr/modeling/losses/rec_ctc_loss.py
@ -0,0 +1,36 @@
+#copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve.
+#
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import math
+
+import paddle
+import paddle.fluid as fluid
+
+
+class CTCLoss(object):
+    def __init__(self, params):
+        super(CTCLoss, self).__init__()
+        self.char_num = params['char_num']
+
+    def __call__(self, predicts, labels):
+        predict = predicts['predict']
+        label = labels['label']
+        cost = fluid.layers.warpctc(
+            input=predict, label=label, blank=self.char_num, norm_by_times=True)
+        sum_cost = fluid.layers.reduce_sum(cost)
+        return sum_cost
--- a/ppocr/modeling/stns/tps.py
+++ b/ppocr/modeling/stns/tps.py
@ -0,0 +1,261 @@
+#copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve.
+#
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import math
+
+import paddle.fluid as fluid
+import paddle.fluid.layers as layers
+from paddle.fluid.param_attr import ParamAttr
+import numpy as np
+
+
+class LocalizationNetwork(object):
+    def __init__(self, params):
+        super(LocalizationNetwork, self).__init__()
+        self.F = params['num_fiducial']
+        self.loc_lr = params['loc_lr']
+        self.model_name = params['model_name']
+
+    def conv_bn_layer(self,
+                      input,
+                      num_filters,
+                      filter_size,
+                      stride=1,
+                      groups=1,
+                      act=None,
+                      name=None):
+        conv = layers.conv2d(
+            input=input,
+            num_filters=num_filters,
+            filter_size=filter_size,
+            stride=stride,
+            padding=(filter_size - 1) // 2,
+            groups=groups,
+            act=None,
+            param_attr=ParamAttr(name=name + "_weights"),
+            bias_attr=False)
+        bn_name = "bn_" + name
+        return layers.batch_norm(
+            input=conv,
+            act=act,
+            param_attr=ParamAttr(name=bn_name + '_scale'),
+            bias_attr=ParamAttr(bn_name + '_offset'),
+            moving_mean_name=bn_name + '_mean',
+            moving_variance_name=bn_name + '_variance')
+
+    def get_initial_fiducials(self):
+        """ see RARE paper Fig. 6 (a) """
+        F = self.F
+        ctrl_pts_x = np.linspace(-1.0, 1.0, int(F / 2))
+        ctrl_pts_y_top = np.linspace(0.0, -1.0, num=int(F / 2))
+        ctrl_pts_y_bottom = np.linspace(1.0, 0.0, num=int(F / 2))
+        ctrl_pts_top = np.stack([ctrl_pts_x, ctrl_pts_y_top], axis=1)
+        ctrl_pts_bottom = np.stack([ctrl_pts_x, ctrl_pts_y_bottom], axis=1)
+        initial_bias = np.concatenate([ctrl_pts_top, ctrl_pts_bottom], axis=0)
+        return initial_bias
+
+    def __call__(self, image):
+        F = self.F
+        loc_lr = self.loc_lr
+        if self.model_name == "large":
+            num_filters_list = [64, 128, 256, 512]
+            fc_dim = 256
+        else:
+            num_filters_list = [16, 32, 64, 128]
+            fc_dim = 64
+        for fno in range(len(num_filters_list)):
+            num_filters = num_filters_list[fno]
+            name = "loc_conv%d" % fno
+            if fno == 0:
+                conv = self.conv_bn_layer(
+                    image, num_filters, 3, act='relu', name=name)
+            else:
+                conv = self.conv_bn_layer(
+                    pool, num_filters, 3, act='relu', name=name)
+
+            if fno == len(num_filters_list) - 1:
+                pool = layers.adaptive_pool2d(
+                    input=conv, pool_size=[1, 1], pool_type='avg')
+            else:
+                pool = layers.pool2d(
+                    input=conv,
+                    pool_size=2,
+                    pool_stride=2,
+                    pool_padding=0,
+                    pool_type='max')
+        name = "loc_fc1"
+        stdv = 1.0 / math.sqrt(pool.shape[1] * 1.0)
+        fc1 = layers.fc(input=pool,
+                        size=fc_dim,
+                        param_attr=fluid.param_attr.ParamAttr(
+                            learning_rate=loc_lr,
+                            initializer=fluid.initializer.Uniform(-stdv, stdv),
+                            name=name + "_w"),
+                        act='relu',
+                        name=name)
+
+        initial_bias = self.get_initial_fiducials()
+        initial_bias = initial_bias.reshape(-1)
+        name = "loc_fc2"
+        param_attr = fluid.param_attr.ParamAttr(
+            learning_rate=loc_lr,
+            initializer=fluid.initializer.NumpyArrayInitializer(
+                np.zeros([fc_dim, F * 2])),
+            name=name + "_w")
+        bias_attr = fluid.param_attr.ParamAttr(
+            learning_rate=loc_lr,
+            initializer=fluid.initializer.NumpyArrayInitializer(initial_bias),
+            name=name + "_b")
+        fc2 = layers.fc(input=fc1,
+                        size=F * 2,
+                        param_attr=param_attr,
+                        bias_attr=bias_attr,
+                        name=name)
+        batch_C_prime = layers.reshape(x=fc2, shape=[-1, F, 2], inplace=False)
+        return batch_C_prime
+
+
+class GridGenerator(object):
+    def __init__(self, params):
+        super(GridGenerator, self).__init__()
+        self.eps = 1e-6
+        self.F = params['num_fiducial']
+
+    def build_C(self):
+        """ Return coordinates of fiducial points in I_r; C """
+        F = self.F
+        ctrl_pts_x = np.linspace(-1.0, 1.0, int(F / 2))
+        ctrl_pts_y_top = -1 * np.ones(int(F / 2))
+        ctrl_pts_y_bottom = np.ones(int(F / 2))
+        ctrl_pts_top = np.stack([ctrl_pts_x, ctrl_pts_y_top], axis=1)
+        ctrl_pts_bottom = np.stack([ctrl_pts_x, ctrl_pts_y_bottom], axis=1)
+        C = np.concatenate([ctrl_pts_top, ctrl_pts_bottom], axis=0)
+        return C  # F x 2
+
+    def build_P(self, I_r_size):
+        I_r_width, I_r_height = I_r_size
+        I_r_grid_x = (np.arange(-I_r_width, I_r_width, 2) + 1.0)\
+            / I_r_width  # self.I_r_width
+        I_r_grid_y = (np.arange(-I_r_height, I_r_height, 2) + 1.0)\
+            / I_r_height  # self.I_r_height
+        # P: self.I_r_width x self.I_r_height x 2
+        P = np.stack(np.meshgrid(I_r_grid_x, I_r_grid_y), axis=2)
+        # n (= self.I_r_width x self.I_r_height) x 2
+        return P.reshape([-1, 2])
+
+    def build_inv_delta_C(self, C):
+        """ Return inv_delta_C which is needed to calculate T """
+        F = self.F
+        hat_C = np.zeros((F, F), dtype=float)  # F x F
+        for i in range(0, F):
+            for j in range(i, F):
+                r = np.linalg.norm(C[i] - C[j])
+                hat_C[i, j] = r
+                hat_C[j, i] = r
+        np.fill_diagonal(hat_C, 1)
+        hat_C = (hat_C**2) * np.log(hat_C)
+        # print(C.shape, hat_C.shape)
+        delta_C = np.concatenate(  # F+3 x F+3
+            [
+                np.concatenate(
+                    [np.ones((F, 1)), C, hat_C], axis=1),  # F x F+3
+                np.concatenate(
+                    [np.zeros((2, 3)), np.transpose(C)], axis=1),  # 2 x F+3
+                np.concatenate(
+                    [np.zeros((1, 3)), np.ones((1, F))], axis=1)  # 1 x F+3
+            ],
+            axis=0)
+        inv_delta_C = np.linalg.inv(delta_C)
+        return inv_delta_C  # F+3 x F+3
+
+    def build_P_hat(self, C, P):
+        F = self.F
+        eps = self.eps
+        n = P.shape[0]  # n (= self.I_r_width x self.I_r_height)
+        #P_tile: n x 2 -> n x 1 x 2 -> n x F x 2
+        P_tile = np.tile(np.expand_dims(P, axis=1), (1, F, 1))
+        C_tile = np.expand_dims(C, axis=0)  # 1 x F x 2
+        P_diff = P_tile - C_tile  # n x F x 2
+        #rbf_norm: n x F
+        rbf_norm = np.linalg.norm(P_diff, ord=2, axis=2, keepdims=False)
+        #rbf: n x F
+        rbf = np.multiply(np.square(rbf_norm), np.log(rbf_norm + eps))
+        P_hat = np.concatenate([np.ones((n, 1)), P, rbf], axis=1)
+        return P_hat  # n x F+3
+
+    def get_expand_tensor(self, batch_C_prime):
+        name = "ex_fc"
+        initializer = fluid.initializer.ConstantInitializer(value=0.0)
+        param_attr = fluid.param_attr.ParamAttr(
+            learning_rate=0.0, initializer=initializer, name=name + "_w")
+        bias_attr = fluid.param_attr.ParamAttr(
+            learning_rate=0.0, initializer=initializer, name=name + "_b")
+        batch_C_ex_part_tensor = fluid.layers.fc(input=batch_C_prime,
+                                                 size=6,
+                                                 param_attr=param_attr,
+                                                 bias_attr=bias_attr,
+                                                 name=name)
+        batch_C_ex_part_tensor = fluid.layers.reshape(
+            x=batch_C_ex_part_tensor, shape=[-1, 3, 2])
+        return batch_C_ex_part_tensor
+
+    def __call__(self, batch_C_prime, I_r_size):
+        C = self.build_C()
+        P = self.build_P(I_r_size)
+        inv_delta_C = self.build_inv_delta_C(C).astype('float32')
+        P_hat = self.build_P_hat(C, P).astype('float32')
+
+        inv_delta_C_tensor = layers.create_tensor(dtype='float32')
+        layers.assign(inv_delta_C, inv_delta_C_tensor)
+        inv_delta_C_tensor.stop_gradient = True
+        P_hat_tensor = layers.create_tensor(dtype='float32')
+        layers.assign(P_hat, P_hat_tensor)
+        P_hat_tensor.stop_gradient = True
+
+        batch_C_ex_part_tensor = self.get_expand_tensor(batch_C_prime)
+        #         batch_C_ex_part_tensor = create_tmp_var(
+        #             fluid.default_main_program(),
+        #             name='batch_C_ex_part_tensor', 
+        #             dtype='float32', shape=[-1, 3, 2])
+        #         layers.py_func(func=get_batch_C_expand, 
+        #             x=[batch_C_prime], out=[batch_C_ex_part_tensor])
+
+        batch_C_ex_part_tensor.stop_gradient = True
+
+        batch_C_prime_with_zeros = layers.concat(
+            [batch_C_prime, batch_C_ex_part_tensor], axis=1)
+        batch_T = layers.matmul(inv_delta_C_tensor, batch_C_prime_with_zeros)
+        batch_P_prime = layers.matmul(P_hat_tensor, batch_T)
+        return batch_P_prime
+
+
+class TPS(object):
+    def __init__(self, params):
+        super(TPS, self).__init__()
+        self.loc_net = LocalizationNetwork(params)
+        self.grid_generator = GridGenerator(params)
+
+    def __call__(self, image):
+        batch_C_prime = self.loc_net(image)
+        I_r_size = [image.shape[3], image.shape[2]]
+        batch_P_prime = self.grid_generator(batch_C_prime, I_r_size)
+        batch_P_prime = layers.reshape(
+            x=batch_P_prime, shape=[-1, image.shape[2], image.shape[3], 2])
+        batch_I_r = layers.grid_sampler(x=image, grid=batch_P_prime)
+        image.stop_gradient = False
+        return batch_I_r
--- a/ppocr/optimizer.py
+++ b/ppocr/optimizer.py
@ -0,0 +1,36 @@
+#copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
+#
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+import paddle.fluid as fluid
+
+
+def AdamDecay(params, parameter_list=None):
+    """
+    define optimizer function
+    args:
+        params(dict): the super parameters
+        parameter_list (list): list of Variable names to update to minimize loss
+    return:
+    """
+    base_lr = params['base_lr']
+    beta1 = params['beta1']
+    beta2 = params['beta2']
+    optimizer = fluid.optimizer.Adam(
+        learning_rate=base_lr,
+        beta1=beta1,
+        beta2=beta2,
+        parameter_list=parameter_list)
+    return optimizer
--- a/ppocr/postprocess/db_postprocess.py
+++ b/ppocr/postprocess/db_postprocess.py
@ -0,0 +1,152 @@
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import paddle
+import paddle.fluid as fluid
+
+import numpy as np
+import string
+import cv2
+from shapely.geometry import Polygon
+import pyclipper
+
+
+class DBPostProcess(object):
+    """
+    The post process for Differentiable Binarization (DB).
+    """
+
+    def __init__(self, params):
+        self.thresh = params['thresh']
+        self.box_thresh = params['box_thresh']
+        self.max_candidates = params['max_candidates']
+        self.min_size = 3
+
+    def boxes_from_bitmap(self, pred, _bitmap, dest_width, dest_height):
+        '''
+        _bitmap: single map with shape (1, H, W),
+                whose values are binarized as {0, 1}
+        '''
+
+        bitmap = _bitmap
+        height, width = bitmap.shape
+
+        # img, contours, _ = cv2.findContours((bitmap * 255).astype(np.uint8), cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE)
+        contours, _ = cv2.findContours((bitmap * 255).astype(np.uint8),
+                                       cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE)
+
+        num_contours = min(len(contours), self.max_candidates)
+        boxes = np.zeros((num_contours, 4, 2), dtype=np.int16)
+        scores = np.zeros((num_contours, ), dtype=np.float32)
+
+        for index in range(num_contours):
+            contour = contours[index]
+            points, sside = self.get_mini_boxes(contour)
+            if sside < self.min_size:
+                continue
+            points = np.array(points)
+            score = self.box_score_fast(pred, points.reshape(-1, 2))
+            if self.box_thresh > score:
+                continue
+
+            box = self.unclip(points).reshape(-1, 1, 2)
+            box, sside = self.get_mini_boxes(box)
+            if sside < self.min_size + 2:
+                continue
+            box = np.array(box)
+            if not isinstance(dest_width, int):
+                dest_width = dest_width.item()
+                dest_height = dest_height.item()
+
+            box[:, 0] = np.clip(
+                np.round(box[:, 0] / width * dest_width), 0, dest_width)
+            box[:, 1] = np.clip(
+                np.round(box[:, 1] / height * dest_height), 0, dest_height)
+            boxes[index, :, :] = box.astype(np.int16)
+            scores[index] = score
+        return boxes, scores
+
+    def unclip(self, box, unclip_ratio=1.5):
+        poly = Polygon(box)
+        distance = poly.area * unclip_ratio / poly.length
+        offset = pyclipper.PyclipperOffset()
+        offset.AddPath(box, pyclipper.JT_ROUND, pyclipper.ET_CLOSEDPOLYGON)
+        expanded = np.array(offset.Execute(distance))
+        return expanded
+
+    def get_mini_boxes(self, contour):
+        bounding_box = cv2.minAreaRect(contour)
+        points = sorted(list(cv2.boxPoints(bounding_box)), key=lambda x: x[0])
+
+        index_1, index_2, index_3, index_4 = 0, 1, 2, 3
+        if points[1][1] > points[0][1]:
+            index_1 = 0
+            index_4 = 1
+        else:
+            index_1 = 1
+            index_4 = 0
+        if points[3][1] > points[2][1]:
+            index_2 = 2
+            index_3 = 3
+        else:
+            index_2 = 3
+            index_3 = 2
+
+        box = [
+            points[index_1], points[index_2], points[index_3], points[index_4]
+        ]
+        return box, min(bounding_box[1])
+
+    def box_score_fast(self, bitmap, _box):
+        h, w = bitmap.shape[:2]
+        box = _box.copy()
+        xmin = np.clip(np.floor(box[:, 0].min()).astype(np.int), 0, w - 1)
+        xmax = np.clip(np.ceil(box[:, 0].max()).astype(np.int), 0, w - 1)
+        ymin = np.clip(np.floor(box[:, 1].min()).astype(np.int), 0, h - 1)
+        ymax = np.clip(np.ceil(box[:, 1].max()).astype(np.int), 0, h - 1)
+
+        mask = np.zeros((ymax - ymin + 1, xmax - xmin + 1), dtype=np.uint8)
+        box[:, 0] = box[:, 0] - xmin
+        box[:, 1] = box[:, 1] - ymin
+        cv2.fillPoly(mask, box.reshape(1, -1, 2).astype(np.int32), 1)
+        return cv2.mean(bitmap[ymin:ymax + 1, xmin:xmax + 1], mask)[0]
+
+    def __call__(self, outs_dict, ratio_list):
+        pred = outs_dict['maps']
+        pred = pred[:, 0, :, :]
+        segmentation = pred > self.thresh
+
+        boxes_batch = []
+        for batch_index in range(pred.shape[0]):
+            height, width = pred.shape[-2:]
+            tmp_boxes, tmp_scores = self.boxes_from_bitmap(
+                pred[batch_index], segmentation[batch_index], width, height)
+
+            boxes = []
+            for k in range(len(tmp_boxes)):
+                if tmp_scores[k] > self.box_thresh:
+                    boxes.append(tmp_boxes[k])
+            if len(boxes) > 0:
+                boxes = np.array(boxes)
+
+                ratio_h, ratio_w = ratio_list[batch_index]
+                boxes[:, :, 0] = boxes[:, :, 0] / ratio_w
+                boxes[:, :, 1] = boxes[:, :, 1] / ratio_h
+
+            boxes_batch.append(boxes)
+        return boxes_batch
--- a/ppocr/postprocess/east_postprocess.py
+++ b/ppocr/postprocess/east_postprocess.py
@ -0,0 +1,121 @@
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import numpy as np
+from .locality_aware_nms import nms_locality
+import cv2
+
+
+class EASTPostPocess(object):
+    """
+    The post process for EAST.
+    """
+
+    def __init__(self, params):
+        self.score_thresh = params['score_thresh']
+        self.cover_thresh = params['cover_thresh']
+        self.nms_thresh = params['nms_thresh']
+
+    def restore_rectangle_quad(self, origin, geometry):
+        """
+        Restore rectangle from quadrangle.
+        """
+        # quad
+        origin_concat = np.concatenate(
+            (origin, origin, origin, origin), axis=1)  # (n, 8)
+        pred_quads = origin_concat - geometry
+        pred_quads = pred_quads.reshape((-1, 4, 2))  # (n, 4, 2)
+        return pred_quads
+
+    def detect(self,
+               score_map,
+               geo_map,
+               score_thresh=0.8,
+               cover_thresh=0.1,
+               nms_thresh=0.2):
+        """
+        restore text boxes from score map and geo map
+        """
+        score_map = score_map[0]
+        geo_map = np.swapaxes(geo_map, 1, 0)
+        geo_map = np.swapaxes(geo_map, 1, 2)
+        # filter the score map
+        xy_text = np.argwhere(score_map > score_thresh)
+        if len(xy_text) == 0:
+            return []
+        # sort the text boxes via the y axis
+        xy_text = xy_text[np.argsort(xy_text[:, 0])]
+        #restore quad proposals
+        text_box_restored = self.restore_rectangle_quad(
+            xy_text[:, ::-1] * 4, geo_map[xy_text[:, 0], xy_text[:, 1], :])
+        boxes = np.zeros((text_box_restored.shape[0], 9), dtype=np.float32)
+        boxes[:, :8] = text_box_restored.reshape((-1, 8))
+        boxes[:, 8] = score_map[xy_text[:, 0], xy_text[:, 1]]
+        boxes = nms_locality(boxes.astype(np.float64), nms_thresh)
+        if boxes.shape[0] == 0:
+            return []
+        # Here we filter some low score boxes by the average score map, 
+        #   this is different from the orginal paper.
+        for i, box in enumerate(boxes):
+            mask = np.zeros_like(score_map, dtype=np.uint8)
+            cv2.fillPoly(mask, box[:8].reshape(
+                (-1, 4, 2)).astype(np.int32) // 4, 1)
+            boxes[i, 8] = cv2.mean(score_map, mask)[0]
+        boxes = boxes[boxes[:, 8] > cover_thresh]
+        return boxes
+
+    def sort_poly(self, p):
+        """
+        Sort polygons.
+        """
+        min_axis = np.argmin(np.sum(p, axis=1))
+        p = p[[min_axis, (min_axis + 1) % 4,\
+            (min_axis + 2) % 4, (min_axis + 3) % 4]]
+        if abs(p[0, 0] - p[1, 0]) > abs(p[0, 1] - p[1, 1]):
+            return p
+        else:
+            return p[[0, 3, 2, 1]]
+
+    def __call__(self, outs_dict, ratio_list):
+        score_list = outs_dict['f_score']
+        geo_list = outs_dict['f_geo']
+        img_num = len(ratio_list)
+        dt_boxes_list = []
+        for ino in range(img_num):
+            score = score_list[ino]
+            geo = geo_list[ino]
+            boxes = self.detect(
+                score_map=score,
+                geo_map=geo,
+                score_thresh=self.score_thresh,
+                cover_thresh=self.cover_thresh,
+                nms_thresh=self.nms_thresh)
+            boxes_norm = []
+            if len(boxes) > 0:
+                ratio_h, ratio_w = ratio_list[ino]
+                boxes = boxes[:, :8].reshape((-1, 4, 2))
+                boxes[:, :, 0] /= ratio_w
+                boxes[:, :, 1] /= ratio_h
+                for i_box, box in enumerate(boxes):
+                    box = self.sort_poly(box.astype(np.int32))
+                    if np.linalg.norm(box[0] - box[1]) < 5 \
+                        or np.linalg.norm(box[3] - box[0]) < 5:
+                        continue
+                    boxes_norm.append(box)
+            dt_boxes_list.append(np.array(boxes_norm))
+        return dt_boxes_list
--- a/ppocr/postprocess/locality_aware_nms.py
+++ b/ppocr/postprocess/locality_aware_nms.py
@ -0,0 +1,199 @@
+"""
+Locality aware nms.
+"""
+
+import numpy as np
+from shapely.geometry import Polygon
+
+
+def intersection(g, p):
+    """
+    Intersection.
+    """
+    g = Polygon(g[:8].reshape((4, 2)))
+    p = Polygon(p[:8].reshape((4, 2)))
+    g = g.buffer(0)
+    p = p.buffer(0)
+    if not g.is_valid or not p.is_valid:
+        return 0
+    inter = Polygon(g).intersection(Polygon(p)).area
+    union = g.area + p.area - inter
+    if union == 0:
+        return 0
+    else:
+        return inter / union
+
+
+def intersection_iog(g, p):
+    """
+    Intersection_iog.
+    """
+    g = Polygon(g[:8].reshape((4, 2)))
+    p = Polygon(p[:8].reshape((4, 2)))
+    if not g.is_valid or not p.is_valid:
+        return 0
+    inter = Polygon(g).intersection(Polygon(p)).area
+    #union = g.area + p.area - inter
+    union = p.area
+    if union == 0:
+        print("p_area is very small")
+        return 0
+    else:
+        return inter / union
+
+
+def weighted_merge(g, p):
+    """
+    Weighted merge.
+    """
+    g[:8] = (g[8] * g[:8] + p[8] * p[:8]) / (g[8] + p[8])
+    g[8] = (g[8] + p[8])
+    return g
+
+
+def standard_nms(S, thres):
+    """
+    Standard nms.
+    """
+    order = np.argsort(S[:, 8])[::-1]
+    keep = []
+    while order.size > 0:
+        i = order[0]
+        keep.append(i)
+        ovr = np.array([intersection(S[i], S[t]) for t in order[1:]])
+
+        inds = np.where(ovr <= thres)[0]
+        order = order[inds + 1]
+
+    return S[keep]
+
+
+def standard_nms_inds(S, thres):
+    """
+    Standard nms, retun inds.
+    """
+    order = np.argsort(S[:, 8])[::-1]
+    keep = []
+    while order.size > 0:
+        i = order[0]
+        keep.append(i)
+        ovr = np.array([intersection(S[i], S[t]) for t in order[1:]])
+
+        inds = np.where(ovr <= thres)[0]
+        order = order[inds + 1]
+
+    return keep
+
+
+def nms(S, thres):
+    """
+    nms.
+    """
+    order = np.argsort(S[:, 8])[::-1]
+    keep = []
+    while order.size > 0:
+        i = order[0]
+        keep.append(i)
+        ovr = np.array([intersection(S[i], S[t]) for t in order[1:]])
+
+        inds = np.where(ovr <= thres)[0]
+        order = order[inds + 1]
+
+    return keep
+
+
+def soft_nms(boxes_in, Nt_thres=0.3, threshold=0.8, sigma=0.5, method=2):
+    """
+    Soft non-maximum suppression on a copy of ``boxes_in``.
+
+    Instead of dropping every overlapping box outright, the score of a box
+    that overlaps the current best box is multiplied by a weight; a box is
+    discarded only once its score falls below ``threshold``.
+
+    :param boxes_in: N x 9 array (8 coordinates + score in column 8); it is
+        copied, the input itself is not modified
+    :param Nt_thres: overlap threshold used by the linear (method 1) and
+        hard (any other method value) weighting; unused by method 2
+    :param threshold: boxes whose decayed score drops below this value are
+        discarded (default 0.8)
+    :param sigma: divisor inside the gaussian weight of method 2
+    :param method: 1 = linear decay, 2 = gaussian decay, anything else =
+        hard suppression (weight 0 above ``Nt_thres``)
+    :return: the surviving boxes, or an empty array when there is no input
+    """
+    boxes = boxes_in.copy()
+    N = boxes.shape[0]
+    if N is None or N < 1:
+        return np.array([])
+    pos, maxpos = 0, 0
+    weight = 0.0
+    # inds is permuted together with boxes but is not part of the result
+    inds = np.arange(N)
+    tbox, sbox = boxes[0].copy(), boxes[0].copy()
+    # range(N) is evaluated once, with the initial box count; N itself
+    # shrinks below as boxes are discarded
+    for i in range(N):
+        maxscore = boxes[i, 8]
+        maxpos = i
+        tbox = boxes[i].copy()
+        ti = inds[i]
+        pos = i + 1
+        # find the highest-scoring box among positions i .. N-1
+        while pos < N:
+            if maxscore < boxes[pos, 8]:
+                maxscore = boxes[pos, 8]
+                maxpos = pos
+            pos = pos + 1
+        # move that box to position i ...
+        boxes[i, :] = boxes[maxpos, :]
+        inds[i] = inds[maxpos]
+        # ... and put the box previously at i where the best one was
+        boxes[maxpos, :] = tbox
+        inds[maxpos] = ti
+        tbox = boxes[i].copy()
+        pos = i + 1
+        # decay the scores of all later boxes that overlap the box at i
+        while pos < N:
+            sbox = boxes[pos].copy()
+            ts_iou_val = intersection(tbox, sbox)
+            if ts_iou_val > 0:
+                if method == 1:
+                    if ts_iou_val > Nt_thres:
+                        weight = 1 - ts_iou_val
+                    else:
+                        weight = 1
+                elif method == 2:
+                    weight = np.exp(-1.0 * ts_iou_val**2 / sigma)
+                else:
+                    if ts_iou_val > Nt_thres:
+                        weight = 0
+                    else:
+                        weight = 1
+                boxes[pos, 8] = weight * boxes[pos, 8]
+                # if the score falls below the threshold, discard the box by
+                # overwriting it with the last box and shrinking N; pos is
+                # stepped back so the moved-in box is examined next
+                if boxes[pos, 8] < threshold:
+                    boxes[pos, :] = boxes[N - 1, :]
+                    inds[pos] = inds[N - 1]
+                    N = N - 1
+                    pos = pos - 1
+            pos = pos + 1
+
+    return boxes[:N]
+
+
+def nms_locality(polys, thres=0.3):
+    """
+    locality aware nms of EAST
+    :param polys: a N*9 numpy array. first 8 coordinates, then prob
+    :return: boxes after nms
+    """
+    S = []
+    p = None
+    for g in polys:
+        if p is not None and intersection(g, p) > thres:
+            p = weighted_merge(g, p)
+        else:
+            if p is not None:
+                S.append(p)
+            p = g
+    if p is not None:
+        S.append(p)
+
+    if len(S) == 0:
+        return np.array([])
+    return standard_nms(np.array(S), thres)
+
+
+if __name__ == '__main__':
+    # 343,350,448,135,474,143,369,359
+    print(
+        Polygon(np.array([[343, 350], [448, 135], [474, 143], [369, 359]]))
+        .area)
--- a/ppocr/utils/init.py
+++ b/ppocr/utils/init.py
@ -0,0 +1,13 @@
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
--- a/ppocr/utils/character.py
+++ b/ppocr/utils/character.py
@ -0,0 +1,171 @@
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import numpy as np
+import string
+import re
+from .check import check_config_params
+import sys
+
+
+class CharacterOps(object):
+    """ Convert between text-label and text-index """
+
+    def __init__(self, config):
+        self.character_type = config['character_type']
+        self.loss_type = config['loss_type']
+        if self.character_type == "en":
+            self.character_str = "0123456789abcdefghijklmnopqrstuvwxyz"
+            dict_character = list(self.character_str)
+        elif self.character_type == "ch":
+            character_dict_path = config['character_dict_path']
+            self.character_str = ""
+            with open(character_dict_path, "rb") as fin:
+                lines = fin.readlines()
+                for line in lines:
+                    line = line.decode('utf-8').strip("\n")
+                    self.character_str += line
+            dict_character = list(self.character_str)
+        elif self.character_type == "en_sensitive":
+            # same with ASTER setting (use 94 char).
+            self.character_str = string.printable[:-6]
+            dict_character = list(self.character_str)
+        else:
+            self.character_str = None
+        assert self.character_str is not None, \
+            "Nonsupport type of the character: {}".format(self.character_str)
+        self.beg_str = "sos"
+        self.end_str = "eos"
+        if self.loss_type == "attention":
+            dict_character = [self.beg_str, self.end_str] + dict_character
+        self.dict = {}
+        for i, char in enumerate(dict_character):
+            self.dict[char] = i
+        self.character = dict_character
+
+    def encode(self, text):
+        """convert text-label into text-index.
+        input:
+            text: text labels of each image. [batch_size]
+
+        output:
+            text: concatenated text index for CTCLoss.
+                    [sum(text_lengths)] = [text_index_0 + text_index_1 + ... + text_index_(n - 1)]
+            length: length of each text. [batch_size]
+        """
+        if self.character_type == "en":
+            text = text.lower()
+
+        text_list = []
+        for char in text:
+            if char not in self.dict:
+                continue
+            text_list.append(self.dict[char])
+        text = np.array(text_list)
+        return text
+
+    def decode(self, text_index, is_remove_duplicate=False):
+        """ convert text-index into text-label. """
+        char_list = []
+        char_num = self.get_char_num()
+
+        if self.loss_type == "attention":
+            beg_idx = self.get_beg_end_flag_idx("beg")
+            end_idx = self.get_beg_end_flag_idx("end")
+            ignored_tokens = [beg_idx, end_idx]
+        else:
+            ignored_tokens = [char_num]
+
+        for idx in range(len(text_index)):
+            if text_index[idx] in ignored_tokens:
+                continue
+            if is_remove_duplicate:
+                if idx > 0 and text_index[idx - 1] == text_index[idx]:
+                    continue
+            char_list.append(self.character[text_index[idx]])
+        text = ''.join(char_list)
+        return text
+
+    def get_char_num(self):
+        return len(self.character)
+
+    def get_beg_end_flag_idx(self, beg_or_end):
+        if self.loss_type == "attention":
+            if beg_or_end == "beg":
+                idx = np.array(self.dict[self.beg_str])
+            elif beg_or_end == "end":
+                idx = np.array(self.dict[self.end_str])
+            else:
+                assert False, "Unsupport type %s in get_beg_end_flag_idx"\
+                    % beg_or_end
+            return idx
+        else:
+            err = "error in get_beg_end_flag_idx when using the loss %s"\
+                % (self.loss_type)
+            assert False, err
+
+
+def cal_predicts_accuracy(char_ops,
+                          preds,
+                          preds_lod,
+                          labels,
+                          labels_lod,
+                          is_remove_duplicate=False):
+    acc_num = 0
+    img_num = 0
+    for ino in range(len(labels_lod) - 1):
+        beg_no = preds_lod[ino]
+        end_no = preds_lod[ino + 1]
+        preds_text = preds[beg_no:end_no].reshape(-1)
+        preds_text = char_ops.decode(preds_text, is_remove_duplicate)
+
+        beg_no = labels_lod[ino]
+        end_no = labels_lod[ino + 1]
+        labels_text = labels[beg_no:end_no].reshape(-1)
+        labels_text = char_ops.decode(labels_text, is_remove_duplicate)
+        img_num += 1
+
+        if preds_text == labels_text:
+            acc_num += 1
+    acc = acc_num * 1.0 / img_num
+    return acc, acc_num, img_num
+
+
+def convert_rec_attention_infer_res(preds):
+    img_num = preds.shape[0]
+    target_lod = [0]
+    convert_ids = []
+    for ino in range(img_num):
+        end_pos = np.where(preds[ino, :] == 1)[0]
+        if len(end_pos) <= 1:
+            text_list = preds[ino, 1:]
+        else:
+            text_list = preds[ino, 1:end_pos[1]]
+        target_lod.append(target_lod[ino] + len(text_list))
+        convert_ids = convert_ids + list(text_list)
+    convert_ids = np.array(convert_ids)
+    convert_ids = convert_ids.reshape((-1, 1))
+    return convert_ids, target_lod
+
+
+def convert_rec_label_to_lod(ori_labels):
+    img_num = len(ori_labels)
+    target_lod = [0]
+    convert_ids = []
+    for ino in range(img_num):
+        target_lod.append(target_lod[ino] + len(ori_labels[ino]))
+        convert_ids = convert_ids + list(ori_labels[ino])
+    convert_ids = np.array(convert_ids)
+    convert_ids = convert_ids.reshape((-1, 1))
+    return convert_ids, target_lod
--- a/ppocr/utils/check.py
+++ b/ppocr/utils/check.py
@ -0,0 +1,33 @@
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+from __future__ import unicode_literals
+
+import sys
+
+import paddle.fluid as fluid
+
+import logging
+logger = logging.getLogger(__name__)
+
+
+def check_config_params(config, config_name, params):
+    for param in params:
+        if param not in config:
+            err = "param %s didn't find in %s!" % (param, config_name)
+            assert False, err
+    return
--- a/ppocr/utils/ppocr_keys_v1.txt
+++ b/ppocr/utils/ppocr_keys_v1.txt
--- a/ppocr/utils/save_load.py
+++ b/ppocr/utils/save_load.py
@ -0,0 +1,131 @@
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
+#
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import errno
+import os
+import shutil
+import tempfile
+
+import paddle
+import paddle.fluid as fluid
+
+from .utility import initial_logger
+import re
+logger = initial_logger()
+
+
+def _mkdir_if_not_exist(path):
+    """
+    mkdir if not exists, ignore the exception when multiprocess mkdir together
+    """
+    if not os.path.exists(path):
+        try:
+            os.makedirs(path)
+        except OSError as e:
+            if e.errno == errno.EEXIST and os.path.isdir(path):
+                logger.warning(
+                    'be happy if some process has already created {}'.format(
+                        path))
+            else:
+                raise OSError('Failed to mkdir {}'.format(path))
+
+
+def _load_state(path):
+    if os.path.exists(path + '.pdopt'):
+        # XXX another hack to ignore the optimizer state
+        tmp = tempfile.mkdtemp()
+        dst = os.path.join(tmp, os.path.basename(os.path.normpath(path)))
+        shutil.copy(path + '.pdparams', dst + '.pdparams')
+        state = fluid.io.load_program_state(dst)
+        shutil.rmtree(tmp)
+    else:
+        state = fluid.io.load_program_state(path)
+    return state
+
+
+def load_params(exe, prog, path, ignore_params=[]):
+    """
+    Load model from the given path.
+    Args:
+        exe (fluid.Executor): The fluid.Executor object.
+        prog (fluid.Program): load weight to which Program object.
+        path (string): URL string or loca model path.
+        ignore_params (list): ignore variable to load when finetuning.
+            It can be specified by finetune_exclude_pretrained_params
+            and the usage can refer to docs/advanced_tutorials/TRANSFER_LEARNING.md
+    """
+    if not (os.path.isdir(path) or os.path.exists(path + '.pdparams')):
+        raise ValueError("Model pretrain path {} does not "
+                         "exists.".format(path))
+
+    logger.info('Loading parameters from {}...'.format(path))
+
+    ignore_set = set()
+    state = _load_state(path)
+
+    # ignore the parameter which mismatch the shape
+    # between the model and pretrain weight.
+    all_var_shape = {}
+    for block in prog.blocks:
+        for param in block.all_parameters():
+            all_var_shape[param.name] = param.shape
+    ignore_set.update([
+        name for name, shape in all_var_shape.items()
+        if name in state and shape != state[name].shape
+    ])
+
+    if ignore_params:
+        all_var_names = [var.name for var in prog.list_vars()]
+        ignore_list = filter(
+            lambda var: any([re.match(name, var) for name in ignore_params]),
+            all_var_names)
+        ignore_set.update(list(ignore_list))
+
+    if len(ignore_set) > 0:
+        for k in ignore_set:
+            if k in state:
+                logger.warning('variable {} not used'.format(k))
+                del state[k]
+    fluid.io.set_program_state(prog, state)
+
+
+def init_model(config, program, exe):
+    """
+    load model from checkpoint or pretrained_model
+    """
+    checkpoints = config['Global'].get('checkpoints')
+    if checkpoints:
+        path = checkpoints
+        fluid.load(program, path, exe)
+        logger.info("Finish initing model from {}".format(path))
+        return
+
+    pretrain_weights = config['Global'].get('pretrain_weights')
+    if pretrain_weights:
+        path = pretrain_weights
+        load_params(exe, program, path)
+        logger.info("Finish initing model from {}".format(path))
+    return
+
+
+def save_model(program, model_path):
+    """
+    save model to the target path
+    """
+    fluid.save(program, model_path)
+    logger.info("Already save model in {}".format(model_path))
--- a/ppocr/utils/stats.py
+++ b/ppocr/utils/stats.py
@ -0,0 +1,65 @@
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import collections
+import numpy as np
+import datetime
+
+__all__ = ['TrainingStats', 'Time']
+
+
+class SmoothedValue(object):
+    """Track a series of values and provide access to smoothed values over a
+    window or the global series average.
+    """
+
+    def __init__(self, window_size):
+        self.deque = collections.deque(maxlen=window_size)
+
+    def add_value(self, value):
+        self.deque.append(value)
+
+    def get_median_value(self):
+        return np.median(self.deque)
+
+
+def Time():
+    return datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S.%f')
+
+
+class TrainingStats(object):
+    def __init__(self, window_size, stats_keys):
+        self.smoothed_losses_and_metrics = {
+            key: SmoothedValue(window_size)
+            for key in stats_keys
+        }
+
+    def update(self, stats):
+        for k, v in self.smoothed_losses_and_metrics.items():
+            v.add_value(stats[k])
+
+    def get(self, extras=None):
+        stats = collections.OrderedDict()
+        if extras:
+            for k, v in extras.items():
+                stats[k] = v
+        for k, v in self.smoothed_losses_and_metrics.items():
+            stats[k] = round(v.get_median_value(), 6)
+
+        return stats
+
+    def log(self, extras=None):
+        d = self.get(extras)
+        strs = ', '.join(str(dict({x: y})).strip('{}') for x, y in d.items())
+        return strs
--- a/ppocr/utils/utility.py
+++ b/ppocr/utils/utility.py
@ -0,0 +1,71 @@
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import logging
+
+
+def initial_logger():
+    FORMAT = '%(asctime)s-%(levelname)s: %(message)s'
+    logging.basicConfig(level=logging.INFO, format=FORMAT)
+    logger = logging.getLogger(__name__)
+    return logger
+
+
+import importlib
+
+
+def create_module(module_str):
+    tmpss = module_str.split(",")
+    assert len(tmpss) == 2, "Error formate\
+        of the module path: {}".format(module_str)
+    module_name, function_name = tmpss[0], tmpss[1]
+    somemodule = importlib.import_module(module_name, __package__)
+    function = getattr(somemodule, function_name)
+    return function
+
+
+def get_check_global_params(mode):
+    check_params = ['use_gpu', 'max_text_length', 'image_shape',\
+        'image_shape', 'character_type', 'loss_type']
+    if mode == "train_eval":
+        check_params = check_params + [\
+            'train_batch_size_per_card', 'test_batch_size_per_card']
+    elif mode == "test":
+        check_params = check_params + ['test_batch_size_per_card']
+    return check_params
+
+
+def get_check_reader_params(mode):
+    check_params = []
+    if mode == "train_eval":
+        check_params = ['TrainReader', 'EvalReader']
+    elif mode == "test":
+        check_params = ['TestReader']
+    return check_params
+
+
+from paddle import fluid
+
+
+def create_multi_devices_program(program, loss_var_name):
+    build_strategy = fluid.BuildStrategy()
+    build_strategy.memory_optimize = False
+    build_strategy.enable_inplace = True
+    exec_strategy = fluid.ExecutionStrategy()
+    exec_strategy.num_iteration_per_drop_scope = 1
+    compile_program = fluid.CompiledProgram(program).with_data_parallel(
+        loss_name=loss_var_name,
+        build_strategy=build_strategy,
+        exec_strategy=exec_strategy)
+    return compile_program
--- a/tools/eval.py
+++ b/tools/eval.py
@ -0,0 +1,102 @@
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import os
+
+
+def set_paddle_flags(**kwargs):
+    for key, value in kwargs.items():
+        if os.environ.get(key, None) is None:
+            os.environ[key] = str(value)
+
+
+# NOTE(paddle-dev): All of these flags should be
+# set before `import paddle`. Otherwise, it would
+# not take any effect.
+set_paddle_flags(
+    FLAGS_eager_delete_tensor_gb=0,  # enable GC to save memory
+)
+
+import program
+from paddle import fluid
+from ppocr.utils.utility import initial_logger
+logger = initial_logger()
+from ppocr.data.reader_main import reader_main
+from ppocr.utils.save_load import init_model
+from eval_utils.eval_det_utils import eval_det_run
+from eval_utils.eval_rec_utils import test_rec_benchmark
+from eval_utils.eval_rec_utils import eval_rec_run
+from ppocr.utils.character import CharacterOps
+
+
+def main():
+    config = program.load_config(FLAGS.config)
+    program.merge_config(FLAGS.opt)
+    logger.info(config)
+
+    # check if set use_gpu=True in paddlepaddle cpu version
+    use_gpu = config['Global']['use_gpu']
+    program.check_gpu(True)
+
+    alg = config['Global']['algorithm']
+    assert alg in ['EAST', 'DB', 'Rosetta', 'CRNN', 'STARNet', 'RARE']
+    if alg in ['Rosetta', 'CRNN', 'STARNet', 'RARE']:
+        config['Global']['char_ops'] = CharacterOps(config['Global'])
+
+    place = fluid.CUDAPlace(0) if use_gpu else fluid.CPUPlace()
+    startup_prog = fluid.Program()
+    eval_program = fluid.Program()
+    eval_build_outputs = program.build(
+        config, eval_program, startup_prog, mode='test')
+    eval_fetch_name_list = eval_build_outputs[1]
+    eval_fetch_varname_list = eval_build_outputs[2]
+    eval_program = eval_program.clone(for_test=True)
+    exe = fluid.Executor(place)
+    exe.run(startup_prog)
+
+    init_model(config, eval_program, exe)
+
+    if alg in ['EAST', 'DB']:
+        eval_reader = reader_main(config=config, mode="test")
+        eval_info_dict = {'program':eval_program,\
+            'reader':eval_reader,\
+            'fetch_name_list':eval_fetch_name_list,\
+            'fetch_varname_list':eval_fetch_varname_list}
+        metrics = eval_det_run(exe, config, eval_info_dict, "test")
+    else:
+        dataset = config['Global']['dataset']
+        assert dataset in ['lmdb', 'common']
+        if dataset == 'common':
+            eval_reader = reader_main(config=config, mode="eval")
+            eval_info_dict = {'program': eval_program, \
+                              'reader': eval_reader, \
+                              'fetch_name_list': eval_fetch_name_list, \
+                              'fetch_varname_list': eval_fetch_varname_list}
+            metrics = eval_rec_run(exe, config, eval_info_dict, "eval")
+            print("Eval result:", metrics)
+        else:
+            eval_info_dict = {'program':eval_program,\
+                'fetch_name_list':eval_fetch_name_list,\
+                'fetch_varname_list':eval_fetch_varname_list}
+            test_rec_benchmark(exe, config, eval_info_dict)
+
+
+if __name__ == '__main__':
+    # FLAGS is deliberately a module-level name: main() reads FLAGS.config
+    # and FLAGS.opt, so it has to be assigned before main() is called.
+    parser = program.ArgsParser()
+    FLAGS = parser.parse_args()
+    main()
--- a/tools/eval_utils/init.py
+++ b/tools/eval_utils/init.py
@ -0,0 +1,13 @@
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
--- a/tools/eval_utils/eval_det_iou.py
+++ b/tools/eval_utils/eval_det_iou.py
@ -0,0 +1,231 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+from collections import namedtuple
+import numpy as np
+from shapely.geometry import Polygon
+
+
+class DetectionIoUEvaluator(object):
+    def __init__(self, iou_constraint=0.5, area_precision_constraint=0.5):
+        self.iou_constraint = iou_constraint
+        self.area_precision_constraint = area_precision_constraint
+
+    def evaluate_image(self, gt, pred):
+        def get_union(pD, pG):
+            return Polygon(pD).union(Polygon(pG)).area
+
+        def get_intersection_over_union(pD, pG):
+            return get_intersection(pD, pG) / get_union(pD, pG)
+
+        def get_intersection(pD, pG):
+            return Polygon(pD).intersection(Polygon(pG)).area
+
+        def compute_ap(confList, matchList, numGtCare):
+            correct = 0
+            AP = 0
+            if len(confList) > 0:
+                confList = np.array(confList)
+                matchList = np.array(matchList)
+                sorted_ind = np.argsort(-confList)
+                confList = confList[sorted_ind]
+                matchList = matchList[sorted_ind]
+                for n in range(len(confList)):
+                    match = matchList[n]
+                    if match:
+                        correct += 1
+                        AP += float(correct) / (n + 1)
+
+                if numGtCare > 0:
+                    AP /= numGtCare
+
+            return AP
+
+        perSampleMetrics = {}
+
+        matchedSum = 0
+
+        Rectangle = namedtuple('Rectangle', 'xmin ymin xmax ymax')
+
+        numGlobalCareGt = 0
+        numGlobalCareDet = 0
+
+        arrGlobalConfidences = []
+        arrGlobalMatches = []
+
+        recall = 0
+        precision = 0
+        hmean = 0
+
+        detMatched = 0
+
+        iouMat = np.empty([1, 1])
+
+        gtPols = []
+        detPols = []
+
+        gtPolPoints = []
+        detPolPoints = []
+
+        # Array of Ground Truth Polygons' keys marked as don't Care
+        gtDontCarePolsNum = []
+        # Array of Detected Polygons' matched with a don't Care GT
+        detDontCarePolsNum = []
+
+        pairs = []
+        detMatchedNums = []
+
+        arrSampleConfidences = []
+        arrSampleMatch = []
+
+        evaluationLog = ""
+
+        # print(len(gt))
+        for n in range(len(gt)):
+            points = gt[n]['points']
+            # transcription = gt[n]['text']
+            dontCare = gt[n]['ignore']
+            points = Polygon(points)
+            points = points.buffer(0)
+            if not Polygon(points).is_valid or not Polygon(points).is_simple:
+                continue
+
+            gtPol = points
+            gtPols.append(gtPol)
+            gtPolPoints.append(points)
+            if dontCare:
+                gtDontCarePolsNum.append(len(gtPols) - 1)
+
+        evaluationLog += "GT polygons: " + str(len(gtPols)) + (
+            " (" + str(len(gtDontCarePolsNum)) + " don't care)\n"
+            if len(gtDontCarePolsNum) > 0 else "\n")
+
+        for n in range(len(pred)):
+            points = pred[n]['points']
+            points = Polygon(points)
+            points = points.buffer(0)
+            if not Polygon(points).is_valid or not Polygon(points).is_simple:
+                continue
+
+            detPol = points
+            detPols.append(detPol)
+            detPolPoints.append(points)
+            if len(gtDontCarePolsNum) > 0:
+                for dontCarePol in gtDontCarePolsNum:
+                    dontCarePol = gtPols[dontCarePol]
+                    intersected_area = get_intersection(dontCarePol, detPol)
+                    pdDimensions = Polygon(detPol).area
+                    precision = 0 if pdDimensions == 0 else intersected_area / pdDimensions
+                    if (precision > self.area_precision_constraint):
+                        detDontCarePolsNum.append(len(detPols) - 1)
+                        break
+
+        evaluationLog += "DET polygons: " + str(len(detPols)) + (
+            " (" + str(len(detDontCarePolsNum)) + " don't care)\n"
+            if len(detDontCarePolsNum) > 0 else "\n")
+
+        if len(gtPols) > 0 and len(detPols) > 0:
+            # Calculate IoU and precision matrixs
+            outputShape = [len(gtPols), len(detPols)]
+            iouMat = np.empty(outputShape)
+            gtRectMat = np.zeros(len(gtPols), np.int8)
+            detRectMat = np.zeros(len(detPols), np.int8)
+            for gtNum in range(len(gtPols)):
+                for detNum in range(len(detPols)):
+                    pG = gtPols[gtNum]
+                    pD = detPols[detNum]
+                    iouMat[gtNum, detNum] = get_intersection_over_union(pD, pG)
+
+            for gtNum in range(len(gtPols)):
+                for detNum in range(len(detPols)):
+                    if gtRectMat[gtNum] == 0 and detRectMat[
+                            detNum] == 0 and gtNum not in gtDontCarePolsNum and detNum not in detDontCarePolsNum:
+                        if iouMat[gtNum, detNum] > self.iou_constraint:
+                            gtRectMat[gtNum] = 1
+                            detRectMat[detNum] = 1
+                            detMatched += 1
+                            pairs.append({'gt': gtNum, 'det': detNum})
+                            detMatchedNums.append(detNum)
+                            evaluationLog += "Match GT #" + \
+                                str(gtNum) + " with Det #" + str(detNum) + "\n"
+
+        numGtCare = (len(gtPols) - len(gtDontCarePolsNum))
+        numDetCare = (len(detPols) - len(detDontCarePolsNum))
+        if numGtCare == 0:
+            recall = float(1)
+            precision = float(0) if numDetCare > 0 else float(1)
+        else:
+            recall = float(detMatched) / numGtCare
+            precision = 0 if numDetCare == 0 else float(detMatched) / numDetCare
+
+        hmean = 0 if (precision + recall) == 0 else 2.0 * \
+            precision * recall / (precision + recall)
+
+        matchedSum += detMatched
+        numGlobalCareGt += numGtCare
+        numGlobalCareDet += numDetCare
+
+        perSampleMetrics = {
+            'precision': precision,
+            'recall': recall,
+            'hmean': hmean,
+            'pairs': pairs,
+            'iouMat': [] if len(detPols) > 100 else iouMat.tolist(),
+            'gtPolPoints': gtPolPoints,
+            'detPolPoints': detPolPoints,
+            'gtCare': numGtCare,
+            'detCare': numDetCare,
+            'gtDontCare': gtDontCarePolsNum,
+            'detDontCare': detDontCarePolsNum,
+            'detMatched': detMatched,
+            'evaluationLog': evaluationLog
+        }
+
+        return perSampleMetrics
+
+    def combine_results(self, results):
+        numGlobalCareGt = 0
+        numGlobalCareDet = 0
+        matchedSum = 0
+        for result in results:
+            numGlobalCareGt += result['gtCare']
+            numGlobalCareDet += result['detCare']
+            matchedSum += result['detMatched']
+
+        methodRecall = 0 if numGlobalCareGt == 0 else float(
+            matchedSum) / numGlobalCareGt
+        methodPrecision = 0 if numGlobalCareDet == 0 else float(
+            matchedSum) / numGlobalCareDet
+        methodHmean = 0 if methodRecall + methodPrecision == 0 else 2 * \
+            methodRecall * methodPrecision / (methodRecall + methodPrecision)
+        # print(methodRecall, methodPrecision, methodHmean)
+        # sys.exit(-1)
+        methodMetrics = {
+            'precision': methodPrecision,
+            'recall': methodRecall,
+            'hmean': methodHmean
+        }
+
+        return methodMetrics
+
+
+if __name__ == '__main__':
+    evaluator = DetectionIoUEvaluator()
+    gts = [[{
+        'points': [(0, 0), (1, 0), (1, 1), (0, 1)],
+        'text': 1234,
+        'ignore': False,
+    }, {
+        'points': [(2, 2), (3, 2), (3, 3), (2, 3)],
+        'text': 5678,
+        'ignore': False,
+    }]]
+    preds = [[{
+        'points': [(0.1, 0.1), (1, 0), (1, 1), (0, 1)],
+        'text': 123,
+        'ignore': False,
+    }]]
+    results = []
+    for gt, pred in zip(gts, preds):
+        results.append(evaluator.evaluate_image(gt, pred))
+    metrics = evaluator.combine_results(results)
+    print(metrics)
--- a/tools/eval_utils/eval_det_utils.py
+++ b/tools/eval_utils/eval_det_utils.py
@ -0,0 +1,131 @@
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import logging
+import numpy as np
+
+import paddle.fluid as fluid
+
+__all__ = ['eval_det_run']
+
+import logging
+FORMAT = '%(asctime)s-%(levelname)s: %(message)s'
+logging.basicConfig(level=logging.INFO, format=FORMAT)
+logger = logging.getLogger(__name__)
+
+from ppocr.utils.utility import create_module
+from .eval_det_iou import DetectionIoUEvaluator
+import json
+from copy import deepcopy
+import cv2
+from ppocr.data.reader_main import reader_main
+
+
+def cal_det_res(exe, config, eval_info_dict):
+    """
+    Run the detection program over the reader and dump the boxes to disk.
+
+    For every batch the images are fed to `eval_info_dict['program']`, the
+    fetched outputs are handed to the configured post-process module, and
+    one line "<image name><TAB><JSON list of boxes>" per image is written
+    to config['Global']['save_res_path'].
+
+    Args:
+        exe: executor exposing `run(program, feed=..., fetch_list=...)`.
+        config: dict with the 'Global' and 'PostProcess' sections.
+        eval_info_dict: dict with 'reader', 'program', 'fetch_name_list'
+            and 'fetch_varname_list'.
+    """
+    global_params = config['Global']
+    save_res_path = global_params['save_res_path']
+    # post-process parameters = PostProcess section overlaid with Global
+    post_params = deepcopy(config["PostProcess"])
+    post_params.update(global_params)
+    postprocess = create_module(post_params['function'])(params=post_params)
+    fetch_names = eval_info_dict['fetch_name_list']
+    processed = 0
+    with open(save_res_path, "wb") as fout:
+        for batch in eval_info_dict['reader']():
+            batch_size = len(batch)
+            processed += batch_size
+            logger.info("test tackling num:%d", processed)
+            # each sample is indexed as (image, ratio, image name)
+            img_list = [batch[i][0] for i in range(batch_size)]
+            ratio_list = [batch[i][1] for i in range(batch_size)]
+            img_name_list = [batch[i][2] for i in range(batch_size)]
+            feed_images = np.concatenate(img_list, axis=0)
+            outs = exe.run(
+                eval_info_dict['program'],
+                feed={'image': feed_images},
+                fetch_list=eval_info_dict['fetch_varname_list'])
+            outs_dict = {}
+            for idx, out in enumerate(outs):
+                outs_dict[fetch_names[idx]] = np.array(out)
+            dt_boxes_list = postprocess(outs_dict, ratio_list)
+            for idx, img_name in enumerate(img_name_list):
+                boxes_json = []
+                for box in dt_boxes_list[idx]:
+                    entry = {"transcription": ""}
+                    entry['points'] = box.tolist()
+                    boxes_json.append(entry)
+                record = img_name + "\t" + json.dumps(boxes_json) + "\n"
+                fout.write(record.encode())
+
+
+def load_label_infor(label_file_path, do_ignore=False):
+    """
+    Parse a detection label file into a dict keyed by image name.
+
+    Every non-blank line has the form "<image name><TAB><JSON list of
+    boxes>". Each box dict gets an extra 'ignore' key, which is True only
+    when `do_ignore` is set and the box's 'transcription' equals "###".
+
+    Args:
+        label_file_path: path of the label file (decoded with the default
+            codec of `bytes.decode`).
+        do_ignore: whether "###" boxes should be flagged as ignored.
+
+    Returns:
+        dict mapping image name -> list of box dicts.
+    """
+    img_name_label_dict = {}
+    with open(label_file_path, "rb") as fin:
+        for raw_line in fin:
+            line = raw_line.decode().strip("\n")
+            if not line.strip():
+                # Tolerate blank lines (e.g. an extra newline at EOF)
+                # instead of failing with IndexError on the split below.
+                continue
+            substr = line.split("\t")
+            bbox_infor = json.loads(substr[1])
+            for bbox in bbox_infor:
+                bbox['ignore'] = bool(bbox['transcription'] == "###" and
+                                      do_ignore)
+            img_name_label_dict[substr[0]] = bbox_infor
+    return img_name_label_dict
+
+
+def cal_det_metrics(gt_label_path, save_res_path):
+    """
+    Score saved detection results against the ground-truth labels.
+
+    Ground-truth boxes are loaded with "###" boxes flagged as ignored.
+    Images that have no entry in the result file are evaluated against an
+    empty detection list.
+
+    Returns:
+        the dict produced by DetectionIoUEvaluator.combine_results.
+    """
+    evaluator = DetectionIoUEvaluator()
+    gt_by_image = load_label_infor(gt_label_path, do_ignore=True)
+    dt_by_image = load_label_infor(save_res_path)
+    per_image_results = []
+    for img_name, gt_label in gt_by_image.items():
+        dt_label = dt_by_image[img_name] if img_name in dt_by_image else []
+        per_image_results.append(evaluator.evaluate_image(gt_label, dt_label))
+    return evaluator.combine_results(per_image_results)
+
+
+def eval_det_run(exe, config, eval_info_dict, mode):
+    """
+    Run detection inference, save the results and optionally score them.
+
+    Args:
+        exe: executor used by `cal_det_res`.
+        config: full configuration dict.
+        eval_info_dict: reader/program/fetch information for `cal_det_res`.
+        mode: "eval" scores against config['EvalReader']; any other value
+            is treated as test mode and scores against config['TestReader']
+            only when its 'do_eval' flag is truthy.
+
+    Returns:
+        metrics dict from `cal_det_metrics`, or {} when test mode runs
+        without evaluation.
+    """
+    cal_det_res(exe, config, eval_info_dict)
+
+    save_res_path = config['Global']['save_res_path']
+    if mode == "eval":
+        gt_label_path = config['EvalReader']['label_file_path']
+        return cal_det_metrics(gt_label_path, save_res_path)
+
+    test_reader_cfg = config['TestReader']
+    if not test_reader_cfg['do_eval']:
+        return {}
+    # The label path is looked up only when it is needed, so a test
+    # config without evaluation does not have to provide one.
+    gt_label_path = test_reader_cfg['label_file_path']
+    return cal_det_metrics(gt_label_path, save_res_path)
--- a/tools/eval_utils/eval_rec_utils.py
+++ b/tools/eval_utils/eval_rec_utils.py
@ -0,0 +1,111 @@
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import logging
+import numpy as np
+
+import paddle.fluid as fluid
+
+__all__ = ['eval_rec_run', 'test_rec_benchmark']
+
+import logging
+
+FORMAT = '%(asctime)s-%(levelname)s: %(message)s'
+logging.basicConfig(level=logging.INFO, format=FORMAT)
+logger = logging.getLogger(__name__)
+
+from ppocr.utils.character import cal_predicts_accuracy
+from ppocr.utils.character import convert_rec_label_to_lod
+from ppocr.utils.character import convert_rec_attention_infer_res
+from ppocr.utils.utility import create_module
+import json
+from copy import deepcopy
+import cv2
+from ppocr.data.reader_main import reader_main
+
+
+def eval_rec_run(exe, config, eval_info_dict, mode):
+    """
+    Run the recognition program over a reader and compute its accuracy.
+
+    Args:
+        exe: executor exposing `run(program, feed=..., fetch_list=...)`.
+        config: configuration dict; config['Global']['char_ops'] is
+            forwarded to `cal_predicts_accuracy`.
+        eval_info_dict: dict with 'reader', 'program' and
+            'fetch_varname_list'.
+        mode: "eval" passes is_remove_duplicate=False to
+            `cal_predicts_accuracy`; any other value passes True.
+
+    Returns:
+        dict with 'avg_acc', 'total_acc_num' and 'total_sample_num'.
+        'avg_acc' is 0.0 when the reader yields no samples.
+    """
+    char_ops = config['Global']['char_ops']
+    is_remove_duplicate = mode != "eval"
+    total_sample_num = 0
+    total_acc_num = 0
+
+    for data in eval_info_dict['reader']():
+        img_num = len(data)
+        img_list = []
+        label_list = []
+        for ino in range(img_num):
+            img_list.append(data[ino][0])
+            label_list.append(data[ino][1])
+        img_list = np.concatenate(img_list, axis=0)
+        outs = exe.run(eval_info_dict['program'], \
+                       feed={'image': img_list}, \
+                       fetch_list=eval_info_dict['fetch_varname_list'], \
+                       return_numpy=False)
+        preds = np.array(outs[0])
+        if preds.shape[1] != 1:
+            # NOTE(review): a second dimension other than 1 is routed to the
+            # attention conversion helper -- presumably attention-decoder
+            # output; confirm against the model heads.
+            preds, preds_lod = convert_rec_attention_infer_res(preds)
+        else:
+            preds_lod = outs[0].lod()[0]
+        labels, labels_lod = convert_rec_label_to_lod(label_list)
+        acc, acc_num, sample_num = cal_predicts_accuracy(
+            char_ops, preds, preds_lod, labels, labels_lod, is_remove_duplicate)
+        total_acc_num += acc_num
+        total_sample_num += sample_num
+
+    # Guard against an empty reader instead of raising ZeroDivisionError.
+    if total_sample_num:
+        avg_acc = total_acc_num * 1.0 / total_sample_num
+    else:
+        avg_acc = 0.0
+    metrics = {'avg_acc': avg_acc, "total_acc_num": total_acc_num, \
+               "total_sample_num": total_sample_num}
+    return metrics
+
+
+def test_rec_benchmark(exe, config, eval_info_dict):
+    """
+    Evaluate on the LMDB benchmark sets and log per-set and overall accuracy.
+
+    For every benchmark name, config['TestReader']['lmdb_sets_dir'] is
+    pointed at "<base dir>/<name>", a fresh test reader is built and stored
+    in eval_info_dict['reader'], and `eval_rec_run` is executed in "test"
+    mode. The original 'lmdb_sets_dir' is restored afterwards so that the
+    function can be called repeatedly with the same config.
+    """
+    eval_data_list = ['IIIT5k_3000', 'SVT', 'IC03_860', 'IC03_867', \
+                      'IC13_857', 'IC13_1015', 'IC15_1811', 'IC15_2077', 'SVTP', 'CUTE80']
+    eval_data_dir = config['TestReader']['lmdb_sets_dir']
+    total_evaluation_data_number = 0
+    total_correct_number = 0
+    eval_data_acc_info = {}
+    try:
+        for eval_data in eval_data_list:
+            config['TestReader']['lmdb_sets_dir'] = \
+                eval_data_dir + "/" + eval_data
+            eval_reader = reader_main(config=config, mode="test")
+            eval_info_dict['reader'] = eval_reader
+            metrics = eval_rec_run(exe, config, eval_info_dict, "test")
+            total_evaluation_data_number += metrics['total_sample_num']
+            total_correct_number += metrics['total_acc_num']
+            eval_data_acc_info[eval_data] = metrics
+    finally:
+        # Undo the in-place edit; otherwise a second call would build
+        # paths such as "<base dir>/CUTE80/IIIT5k_3000".
+        config['TestReader']['lmdb_sets_dir'] = eval_data_dir
+
+    # Avoid ZeroDivisionError when none of the sets yielded a sample.
+    if total_evaluation_data_number:
+        avg_acc = total_correct_number * 1.0 / total_evaluation_data_number
+    else:
+        avg_acc = 0.0
+    logger.info('-' * 50)
+    strs = ""
+    for eval_data in eval_data_list:
+        eval_acc = eval_data_acc_info[eval_data]['avg_acc']
+        strs += "\n {}, accuracy:{:.6f}".format(eval_data, eval_acc)
+    strs += "\n average, accuracy:{:.6f}".format(avg_acc)
+    logger.info(strs)
+    logger.info('-' * 50)
--- a/tools/export_model.py
+++ b/tools/export_model.py
@ -0,0 +1,88 @@
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import os
+import sys
+import time
+import multiprocessing
+import numpy as np
+
+
+def set_paddle_flags(**kwargs):
+    """Export every keyword as an environment variable unless already set."""
+    for flag_name in kwargs:
+        if flag_name not in os.environ:
+            os.environ[flag_name] = str(kwargs[flag_name])
+
+
+# NOTE(paddle-dev): All of these flags must be set
+# before `import paddle`; otherwise they will not
+# take effect.
+set_paddle_flags(
+    FLAGS_eager_delete_tensor_gb=0,  # enable GC to save memory
+)
+
+import program
+from paddle import fluid
+from ppocr.utils.utility import initial_logger
+logger = initial_logger()
+from ppocr.utils.save_load import init_model
+from ppocr.utils.character import CharacterOps
+from ppocr.utils.utility import create_module
+
+
+def main():
+    """
+    Build the export program for the configured algorithm, load the trained
+    weights and save an inference model under "./output/".
+
+    Reads the command-line options from the module-level FLAGS object that
+    the `__main__` guard creates before calling this function.
+    """
+    config = program.load_config(FLAGS.config)
+    program.merge_config(FLAGS.opt)
+    logger.info(config)
+
+    # check if set use_gpu=True in paddlepaddle cpu version
+    use_gpu = config['Global']['use_gpu']
+    # Pass the configured value rather than a hard-coded True so the check
+    # reflects what the config actually requests.
+    # NOTE(review): assumes program.check_gpu only objects when its argument
+    # is truthy on a CPU-only build -- confirm against tools/program.py.
+    program.check_gpu(use_gpu)
+
+    alg = config['Global']['algorithm']
+    assert alg in ['EAST', 'DB', 'Rosetta', 'CRNN', 'STARNet', 'RARE']
+    if alg in ['Rosetta', 'CRNN', 'STARNet', 'RARE']:
+        # these algorithms get a CharacterOps instance in the Global config
+        config['Global']['char_ops'] = CharacterOps(config['Global'])
+
+    place = fluid.CUDAPlace(0) if use_gpu else fluid.CPUPlace()
+    startup_prog = fluid.Program()
+    eval_program = fluid.Program()
+
+    feeded_var_names, target_vars, fetches_var_name = program.build_export(
+        config, eval_program, startup_prog)
+    eval_program = eval_program.clone(for_test=True)
+    exe = fluid.Executor(place)
+    exe.run(startup_prog)
+
+    init_model(config, eval_program, exe)
+
+    fluid.io.save_inference_model(
+        dirname="./output/",
+        feeded_var_names=feeded_var_names,
+        main_program=eval_program,
+        target_vars=target_vars,
+        executor=exe,
+        model_filename='model',
+        params_filename='params')
+    print("save success, output_name_list:", fetches_var_name)
+
+
+if __name__ == '__main__':
+    # FLAGS is deliberately module-level: main() reads it as a global.
+    parser = program.ArgsParser()
+    FLAGS = parser.parse_args()
+    main()
--- a/tools/infer/det_program.txt
+++ b/tools/infer/det_program.txt
@ -0,0 +1 @@
+<paddle.fluid.core_avx.ProgramDesc object at 0x10d15fab0>
--- a/tools/infer/predict_det.py
+++ b/tools/infer/predict_det.py
@ -0,0 +1,169 @@
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import utility
+from ppocr.utils.utility import initial_logger
+logger = initial_logger()
+import cv2
+from ppocr.data.det.east_process import EASTProcessTest
+from ppocr.data.det.db_process import DBProcessTest
+from ppocr.postprocess.db_postprocess import DBPostProcess
+from ppocr.postprocess.east_postprocess import EASTPostPocess
+import copy
+import numpy as np
+import math
+import time
+
+
+class TextDetector(object):
+    """
+    Text detector wrapping a predictor with DB or EAST pre/post-processing.
+    """
+
+    def __init__(self, args):
+        """
+        Args:
+            args: parsed options; reads det_max_side_len, det_algorithm and
+                the DB (det_db_*) or EAST (det_east_*) thresholds.
+
+        Exits the process with status 1 when det_algorithm is neither "DB"
+        nor "EAST".
+        """
+        max_side_len = args.det_max_side_len
+        self.det_algorithm = args.det_algorithm
+        preprocess_params = {'max_side_len': max_side_len}
+        postprocess_params = {}
+        if self.det_algorithm == "DB":
+            self.preprocess_op = DBProcessTest(preprocess_params)
+            postprocess_params["thresh"] = args.det_db_thresh
+            postprocess_params["box_thresh"] = args.det_db_box_thresh
+            postprocess_params["max_candidates"] = 1000
+            self.postprocess_op = DBPostProcess(postprocess_params)
+        elif self.det_algorithm == "EAST":
+            self.preprocess_op = EASTProcessTest(preprocess_params)
+            postprocess_params["score_thresh"] = args.det_east_score_thresh
+            postprocess_params["cover_thresh"] = args.det_east_cover_thresh
+            postprocess_params["nms_thresh"] = args.det_east_nms_thresh
+            self.postprocess_op = EASTPostPocess(postprocess_params)
+        else:
+            # `sys` is not imported at module level in this file, so it is
+            # imported here; without it this branch raised NameError.
+            import sys
+            logger.info("unknown det_algorithm:{}".format(self.det_algorithm))
+            # non-zero status: an unsupported algorithm is a failure
+            sys.exit(1)
+
+        self.predictor, self.input_tensor, self.output_tensors =\
+            utility.create_predictor(args, mode="det")
+
+    def order_points_clockwise(self, pts):
+        """
+        Order four points as top-left, top-right, bottom-right, bottom-left.
+
+        Adapted from
+        https://github.com/jrosebr1/imutils/blob/master/imutils/perspective.py
+
+        Args:
+            pts: array indexed as pts[i, 0] = x and pts[i, 1] = y.
+
+        Returns:
+            float32 array [tl, tr, br, bl].
+        """
+        # sort the points based on their x-coordinates
+        xSorted = pts[np.argsort(pts[:, 0]), :]
+
+        # grab the left-most and right-most points from the sorted
+        # x-coordinate points
+        leftMost = xSorted[:2, :]
+        rightMost = xSorted[2:, :]
+
+        # now, sort the left-most coordinates according to their
+        # y-coordinates so we can grab the top-left and bottom-left
+        # points, respectively
+        leftMost = leftMost[np.argsort(leftMost[:, 1]), :]
+        (tl, bl) = leftMost
+
+        # same for the right-most pair: top-right, then bottom-right
+        rightMost = rightMost[np.argsort(rightMost[:, 1]), :]
+        (tr, br) = rightMost
+
+        rect = np.array([tl, tr, br, bl], dtype="float32")
+        return rect
+
+    def expand_det_res(self, points, bbox_height, bbox_width, img_height,
+                       img_width):
+        """
+        Enlarge a box ordered [tl, tr, br, bl], clamped to the image bounds.
+
+        The margin is 20% of the width for tall boxes (height/width >= 2),
+        20% of the height for wide boxes (width/height >= 3) and 10% of the
+        height otherwise. `points` is modified in place and returned.
+        """
+        if bbox_height * 1.0 / bbox_width >= 2.0:
+            expand_w = bbox_width * 0.20
+            expand_h = bbox_width * 0.20
+        elif bbox_width * 1.0 / bbox_height >= 3.0:
+            expand_w = bbox_height * 0.20
+            expand_h = bbox_height * 0.20
+        else:
+            expand_w = bbox_height * 0.1
+            expand_h = bbox_height * 0.1
+
+        # left edge (tl, bl) moves left, right edge (tr, br) moves right
+        points[0, 0] = int(max((points[0, 0] - expand_w), 0))
+        points[1, 0] = int(min((points[1, 0] + expand_w), img_width))
+        points[3, 0] = int(max((points[3, 0] - expand_w), 0))
+        points[2, 0] = int(min((points[2, 0] + expand_w), img_width))
+
+        # top edge (tl, tr) moves up, bottom edge (bl, br) moves down
+        points[0, 1] = int(max((points[0, 1] - expand_h), 0))
+        points[1, 1] = int(max((points[1, 1] - expand_h), 0))
+        points[3, 1] = int(min((points[3, 1] + expand_h), img_height))
+        points[2, 1] = int(min((points[2, 1] + expand_h), img_height))
+        return points
+
+    def filter_tag_det_res(self, dt_boxes, image_shape):
+        """
+        Reorder, filter and expand the detected boxes.
+
+        Boxes whose top or left side is at most 10 pixels long are dropped.
+        Boxes whose top edge deviates at most 10 pixels vertically and whose
+        left edge deviates at most 10 pixels horizontally are enlarged with
+        `expand_det_res`.
+
+        Args:
+            dt_boxes: iterable of 4-point boxes.
+            image_shape: shape of the source image; only the first two
+                entries (height, width) are used.
+
+        Returns:
+            array of the surviving boxes.
+        """
+        img_height, img_width = image_shape[0:2]
+        dt_boxes_new = []
+        for box in dt_boxes:
+            box = self.order_points_clockwise(box)
+            left = int(np.min(box[:, 0]))
+            right = int(np.max(box[:, 0]))
+            top = int(np.min(box[:, 1]))
+            bottom = int(np.max(box[:, 1]))
+            bbox_height = bottom - top
+            bbox_width = right - left
+            diffh = math.fabs(box[0, 1] - box[1, 1])
+            diffw = math.fabs(box[0, 0] - box[3, 0])
+            rect_width = int(np.linalg.norm(box[0] - box[1]))
+            rect_height = int(np.linalg.norm(box[0] - box[3]))
+            if rect_width <= 10 or rect_height <= 10:
+                continue
+            if diffh <= 10 and diffw <= 10:
+                box = self.expand_det_res(
+                    copy.deepcopy(box), bbox_height, bbox_width, img_height,
+                    img_width)
+            dt_boxes_new.append(box)
+        dt_boxes = np.array(dt_boxes_new)
+        return dt_boxes
+
+    def __call__(self, img):
+        """
+        Detect text boxes in one image.
+
+        Returns:
+            (dt_boxes, elapse): the filtered boxes and the seconds spent on
+            inference plus post-processing, or (None, 0) when the
+            preprocessing step returns no image.
+        """
+        ori_im = img.copy()
+        im, ratio_list = self.preprocess_op(img)
+        if im is None:
+            return None, 0
+        im = im.copy()
+        starttime = time.time()
+        self.input_tensor.copy_from_cpu(im)
+        self.predictor.zero_copy_run()
+        outputs = []
+        for output_tensor in self.output_tensors:
+            output = output_tensor.copy_to_cpu()
+            outputs.append(output)
+        outs_dict = {}
+        if self.det_algorithm == "EAST":
+            outs_dict['f_score'] = outputs[0]
+            outs_dict['f_geo'] = outputs[1]
+        else:
+            outs_dict['maps'] = [outputs[0]]
+        # the post-process op works on batches; wrap and unwrap the single image
+        dt_boxes_list = self.postprocess_op(outs_dict, [ratio_list])
+        dt_boxes = dt_boxes_list[0]
+        dt_boxes = self.filter_tag_det_res(dt_boxes, ori_im.shape)
+        elapse = time.time() - starttime
+        return dt_boxes, elapse
+
+
+if __name__ == "__main__":
+    # Run detection on every image under --image_dir, draw the results and
+    # report the average time per image.
+    args = utility.parse_args()
+    image_file_list = utility.get_image_file_list(args.image_dir)
+    text_detector = TextDetector(args)
+    count = 0
+    total_time = 0
+    for image_file in image_file_list:
+        img = cv2.imread(image_file)
+        if img is None:
+            logger.info("error in loading image:{}".format(image_file))
+            continue
+        dt_boxes, elapse = text_detector(img)
+        if dt_boxes is None:
+            # the detector returns (None, 0) when preprocessing yields no image
+            logger.info("no detection result for image:{}".format(image_file))
+            continue
+        # the first processed image is left out of the timing total
+        if count > 0:
+            total_time += elapse
+        count += 1
+        print("Predict time of %s:" % image_file, elapse)
+        utility.draw_text_det_res(dt_boxes, image_file)
+    if count > 1:
+        print("Avg Time:", total_time / (count - 1))
+    else:
+        # with fewer than two timed images the average is undefined;
+        # previously this raised ZeroDivisionError
+        logger.info("not enough images to compute the average time")
--- a/tools/infer/predict_eval.py
+++ b/tools/infer/predict_eval.py
@ -0,0 +1,76 @@
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import utility
+from ppocr.utils.utility import initial_logger
+logger = initial_logger()
+import cv2
+import predict_system
+import copy
+import numpy as np
+import math
+import time
+import json
+
+if __name__ == "__main__":
+    # Run the detection + recognition pipeline over the images listed in a
+    # label file and write one "<image name><TAB><JSON boxes>" line per image.
+    args = utility.parse_args()
+    text_sys = predict_system.TextSystem(args)
+
+    image_file_list = []
+    label_file_path = "./eval_perform/gt_res/test_chinese_ic15_500_4pts.txt"
+    img_set_path = "./eval_perform/"
+    with open(label_file_path, "rb") as fin:
+        for line in fin:
+            substr = line.decode('utf-8').strip("\n").split("\t")
+            # entries whose name contains "lsvt" are excluded
+            if "lsvt" in substr[0]:
+                continue
+            image_file_list.append(substr[0])
+
+    total_time_all = 0
+    count = 0
+    save_path = "./output/predict.txt"
+    # `with` guarantees the result file is closed even if inference fails
+    with open(save_path, "wb") as fout:
+        for image_name in image_file_list:
+            image_file = img_set_path + image_name
+            img = cv2.imread(image_file)
+            if img is None:
+                logger.info("error in loading image:{}".format(image_file))
+                continue
+            starttime = time.time()
+            dt_boxes, rec_res = text_sys(img)
+            elapse = time.time() - starttime
+            if dt_boxes is None:
+                # TextSystem returns (None, None) when detection yields nothing
+                logger.info("no detection result for image:{}".format(
+                    image_file))
+                continue
+            count += 1
+            total_time_all += elapse
+            print("Predict time of %s(%d): %.3fs" %
+                  (image_file, count, elapse))
+            bbox_list = []
+            for dno in range(len(dt_boxes)):
+                box = dt_boxes[dno]
+                text, score = rec_res[dno]
+                points = []
+                for tno in range(len(box)):
+                    points.append([box[tno][0] * 1.0, box[tno][1] * 1.0])
+                bbox_list.append({
+                    "transcription": text,
+                    "points": points,
+                    "scores": score * 1.0
+                })
+            otstr = image_name + "\t" + json.dumps(bbox_list) + "\n"
+            fout.write(otstr.encode('utf-8'))
+    if count > 0 and total_time_all > 0:
+        avg_time = total_time_all / count
+        logger.info("avg_time: {0}".format(avg_time))
+        logger.info("avg_fps: {0}".format(1.0 / avg_time))
+    else:
+        # nothing was processed; avoid ZeroDivisionError
+        logger.info("no image was processed, timing statistics skipped")
--- a/tools/infer/predict_eval_new.py
+++ b/tools/infer/predict_eval_new.py
@ -0,0 +1,72 @@
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import utility
+from ppocr.utils.utility import initial_logger
+logger = initial_logger()
+import cv2
+import predict_system
+import copy
+import numpy as np
+import math
+import time
+import json
+import os
+
+if __name__ == "__main__":
+    # Run the detection + recognition pipeline over every file in a folder
+    # and write one "<image name><TAB><JSON boxes>" line per image.
+    args = utility.parse_args()
+    text_sys = predict_system.TextSystem(args)
+
+    img_set_path = "/paddle/code/dyn/test_imgs/rctw_samples/"
+    image_file_list = os.listdir(img_set_path)
+
+    total_time_all = 0
+    count = 0
+    save_path = "./output/predict.txt"
+    # `with` guarantees the result file is closed even if inference fails
+    with open(save_path, "wb") as fout:
+        for image_name in image_file_list:
+            image_file = img_set_path + image_name
+            img = cv2.imread(image_file)
+            if img is None:
+                logger.info("error in loading image:{}".format(image_file))
+                continue
+            starttime = time.time()
+            dt_boxes, rec_res = text_sys(img)
+            if dt_boxes is None:
+                # images without a detection result are neither counted
+                # nor timed
+                continue
+            elapse = time.time() - starttime
+            count += 1
+            total_time_all += elapse
+            print("Predict time of %s(%d): %.3fs" %
+                  (image_file, count, elapse))
+            bbox_list = []
+            for dno in range(len(dt_boxes)):
+                box = dt_boxes[dno]
+                text, score = rec_res[dno]
+                points = []
+                for tno in range(len(box)):
+                    points.append([box[tno][0] * 1.0, box[tno][1] * 1.0])
+                bbox_list.append({
+                    "transcription": text,
+                    "points": points,
+                    "scores": score * 1.0
+                })
+            otstr = image_name + "\t" + json.dumps(bbox_list) + "\n"
+            fout.write(otstr.encode('utf-8'))
+    if count > 0 and total_time_all > 0:
+        avg_time = total_time_all / count
+        logger.info("avg_time: {0}".format(avg_time))
+        logger.info("avg_fps: {0}".format(1.0 / avg_time))
+    else:
+        # nothing was processed; avoid ZeroDivisionError
+        logger.info("no image was processed, timing statistics skipped")
--- a/tools/infer/predict_rec.py
+++ b/tools/infer/predict_rec.py
@ -0,0 +1,115 @@
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import utility
+from ppocr.utils.utility import initial_logger
+logger = initial_logger()
+import cv2
+
+import copy
+import numpy as np
+import math
+import time
+from ppocr.utils.character import CharacterOps
+
+
+class TextRecognizer(object):
+    """
+    Text recognizer: normalizes cropped images, runs the predictor in
+    batches and decodes the outputs into (text, score) pairs.
+    """
+
+    def __init__(self, args):
+        """
+        Args:
+            args: parsed options; reads rec_image_shape (comma-separated
+                "C,H,W"), rec_char_type and rec_char_dict_path.
+        """
+        self.predictor, self.input_tensor, self.output_tensors =\
+            utility.create_predictor(args, mode="rec")
+        image_shape = [int(v) for v in args.rec_image_shape.split(",")]
+        self.rec_image_shape = image_shape
+        char_ops_params = {}
+        char_ops_params["character_type"] = args.rec_char_type
+        char_ops_params["character_dict_path"] = args.rec_char_dict_path
+        char_ops_params['loss_type'] = 'ctc'
+        self.char_ops = CharacterOps(char_ops_params)
+
+    def resize_norm_img(self, img):
+        """
+        Resize an HWC image to the model height and pad it to the model width.
+
+        The aspect ratio is kept unless the resized width would exceed imgW,
+        in which case the width is capped at imgW. Pixel values are mapped
+        with x / 255, then (x - 0.5) / 0.5, and the result is written into
+        the left part of a zero-filled (imgC, imgH, imgW) float32 array.
+        """
+        imgC, imgH, imgW = self.rec_image_shape
+        h = img.shape[0]
+        w = img.shape[1]
+        ratio = w / float(h)
+        if math.ceil(imgH * ratio) > imgW:
+            resized_w = imgW
+        else:
+            resized_w = int(math.ceil(imgH * ratio))
+        resized_image = cv2.resize(img, (resized_w, imgH))
+        resized_image = resized_image.astype('float32')
+        # HWC -> CHW, then scale
+        resized_image = resized_image.transpose((2, 0, 1)) / 255
+        resized_image -= 0.5
+        resized_image /= 0.5
+        padding_im = np.zeros((imgC, imgH, imgW), dtype=np.float32)
+        padding_im[:, :, 0:resized_w] = resized_image
+        return padding_im
+
+    def __call__(self, img_list):
+        """
+        Recognize the text of every image in `img_list`.
+
+        Returns:
+            (rec_res, predict_time): rec_res holds one [text, score] entry
+            per input image, in input order; predict_time is the summed
+            predictor time in seconds. The score is the mean probability of
+            the non-blank steps, or 0.0 when every step is blank.
+        """
+        img_num = len(img_list)
+        batch_num = 15
+        rec_res = []
+        predict_time = 0
+        for beg_img_no in range(0, img_num, batch_num):
+            end_img_no = min(img_num, beg_img_no + batch_num)
+            norm_img_batch = []
+            for ino in range(beg_img_no, end_img_no):
+                norm_img = self.resize_norm_img(img_list[ino])
+                norm_img = norm_img[np.newaxis, :]
+                norm_img_batch.append(norm_img)
+            norm_img_batch = np.concatenate(norm_img_batch)
+            norm_img_batch = norm_img_batch.copy()
+            starttime = time.time()
+            self.input_tensor.copy_from_cpu(norm_img_batch)
+            self.predictor.zero_copy_run()
+            # output 0: decoded indices, output 1: per-step probabilities;
+            # the lod offsets delimit the rows that belong to each image
+            rec_idx_batch = self.output_tensors[0].copy_to_cpu()
+            rec_idx_lod = self.output_tensors[0].lod()[0]
+            predict_batch = self.output_tensors[1].copy_to_cpu()
+            predict_lod = self.output_tensors[1].lod()[0]
+            elapse = time.time() - starttime
+            predict_time += elapse
+            for rno in range(len(rec_idx_lod) - 1):
+                beg = rec_idx_lod[rno]
+                end = rec_idx_lod[rno + 1]
+                rec_idx_tmp = rec_idx_batch[beg:end, 0]
+                preds_text = self.char_ops.decode(rec_idx_tmp)
+                beg = predict_lod[rno]
+                end = predict_lod[rno + 1]
+                probs = predict_batch[beg:end, :]
+                ind = np.argmax(probs, axis=1)
+                # the last class index is treated as the blank label here
+                blank = probs.shape[1]
+                valid_ind = np.where(ind != (blank - 1))[0]
+                if len(valid_ind) == 0:
+                    # np.mean of an empty slice is NaN (with a
+                    # RuntimeWarning); report a zero score instead so the
+                    # result stays aligned with the input and serializable
+                    score = 0.0
+                else:
+                    score = np.mean(probs[valid_ind, ind[valid_ind]])
+                rec_res.append([preds_text, score])
+        return rec_res, predict_time
+
+
+if __name__ == "__main__":
+    # Recognize every readable image under --image_dir and print the results.
+    args = utility.parse_args()
+    image_file_list = utility.get_image_file_list(args.image_dir)
+    text_recognizer = TextRecognizer(args)
+    # file names and decoded images are kept index-aligned;
+    # unreadable files are skipped
+    valid_image_file_list = []
+    img_list = []
+    for image_file in image_file_list:
+        img = cv2.imread(image_file)
+        if img is not None:
+            valid_image_file_list.append(image_file)
+            img_list.append(img)
+        else:
+            logger.info("error in loading image:{}".format(image_file))
+    # NOTE(review): the recognizer is invoked twice and only the second
+    # result/time is reported -- presumably the first call is a warm-up;
+    # confirm, or drop one of the calls.
+    rec_res, predict_time = text_recognizer(img_list)
+    rec_res, predict_time = text_recognizer(img_list)
+    for ino, image_file in enumerate(valid_image_file_list):
+        print("Predicts of %s:%s" % (image_file, rec_res[ino]))
+    print("Total predict time for %d images:%.3f" %
+          (len(img_list), predict_time))
--- a/tools/infer/predict_system.py
+++ b/tools/infer/predict_system.py
@ -0,0 +1,97 @@
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import utility
+from ppocr.utils.utility import initial_logger
+logger = initial_logger()
+import cv2
+import predict_det
+import predict_rec
+import copy
+import numpy as np
+import math
+import time
+
+
+class TextSystem(object):
+    """Two-stage pipeline: text detection followed by text recognition."""
+
+    def __init__(self, args):
+        self.text_detector = predict_det.TextDetector(args)
+        self.text_recognizer = predict_rec.TextRecognizer(args)
+
+    def get_rotate_crop_image(self, img, points):
+        """
+        Cut the quadrilateral `points` out of `img` and rectify it.
+
+        The bounding rectangle is cropped first, the points are shifted
+        into crop coordinates (in place), and a perspective warp maps them
+        onto an upright rectangle whose sides are |p0-p1| and |p0-p3|.
+        Results with height/width >= 1.5 are rotated with np.rot90.
+        """
+        x_min = int(np.min(points[:, 0]))
+        x_max = int(np.max(points[:, 0]))
+        y_min = int(np.min(points[:, 1]))
+        y_max = int(np.max(points[:, 1]))
+        img_crop = img[y_min:y_max, x_min:x_max, :].copy()
+        # move the quadrilateral into the crop's coordinate frame
+        points[:, 0] = points[:, 0] - x_min
+        points[:, 1] = points[:, 1] - y_min
+        crop_w = int(np.linalg.norm(points[0] - points[1]))
+        crop_h = int(np.linalg.norm(points[0] - points[3]))
+        pts_std = np.float32([[0, 0], [crop_w, 0], [crop_w, crop_h],
+                              [0, crop_h]])
+        transform = cv2.getPerspectiveTransform(points, pts_std)
+        dst_img = cv2.warpPerspective(
+            img_crop,
+            transform, (crop_w, crop_h),
+            borderMode=cv2.BORDER_REPLICATE)
+        dst_h, dst_w = dst_img.shape[0:2]
+        if dst_h * 1.0 / dst_w >= 1.5:
+            return np.rot90(dst_img)
+        return dst_img
+
+    def print_draw_crop_rec_res(self, img_crop_list, rec_res):
+        """Debug helper: save every crop under ./output/ and print its result."""
+        for bno in range(len(img_crop_list)):
+            cv2.imwrite("./output/img_crop_%d.jpg" % bno, img_crop_list[bno])
+            print(bno, rec_res[bno])
+
+    def __call__(self, img):
+        """
+        Detect and recognize the text in one image.
+
+        Returns:
+            (dt_boxes, rec_res), or (None, None) when the detector returns
+            no boxes object.
+        """
+        ori_im = img.copy()
+        dt_boxes, elapse = self.text_detector(img)
+        if dt_boxes is None:
+            return None, None
+        # each box is deep-copied because cropping shifts the points in place
+        img_crop_list = [
+            self.get_rotate_crop_image(ori_im, copy.deepcopy(dt_boxes[bno]))
+            for bno in range(len(dt_boxes))
+        ]
+        rec_res, elapse = self.text_recognizer(img_crop_list)
+        return dt_boxes, rec_res
+
+
+if __name__ == "__main__":
+    # Run detection + recognition on every image under --image_dir, print
+    # the recognized text and draw the kept boxes.
+    args = utility.parse_args()
+    image_file_list = utility.get_image_file_list(args.image_dir)
+    text_sys = TextSystem(args)
+    for image_file in image_file_list:
+        img = cv2.imread(image_file)
+        if img is None:
+            logger.info("error in loading image:{}".format(image_file))
+            continue
+        starttime = time.time()
+        dt_boxes, rec_res = text_sys(img)
+        elapse = time.time() - starttime
+        print("Predict time of %s: %.3fs" % (image_file, elapse))
+        if dt_boxes is None:
+            # TextSystem returns (None, None) when detection yields nothing;
+            # len(None) below would otherwise raise TypeError
+            logger.info("no detection result for image:{}".format(image_file))
+            continue
+        dt_boxes_final = []
+        for dno in range(len(dt_boxes)):
+            text, score = rec_res[dno]
+            # results whose score is not >= 0 (e.g. NaN) are dropped
+            if score >= 0:
+                text_str = "%s, %.3f" % (text, score)
+                print(text_str)
+                dt_boxes_final.append(dt_boxes[dno])
+        utility.draw_text_det_res(dt_boxes_final, image_file)
--- a/tools/infer/utility.py
+++ b/tools/infer/utility.py
@ -0,0 +1,147 @@
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import argparse
+import os, sys
+from ppocr.utils.utility import initial_logger
+logger = initial_logger()
+from paddle.fluid.core import PaddleTensor
+from paddle.fluid.core import AnalysisConfig
+from paddle.fluid.core import create_paddle_predictor
+import cv2
+import numpy as np
+
+
+def parse_args():
+    """Define the inference command-line options and parse sys.argv.
+
+    Returns:
+        argparse.Namespace holding the prediction-engine, text-detector and
+        text-recognizer options declared below.
+    """
+
+    def str2bool(v):
+        # Only "true", "t" and "1" (case-insensitive) mean True; any other
+        # string parses as False.
+        truthy = ("true", "t", "1")
+        return v.lower() in truthy
+
+    parser = argparse.ArgumentParser()
+    add_arg = parser.add_argument
+
+    # Prediction engine parameters.
+    add_arg("--use_gpu", type=str2bool, default=True)
+    add_arg("--ir_optim", type=str2bool, default=True)
+    add_arg("--use_tensorrt", type=str2bool, default=False)
+    add_arg("--gpu_mem", type=int, default=8000)
+
+    # Text detector parameters.
+    add_arg("--image_dir", type=str)
+    add_arg("--det_algorithm", type=str, default='DB')
+    add_arg("--det_model_dir", type=str)
+    add_arg("--det_max_side_len", type=float, default=960)
+
+    # DB parameters.
+    add_arg("--det_db_thresh", type=float, default=0.3)
+    add_arg("--det_db_box_thresh", type=float, default=0.5)
+
+    # EAST parameters.
+    add_arg("--det_east_score_thresh", type=float, default=0.8)
+    add_arg("--det_east_cover_thresh", type=float, default=0.1)
+    add_arg("--det_east_nms_thresh", type=float, default=0.2)
+
+    # Text recognizer parameters.
+    add_arg("--rec_algorithm", type=str, default='CRNN')
+    add_arg("--rec_model_dir", type=str)
+    add_arg("--rec_image_shape", type=str, default="3, 32, 320")
+    add_arg("--rec_char_type", type=str, default='ch')
+    add_arg(
+        "--rec_char_dict_path",
+        type=str,
+        default="./ppocr/utils/ppocr_keys_v1.txt")
+
+    return parser.parse_args()
+
+
+def get_image_file_list(image_dir):
+    """Collect the paths of the files to run inference on.
+
+    Args:
+        image_dir (str|None): path of a single file or of a directory.
+
+    Returns:
+        list[str]: [image_dir] when it is a file; the regular files directly
+        inside it, sorted by name, when it is a directory; an empty list when
+        it is None or does not exist.
+    """
+    if image_dir is None:
+        return []
+    if os.path.isfile(image_dir):
+        return [image_dir]
+    if not os.path.isdir(image_dir):
+        return []
+    image_file_list = []
+    # os.listdir returns entries in arbitrary order; sort so that repeated
+    # runs process (and report) the files in the same order.
+    for single_file in sorted(os.listdir(image_dir)):
+        file_path = os.path.join(image_dir, single_file)
+        # Sub-directories cannot be decoded as images, so leave them out.
+        if os.path.isfile(file_path):
+            image_file_list.append(file_path)
+    return image_file_list
+
+
+def create_predictor(args, mode):
+    """Create a Paddle inference predictor for the detector or recognizer.
+
+    Args:
+        args: parsed options; reads det_model_dir / rec_model_dir, use_gpu,
+            gpu_mem and ir_optim.
+        mode (str): "det" selects args.det_model_dir; any other value selects
+            args.rec_model_dir.
+
+    Returns:
+        tuple: (predictor, input_tensor, output_tensors). input_tensor is the
+        tensor of the first input name; output_tensors holds one tensor per
+        output name, in the order reported by the predictor.
+
+    The process exits with status 1 when the model directory is not set or
+    when its "model" / "params" files are missing.
+    """
+    if mode == "det":
+        model_dir = args.det_model_dir
+    else:
+        model_dir = args.rec_model_dir
+
+    # These are fatal configuration errors: log them as errors and exit with a
+    # non-zero status so calling scripts can detect the failure.
+    if model_dir is None:
+        logger.error("not find {} model file path {}".format(mode, model_dir))
+        sys.exit(1)
+    model_file_path = os.path.join(model_dir, "model")
+    params_file_path = os.path.join(model_dir, "params")
+    if not os.path.exists(model_file_path):
+        logger.error("not find model file path {}".format(model_file_path))
+        sys.exit(1)
+    if not os.path.exists(params_file_path):
+        logger.error("not find params file path {}".format(params_file_path))
+        sys.exit(1)
+
+    config = AnalysisConfig(model_file_path, params_file_path)
+
+    if args.use_gpu:
+        # Second argument 0 is the device id passed to enable_use_gpu.
+        config.enable_use_gpu(args.gpu_mem, 0)
+    else:
+        config.disable_gpu()
+
+    config.disable_glog_info()
+    config.switch_ir_optim(args.ir_optim)
+    # NOTE(review): --use_tensorrt is parsed but not honoured yet; the
+    # TensorRT setup below is still disabled.
+    #     if args.use_tensorrt:
+    #         config.enable_tensorrt_engine(
+    #             precision_mode=AnalysisConfig.Precision.Half
+    #             if args.use_fp16 else AnalysisConfig.Precision.Float32,
+    #             max_batch_size=args.batch_size)
+
+    config.enable_memory_optim()
+    # use zero copy
+    config.switch_use_feed_fetch_ops(False)
+    predictor = create_paddle_predictor(config)
+    input_names = predictor.get_input_names()
+    input_tensor = predictor.get_input_tensor(input_names[0])
+    output_tensors = [
+        predictor.get_output_tensor(output_name)
+        for output_name in predictor.get_output_names()
+    ]
+    return predictor, input_tensor, output_tensors
+
+
+def draw_text_det_res(dt_boxes, img_path):
+    """Draw the detected boxes on the image and save it under ./output.
+
+    Args:
+        dt_boxes: iterable of boxes; each is converted to an int32 array and
+            reshaped to (-1, 2) before being drawn as a closed polygon.
+        img_path (str): path of the source image; the result is written to
+            ./output/<basename of img_path>.
+    """
+    src_im = cv2.imread(img_path)
+    if src_im is None:
+        # Nothing to draw on; report it instead of failing inside OpenCV.
+        logger.info("error in loading image:{}".format(img_path))
+        return
+    for box in dt_boxes:
+        box = np.array(box).astype(np.int32).reshape(-1, 2)
+        cv2.polylines(src_im, [box], True, color=(255, 255, 0), thickness=2)
+    # basename understands the platform separator, unlike split("/").
+    img_name_pure = os.path.basename(img_path)
+    output_dir = "./output"
+    # cv2.imwrite does not create missing directories, so make sure the
+    # target directory exists before writing.
+    if not os.path.isdir(output_dir):
+        os.makedirs(output_dir)
+    cv2.imwrite(os.path.join(output_dir, img_name_pure), src_im)
+
+
+if __name__ == '__main__':
+    # Manual smoke test: build the detection and recognition predictors on
+    # CPU and print their input/output names.
+    args = parse_args()
+    args.use_gpu = False
+    # NOTE(review): developer-specific absolute path; adjust it before
+    # running this self-test on another machine.
+    root_path = "/Users/liuweiwei06/Desktop/TEST_CODES/icode/baidu/personal-code/PaddleOCR/"
+    args.det_model_dir = root_path + "test_models/public_v1/ch_det_mv3_db"
+
+    predictor, input_tensor, output_tensors = create_predictor(args, mode='det')
+    print(predictor.get_input_names())
+    print(predictor.get_output_names())
+    # Use a context manager so the dump file is flushed and closed.
+    with open("det_program.txt", 'w') as program_file:
+        print(predictor.program(), file=program_file)
+
+    args.rec_model_dir = root_path + "test_models/public_v1/ch_rec_mv3_crnn/"
+    rec_predictor, input_tensor, output_tensors = create_predictor(
+        args, mode='rec')
+    print(rec_predictor.get_input_names())
+    print(rec_predictor.get_output_names())
--- a/tools/infer_rec.py
+++ b/tools/infer_rec.py
@ -0,0 +1,125 @@
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import os
+import time
+import multiprocessing
+import numpy as np
+
+
+def set_paddle_flags(**kwargs):
+    """Export each keyword as an environment variable unless already set.
+
+    Values are converted with str(); variables that already exist in the
+    environment are left untouched.
+    """
+    for flag_name, flag_value in kwargs.items():
+        if flag_name in os.environ:
+            continue
+        os.environ[flag_name] = str(flag_value)
+
+
+# NOTE(paddle-dev): All of these flags must be set before `import paddle`;
+# otherwise they have no effect.
+set_paddle_flags(
+    FLAGS_eager_delete_tensor_gb=0,  # enable GC to save memory
+)
+
+from paddle import fluid
+
+# from ppocr.utils.utility import load_config, merge_config
+from ppocr.data.reader_main import test_reader
+import program
+from paddle import fluid
+from ppocr.utils.utility import initial_logger
+logger = initial_logger()
+from ppocr.data.reader_main import reader_main
+from ppocr.utils.save_load import init_model
+from ppocr.utils.character import CharacterOps
+from ppocr.utils.utility import create_module
+
+logger = initial_logger()
+
+
+def main():
+    """Run the recognition model on one reader item, then export it.
+
+    Reads the module-level FLAGS (set in the __main__ guard): loads the
+    config named by FLAGS.config, applies the FLAGS.opt overrides, builds the
+    model in "test" mode, prints the raw and decoded prediction of every
+    image in the first item yielded by the test reader, and finally saves an
+    inference model (files "model" and "params") under ./output/.
+    """
+    config = program.load_config(FLAGS.config)
+    program.merge_config(FLAGS.opt)
+    logger.info(config)
+    # The character helper is stored in the config so that code receiving
+    # only `config` can reach it.
+    char_ops = CharacterOps(config['Global'])
+    config['Global']['char_ops'] = char_ops
+
+    # check if set use_gpu=True in paddlepaddle cpu version
+    use_gpu = config['Global']['use_gpu']
+    #     check_gpu(use_gpu)
+
+    place = fluid.CUDAPlace(0) if use_gpu else fluid.CPUPlace()
+    exe = fluid.Executor(place)
+
+    rec_model = create_module(config['Architecture']['function'])(params=config)
+
+    # Build the graph inside fresh programs; `outputs` maps output names to
+    # variables and is reused below for fetching and for the export.
+    startup_prog = fluid.Program()
+    eval_prog = fluid.Program()
+    with fluid.program_guard(eval_prog, startup_prog):
+        with fluid.unique_name.guard():
+            _, outputs = rec_model(mode="test")
+            fetch_name_list = list(outputs.keys())
+            fetch_varname_list = [outputs[v].name for v in fetch_name_list]
+    eval_prog = eval_prog.clone(for_test=True)
+    exe.run(startup_prog)
+
+    init_model(config, eval_prog, exe)
+
+    # NOTE(review): only the first item produced by the reader is consumed;
+    # confirm whether remaining batches are meant to be skipped.
+    blobs = reader_main(config, 'test')
+    imgs = next(blobs())
+    for img in imgs:
+        predict = exe.run(program=eval_prog,
+                          feed={"image": img},
+                          fetch_list=fetch_varname_list,
+                          return_numpy=False)
+
+        preds = np.array(predict[0])
+        if preds.shape[1] == 1:
+            # Single-column output: flatten and decode directly.
+            # NOTE(review): preds_lod is computed but never used.
+            preds = preds.reshape(-1)
+            preds_lod = predict[0].lod()[0]
+            preds_text = char_ops.decode(preds)
+        else:
+            # Multi-column output: keep the first row from index 1 up to the
+            # second position whose value is 1 (or to the end when there is
+            # at most one such position). Presumably 1 is the end-of-sequence
+            # id of the decoder - TODO confirm.
+            end_pos = np.where(preds[0, :] == 1)[0]
+            if len(end_pos) <= 1:
+                preds_text = preds[0, 1:]
+            else:
+                preds_text = preds[0, 1:end_pos[1]]
+            preds_text = preds_text.reshape(-1)
+            preds_text = char_ops.decode(preds_text)
+
+        print(preds)
+        print(preds_text)
+
+    # save for inference model
+    target_var = []
+    for key, values in outputs.items():
+        target_var.append(values)
+
+    fluid.io.save_inference_model(
+        "./output/",
+        feeded_var_names=['image'],
+        target_vars=target_var,
+        executor=exe,
+        main_program=eval_prog,
+        model_filename="model",
+        params_filename="params")
+
+
+if __name__ == '__main__':
+    # FLAGS is a module-level global: main() reads FLAGS.config and FLAGS.opt
+    # directly instead of receiving them as arguments.
+    parser = program.ArgsParser()
+    FLAGS = parser.parse_args()
+    main()
--- a/tools/program.py
+++ b/tools/program.py
@ -0,0 +1,365 @@
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+from argparse import ArgumentParser, RawDescriptionHelpFormatter
+import sys
+import yaml
+import os
+from ppocr.utils.utility import create_module
+from ppocr.utils.utility import initial_logger
+logger = initial_logger()
+
+import paddle.fluid as fluid
+import time
+from ppocr.utils.stats import TrainingStats
+from eval_utils.eval_det_utils import eval_det_run
+from eval_utils.eval_rec_utils import eval_rec_run
+from ppocr.utils.save_load import save_model
+import numpy as np
+from ppocr.utils.character import cal_predicts_accuracy
+
+
+class ArgsParser(ArgumentParser):
+    """Command-line parser shared by the training / evaluation tools.
+
+    Options:
+        -c / --config: configuration file to use (required).
+        -o / --opt: one or more KEY=VALUE overrides; VALUE is parsed as YAML.
+    """
+
+    def __init__(self):
+        super(ArgsParser, self).__init__(
+            formatter_class=RawDescriptionHelpFormatter)
+        self.add_argument("-c", "--config", help="configuration file to use")
+        self.add_argument(
+            "-o", "--opt", nargs='+', help="set configuration options")
+
+    def parse_args(self, argv=None):
+        """Parse argv and turn args.opt into a dict of parsed overrides.
+
+        Raises:
+            AssertionError: when --config is not given.
+            ValueError: when an --opt item is not of the form KEY=VALUE.
+        """
+        args = super(ArgsParser, self).parse_args(argv)
+        assert args.config is not None, \
+            "Please specify --config=configure_file_path."
+        args.opt = self._parse_opt(args.opt)
+        return args
+
+    def _parse_opt(self, opts):
+        """Convert a list of "KEY=VALUE" strings into {KEY: parsed VALUE}.
+
+        Returns an empty dict when opts is None or empty.
+        """
+        config = {}
+        if not opts:
+            return config
+        for s in opts:
+            s = s.strip()
+            # Split on the first '=' only, so values that contain '='
+            # themselves (paths, URLs, expressions) are kept intact.
+            k, sep, v = s.partition('=')
+            if not sep:
+                raise ValueError(
+                    "invalid option '{}', expected KEY=VALUE".format(s))
+            # NOTE(review): yaml.Loader can construct arbitrary Python
+            # objects; consider yaml.SafeLoader if options may come from an
+            # untrusted source.
+            config[k] = yaml.load(v, Loader=yaml.Loader)
+        return config
+
+
+class AttrDict(dict):
+    """Dict whose top-level keys can also be read as attributes.
+
+    Single level only: nested dicts are stored as-is and are NOT converted.
+    """
+
+    def __init__(self, **kwargs):
+        super(AttrDict, self).__init__()
+        dict.update(self, kwargs)
+
+    def __getattr__(self, key):
+        # Called only after normal attribute lookup has failed, so regular
+        # dict methods are never shadowed by keys.
+        if key not in self:
+            raise AttributeError("object has no attribute '{}'".format(key))
+        return self[key]
+
+
+# Module-wide configuration filled in by load_config() / merge_config().
+global_config = AttrDict()
+
+
+def load_config(file_path):
+    """
+    Load config from yml/yaml file.
+
+    The main file is merged into the global config first; it must define
+    Global.reader_yml, the path of a second yaml file that is merged next.
+
+    Args:
+        file_path (str): Path of the config file to be loaded.
+
+    Returns: global config
+
+    Raises:
+        AssertionError: if either file does not end in .yml/.yaml, or if
+            Global.reader_yml is missing.
+    """
+    _, ext = os.path.splitext(file_path)
+    assert ext in ['.yml', '.yaml'], "only support yaml files for now"
+    # NOTE(review): yaml.Loader can construct arbitrary Python objects; only
+    # load configuration files from trusted sources.
+    # Context managers make sure the file handles are closed after parsing.
+    with open(file_path) as fin:
+        main_config = yaml.load(fin, Loader=yaml.Loader)
+    merge_config(main_config)
+    assert "reader_yml" in global_config['Global'],\
+        "absence reader_yml in global"
+    reader_file_path = global_config['Global']['reader_yml']
+    _, ext = os.path.splitext(reader_file_path)
+    assert ext in ['.yml', '.yaml'], "only support yaml files for reader"
+    with open(reader_file_path) as fin:
+        reader_config = yaml.load(fin, Loader=yaml.Loader)
+    merge_config(reader_config)
+    return global_config
+
+
+def merge_config(config):
+    """
+    Merge config into global config.
+
+    Plain keys replace the existing entry, except that a dict value is
+    merged (dict.update) into an entry that already exists. Dotted keys such
+    as "Global.use_gpu" overwrite one nested value; every component of the
+    path must already exist.
+
+    Args:
+        config (dict): Config to be merged.
+
+    Returns: global config
+    """
+    for key, value in config.items():
+        if "." not in key:
+            if isinstance(value, dict) and key in global_config:
+                global_config[key].update(value)
+            else:
+                global_config[key] = value
+        else:
+            sub_keys = key.split('.')
+            assert (sub_keys[0] in global_config)
+            cur = global_config[sub_keys[0]]
+            for idx, sub_key in enumerate(sub_keys[1:]):
+                assert (sub_key in cur)
+                # idx counts from the second path component, so the last
+                # component has index len(sub_keys) - 2.
+                if idx == len(sub_keys) - 2:
+                    cur[sub_key] = value
+                else:
+                    cur = cur[sub_key]
+    # Return the merged config, as the docstring has always promised.
+    return global_config
+
+
+def check_gpu(use_gpu):
+    """
+    Log error and exit when set use_gpu=true in paddlepaddle
+    cpu version.
+
+    Args:
+        use_gpu (bool): value of the use_gpu config option.
+
+    The check is best effort: if querying paddle fails, a warning is logged
+    and execution continues.
+    """
+    err = "Config use_gpu cannot be set as true while you are " \
+          "using paddlepaddle cpu version ! \nPlease try: \n" \
+          "\t1. Install paddlepaddle-gpu to run model on GPU \n" \
+          "\t2. Set use_gpu as false in config file to run " \
+          "model on CPU"
+
+    try:
+        if use_gpu and not fluid.is_compiled_with_cuda():
+            logger.error(err)
+            # SystemExit is not an Exception subclass, so it is not caught by
+            # the handler below.
+            sys.exit(1)
+    except Exception as e:
+        # Keep going, but leave a trace instead of hiding the failure.
+        logger.warning("skip the use_gpu check: {}".format(e))
+
+
+def build(config, main_prog, startup_prog, mode):
+    """
+    Build a program using a model and, in train mode, an optimizer
+        1. create the model named by config['Architecture']['function']
+        2. call it to get the dataloader and the output variables
+        3. collect the fetch names from the outputs
+        4. in "train" mode, create the optimizer and minimize 'total_loss'
+
+    Args:
+        config(dict): config
+        main_prog(): main program
+        startup_prog(): startup program
+        mode(str): forwarded to the model; "train" additionally builds the
+            optimizer and prepends the learning rate to the fetch lists
+
+    Returns:
+        dataloader(): first value returned by the model call
+        fetch_name_list(list): keys of the model outputs ("lr" first in
+            train mode)
+        fetch_varname_list(list): variable names matching fetch_name_list
+        opt_loss_name(str|None): name of the optimized loss; None unless
+            mode is "train"
+    """
+    with fluid.program_guard(main_prog, startup_prog):
+        with fluid.unique_name.guard():
+            func_infor = config['Architecture']['function']
+            model = create_module(func_infor)(params=config)
+            dataloader, outputs = model(mode=mode)
+            fetch_name_list = list(outputs.keys())
+            fetch_varname_list = [outputs[v].name for v in fetch_name_list]
+            opt_loss_name = None
+            if mode == "train":
+                opt_loss = outputs['total_loss']
+                opt_params = config['Optimizer']
+                optimizer = create_module(opt_params['function'])(opt_params)
+                optimizer.minimize(opt_loss)
+                opt_loss_name = opt_loss.name
+                # Expose the learning rate as the first fetched value.
+                global_lr = optimizer._global_learning_rate()
+                global_lr.persistable = True
+                fetch_name_list.insert(0, "lr")
+                fetch_varname_list.insert(0, global_lr.name)
+    return (dataloader, fetch_name_list, fetch_varname_list, opt_loss_name)
+
+
+def build_export(config, main_prog, startup_prog):
+    """
+    Build the model program in 'export' mode (no optimizer is created)
+        1. create the model named by config['Architecture']['function']
+        2. call it with mode='export' to get the input and the outputs
+        3. collect the output variables and their names
+
+    Args:
+        config(dict): config
+        main_prog(): main program
+        startup_prog(): startup program
+
+    Returns:
+        feeded_var_names(list): one-element list with the input's name
+        target_vars(list): output variables of the model
+        fetches_var_name(list): keys of the model outputs, in the same order
+            as target_vars
+    """
+    with fluid.program_guard(main_prog, startup_prog):
+        with fluid.unique_name.guard():
+            func_infor = config['Architecture']['function']
+            model = create_module(func_infor)(params=config)
+            image, outputs = model(mode='export')
+            fetches_var = [outputs[name] for name in outputs]
+            fetches_var_name = [name for name in outputs]
+    feeded_var_names = [image.name]
+    target_vars = fetches_var
+    return feeded_var_names, target_vars, fetches_var_name
+
+
+def create_multi_devices_program(program, loss_var_name):
+    """Compile `program` for data-parallel execution.
+
+    Args:
+        program: the program to wrap in fluid.CompiledProgram.
+        loss_var_name: passed as loss_name to with_data_parallel.
+
+    Returns:
+        The compiled program, built with in-place enabled, memory_optimize
+        disabled and num_iteration_per_drop_scope set to 1.
+    """
+    run_strategy = fluid.ExecutionStrategy()
+    run_strategy.num_iteration_per_drop_scope = 1
+
+    graph_strategy = fluid.BuildStrategy()
+    graph_strategy.memory_optimize = False
+    graph_strategy.enable_inplace = True
+
+    compiled = fluid.CompiledProgram(program)
+    return compiled.with_data_parallel(
+        loss_name=loss_var_name,
+        build_strategy=graph_strategy,
+        exec_strategy=run_strategy)
+
+
+def train_eval_det_run(config, exe, train_info_dict, eval_info_dict):
+    """Train the detection model with periodic logging, evaluation and saving.
+
+    Args:
+        config(dict): reads log_smooth_window, epoch_num, print_batch_step,
+            eval_batch_step, save_epoch_step and save_model_dir from
+            config['Global'].
+        exe: executor used to run the training program.
+        train_info_dict(dict): reads 'reader', 'compile_program',
+            'train_program', 'fetch_name_list' and 'fetch_varname_list'.
+        eval_info_dict(dict): forwarded to eval_det_run.
+
+    The model with the best 'hmean' is saved to <save_model_dir>/best_accuracy
+    and periodic snapshots to <save_model_dir>/iter_epoch_<epoch>.
+    """
+    # Counts batches across all epochs; it is never reset.
+    train_batch_id = 0
+    log_smooth_window = config['Global']['log_smooth_window']
+    epoch_num = config['Global']['epoch_num']
+    print_batch_step = config['Global']['print_batch_step']
+    eval_batch_step = config['Global']['eval_batch_step']
+    save_epoch_step = config['Global']['save_epoch_step']
+    save_model_dir = config['Global']['save_model_dir']
+    train_stats = TrainingStats(log_smooth_window,
+                                train_info_dict['fetch_name_list'])
+    best_eval_hmean = -1
+    best_batch_id = 0
+    best_epoch = 0
+    train_loader = train_info_dict['reader']
+    for epoch in range(epoch_num):
+        train_loader.start()
+        try:
+            # Runs until the loader raises EOFException at the end of the
+            # epoch.
+            while True:
+                t1 = time.time()
+                train_outs = exe.run(
+                    program=train_info_dict['compile_program'],
+                    fetch_list=train_info_dict['fetch_varname_list'],
+                    return_numpy=False)
+                # Reduce every fetched value to its mean for logging.
+                stats = {}
+                for tno in range(len(train_outs)):
+                    fetch_name = train_info_dict['fetch_name_list'][tno]
+                    fetch_value = np.mean(np.array(train_outs[tno]))
+                    stats[fetch_name] = fetch_value
+                t2 = time.time()
+                train_batch_elapse = t2 - t1
+                train_stats.update(stats)
+                if train_batch_id > 0 and train_batch_id \
+                    % print_batch_step == 0:
+                    logs = train_stats.log()
+                    strs = 'epoch: {}, iter: {}, {}, time: {:.3f}'.format(
+                        epoch, train_batch_id, logs, train_batch_elapse)
+                    logger.info(strs)
+
+                if train_batch_id > 0 and\
+                    train_batch_id % eval_batch_step == 0:
+                    metrics = eval_det_run(exe, config, eval_info_dict, "eval")
+                    hmean = metrics['hmean']
+                    # ">=" means a tie with the current best also overwrites
+                    # the saved best model.
+                    if hmean >= best_eval_hmean:
+                        best_eval_hmean = hmean
+                        best_batch_id = train_batch_id
+                        best_epoch = epoch
+                        save_path = save_model_dir + "/best_accuracy"
+                        save_model(train_info_dict['train_program'], save_path)
+                    strs = 'Test iter: {}, metrics:{}, best_hmean:{:.6f}, best_epoch:{}, best_batch_id:{}'.format(
+                        train_batch_id, metrics, best_eval_hmean, best_epoch,
+                        best_batch_id)
+                    logger.info(strs)
+                train_batch_id += 1
+
+        except fluid.core.EOFException:
+            train_loader.reset()
+
+        # Epoch 0 is never snapshotted because of the "epoch > 0" guard.
+        if epoch > 0 and epoch % save_epoch_step == 0:
+            save_path = save_model_dir + "/iter_epoch_%d" % (epoch)
+            save_model(train_info_dict['train_program'], save_path)
+    return
+
+
+def train_eval_rec_run(config, exe, train_info_dict, eval_info_dict):
+    """Train the recognition model with periodic logging, evaluation, saving.
+
+    Args:
+        config(dict): reads log_smooth_window, epoch_num, print_batch_step,
+            eval_batch_step, save_epoch_step, save_model_dir and char_ops
+            from config['Global'].
+        exe: executor used to run the training program.
+        train_info_dict(dict): reads 'reader', 'compile_program',
+            'train_program', 'fetch_name_list' and 'fetch_varname_list'; the
+            fetch names must include 'total_loss', 'lr', 'decoded_out' and
+            'label'.
+        eval_info_dict(dict): forwarded to eval_rec_run.
+
+    The model with the best 'avg_acc' is saved to
+    <save_model_dir>/best_accuracy and periodic snapshots to
+    <save_model_dir>/iter_epoch_<epoch>.
+    """
+    # Counts batches across all epochs; it is never reset.
+    train_batch_id = 0
+    log_smooth_window = config['Global']['log_smooth_window']
+    epoch_num = config['Global']['epoch_num']
+    print_batch_step = config['Global']['print_batch_step']
+    eval_batch_step = config['Global']['eval_batch_step']
+    save_epoch_step = config['Global']['save_epoch_step']
+    save_model_dir = config['Global']['save_model_dir']
+    train_stats = TrainingStats(log_smooth_window, ['loss', 'acc'])
+    best_eval_acc = -1
+    best_batch_id = 0
+    best_epoch = 0
+    train_loader = train_info_dict['reader']
+    for epoch in range(epoch_num):
+        train_loader.start()
+        try:
+            # Runs until the loader raises EOFException at the end of the
+            # epoch.
+            while True:
+                t1 = time.time()
+                train_outs = exe.run(
+                    program=train_info_dict['compile_program'],
+                    fetch_list=train_info_dict['fetch_varname_list'],
+                    return_numpy=False)
+                # Map each fetch name to its position in train_outs.
+                fetch_map = dict(
+                    zip(train_info_dict['fetch_name_list'],
+                        range(len(train_outs))))
+
+                loss = np.mean(np.array(train_outs[fetch_map['total_loss']]))
+                lr = np.mean(np.array(train_outs[fetch_map['lr']]))
+                # Predictions and labels are passed on together with the
+                # first level of their lod().
+                preds_idx = fetch_map['decoded_out']
+                preds = np.array(train_outs[preds_idx])
+                preds_lod = train_outs[preds_idx].lod()[0]
+                labels_idx = fetch_map['label']
+                labels = np.array(train_outs[labels_idx])
+                labels_lod = train_outs[labels_idx].lod()[0]
+
+                acc, acc_num, img_num = cal_predicts_accuracy(
+                    config['Global']['char_ops'], preds, preds_lod, labels,
+                    labels_lod)
+                t2 = time.time()
+                train_batch_elapse = t2 - t1
+                stats = {'loss': loss, 'acc': acc}
+                train_stats.update(stats)
+                if train_batch_id > 0 and train_batch_id \
+                    % print_batch_step == 0:
+                    logs = train_stats.log()
+                    strs = 'epoch: {}, iter: {}, lr: {:.6f}, {}, time: {:.3f}'.format(
+                        epoch, train_batch_id, lr, logs, train_batch_elapse)
+                    logger.info(strs)
+
+                if train_batch_id > 0 and\
+                    train_batch_id % eval_batch_step == 0:
+                    metrics = eval_rec_run(exe, config, eval_info_dict, "eval")
+                    eval_acc = metrics['avg_acc']
+                    eval_sample_num = metrics['total_sample_num']
+                    # Strict ">": a tie keeps the previously saved best model.
+                    if eval_acc > best_eval_acc:
+                        best_eval_acc = eval_acc
+                        best_batch_id = train_batch_id
+                        best_epoch = epoch
+                        save_path = save_model_dir + "/best_accuracy"
+                        save_model(train_info_dict['train_program'], save_path)
+                    strs = 'Test iter: {}, acc:{:.6f}, best_acc:{:.6f}, best_epoch:{}, best_batch_id:{}, eval_sample_num:{}'.format(
+                        train_batch_id, eval_acc, best_eval_acc, best_epoch,
+                        best_batch_id, eval_sample_num)
+                    logger.info(strs)
+                train_batch_id += 1
+
+        except fluid.core.EOFException:
+            train_loader.reset()
+
+        # Epoch 0 is never snapshotted because of the "epoch > 0" guard.
+        if epoch > 0 and epoch % save_epoch_step == 0:
+            save_path = save_model_dir + "/iter_epoch_%d" % (epoch)
+            save_model(train_info_dict['train_program'], save_path)
+    return
--- a/tools/tmp/eval_det.py
+++ b/tools/tmp/eval_det.py
@ -0,0 +1,134 @@
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import os
+import sys
+import time
+import numpy as np
+from copy import deepcopy
+import json
+
+# from paddle.fluid.contrib.model_stat import summary
+
+
+def set_paddle_flags(**kwargs):
+    """Export each keyword as an environment variable unless already set.
+
+    Values are converted with str(); variables that already exist in the
+    environment are left untouched.
+    """
+    for flag_name, flag_value in kwargs.items():
+        if flag_name in os.environ:
+            continue
+        os.environ[flag_name] = str(flag_value)
+
+
+# NOTE(paddle-dev): All of these flags must be set before `import paddle`;
+# otherwise they have no effect.
+set_paddle_flags(
+    FLAGS_eager_delete_tensor_gb=0,  # enable GC to save memory
+)
+
+from paddle import fluid
+from ppocr.utils.utility import create_module
+from ppocr.utils.utility import load_config, merge_config
+import ppocr.data.det.reader_main as reader
+from ppocr.utils.utility import ArgsParser
+from ppocr.utils.check import check_gpu
+from ppocr.utils.checkpoint import load_pretrain, load_checkpoint, save, save_model
+
+from ppocr.utils.utility import initial_logger
+logger = initial_logger()
+from ppocr.utils.eval_utils import eval_det_run
+
+
+def draw_det_res(dt_boxes, config, img_name, ino):
+    """Draw detected boxes on a test image and save it as tmp<ino>.jpg.
+
+    Args:
+        dt_boxes: sequence of box arrays; nothing happens when it is empty.
+        config(dict): config['TestReader']['img_set_dir'] is prepended to
+            img_name by plain string concatenation.
+        img_name(str): image file name relative to img_set_dir.
+        ino(int): index used in the output file name.
+    """
+    if len(dt_boxes) <= 0:
+        return
+    img_path = config['TestReader']['img_set_dir'] + img_name
+    import cv2
+    src_im = cv2.imread(img_path)
+    for box in dt_boxes:
+        polygon = box.astype(np.int32).reshape((-1, 1, 2))
+        cv2.polylines(src_im, [polygon], True, color=(255, 255, 0), thickness=2)
+    cv2.imwrite("tmp%d.jpg" % ino, src_im)
+
+
+def main():
+    """Evaluate a detection model on the test set and log the metrics.
+
+    Reads the module-level FLAGS: loads the config named by FLAGS.config,
+    applies the FLAGS.opt overrides, builds the model in "test" mode, loads
+    config['Global']['pretrain_weights'] and runs eval_det_run. Exits the
+    process when pretrain_weights is None.
+    """
+    config = load_config(FLAGS.config)
+    merge_config(FLAGS.opt)
+    print(config)
+
+    # check if set use_gpu=True in paddlepaddle cpu version
+    use_gpu = config['Global']['use_gpu']
+    check_gpu(use_gpu)
+
+    place = fluid.CUDAPlace(0) if use_gpu else fluid.CPUPlace()
+    exe = fluid.Executor(place)
+
+    det_model = create_module(config['Architecture']['function'])(params=config)
+
+    # Build the evaluation graph inside fresh programs.
+    startup_prog = fluid.Program()
+    eval_prog = fluid.Program()
+    with fluid.program_guard(eval_prog, startup_prog):
+        with fluid.unique_name.guard():
+            eval_loader, eval_outputs = det_model(mode="test")
+            eval_fetch_list = [v.name for v in eval_outputs]
+    eval_prog = eval_prog.clone(for_test=True)
+    exe.run(startup_prog)
+
+    pretrain_weights = config['Global']['pretrain_weights']
+    if pretrain_weights is not None:
+        load_pretrain(exe, eval_prog, pretrain_weights)
+#         fluid.load(eval_prog, pretrain_weights)
+#         def if_exist(var):
+#             return os.path.exists(os.path.join(pretrain_weights, var.name))
+#         fluid.io.load_vars(exe, pretrain_weights, predicate=if_exist, main_program=eval_prog)
+    else:
+        # NOTE(review): exits with status 0 although no weights were given.
+        logger.info("Not find pretrain_weights:%s" % pretrain_weights)
+        sys.exit(0)
+
+#     fluid.io.save_inference_model("./output/", feeded_var_names=['image'],
+#         target_vars=eval_outputs, executor=exe, main_program=eval_prog,
+#         model_filename="model", params_filename="params")
+#     sys.exit(-1)
+
+    metrics = eval_det_run(exe, eval_prog, eval_fetch_list, config, "test")
+    logger.info("metrics:{}".format(metrics))
+    logger.info("success!")
+
+
+def test_reader():
+    """Iterate the test reader once and print simple throughput statistics.
+
+    Reads the module-level FLAGS for the config file and overrides. Every
+    10th batch it prints the running sample count, the batch size and the
+    average time per batch; totals are printed at the end.
+    """
+    config = load_config(FLAGS.config)
+    merge_config(FLAGS.opt)
+    print(config)
+    tmp_reader = reader.test_reader(config=config)
+    import time
+    total_samples = 0
+    starttime = time.time()
+    for batch_no, data in enumerate(tmp_reader(), start=1):
+        total_samples += len(data)
+        if batch_no % 10 == 0:
+            batch_time = (time.time() - starttime) / batch_no
+            print("reader:", total_samples, len(data), batch_time)
+    print("finish reader:", total_samples)
+    print("success")
+
+
+if __name__ == '__main__':
+    # FLAGS is a module-level global: main() and test_reader() read
+    # FLAGS.config and FLAGS.opt directly.
+    parser = ArgsParser()
+    FLAGS = parser.parse_args()
+    main()
+#     test_reader()
--- a/tools/tmp/infer_det.py
+++ b/tools/tmp/infer_det.py
@ -0,0 +1,160 @@
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import os
+import sys
+import time
+import numpy as np
+from copy import deepcopy
+import json
+
+# from paddle.fluid.contrib.model_stat import summary
+
+
+def set_paddle_flags(**kwargs):
+    """Export each keyword as an environment variable unless already set.
+
+    Values are converted with str(); variables that already exist in the
+    environment are left untouched.
+    """
+    for flag_name, flag_value in kwargs.items():
+        if flag_name in os.environ:
+            continue
+        os.environ[flag_name] = str(flag_value)
+
+
+# NOTE(paddle-dev): All of these flags must be set before `import paddle`;
+# otherwise they have no effect.
+set_paddle_flags(
+    FLAGS_eager_delete_tensor_gb=0,  # enable GC to save memory
+)
+
+from paddle import fluid
+from ppocr.utils.utility import create_module
+from ppocr.utils.utility import load_config, merge_config
+import ppocr.data.det.reader_main as reader
+from ppocr.utils.utility import ArgsParser
+from ppocr.utils.check import check_gpu
+from ppocr.utils.checkpoint import load_pretrain, load_checkpoint, save, save_model
+
+from ppocr.utils.utility import initial_logger
+logger = initial_logger()
+from ppocr.utils.eval_utils import eval_det_run
+
+
+def draw_det_res(dt_boxes, config, img_name, ino):
+    """Draw detected boxes on a test image and save it as tmp<ino>.jpg.
+
+    Args:
+        dt_boxes: sequence of box arrays; nothing happens when it is empty.
+        config(dict): config['TestReader']['img_set_dir'] is prepended to
+            img_name by plain string concatenation.
+        img_name(str): image file name relative to img_set_dir.
+        ino(int): index used in the output file name.
+    """
+    if len(dt_boxes) <= 0:
+        return
+    img_path = config['TestReader']['img_set_dir'] + img_name
+    import cv2
+    src_im = cv2.imread(img_path)
+    for box in dt_boxes:
+        polygon = box.astype(np.int32).reshape((-1, 1, 2))
+        cv2.polylines(src_im, [polygon], True, color=(255, 255, 0), thickness=2)
+    cv2.imwrite("tmp%d.jpg" % ino, src_im)
+
+
+def main():
+    """Run the detection model over the test reader and save the boxes.
+
+    Reads the module-level FLAGS: loads the config named by FLAGS.config,
+    applies the FLAGS.opt overrides, builds the model in "test" mode, loads
+    config['Global']['pretrain_weights'] and writes one line per image to
+    config['Global']['save_res_path'] in the form
+    "<image name>\t<JSON list of {"transcription", "points"}>".
+    Exits the process when pretrain_weights is None.
+    """
+    config = load_config(FLAGS.config)
+    merge_config(FLAGS.opt)
+    print(config)
+
+    # check if set use_gpu=True in paddlepaddle cpu version
+    use_gpu = config['Global']['use_gpu']
+    check_gpu(use_gpu)
+
+    place = fluid.CUDAPlace(0) if use_gpu else fluid.CPUPlace()
+    exe = fluid.Executor(place)
+
+    det_model = create_module(config['Architecture']['function'])(params=config)
+
+    # Build the inference graph inside fresh programs.
+    startup_prog = fluid.Program()
+    eval_prog = fluid.Program()
+    with fluid.program_guard(eval_prog, startup_prog):
+        with fluid.unique_name.guard():
+            eval_outputs = det_model(mode="test")
+            eval_fetch_list = [v.name for v in eval_outputs]
+    eval_prog = eval_prog.clone(for_test=True)
+    exe.run(startup_prog)
+
+    pretrain_weights = config['Global']['pretrain_weights']
+    if pretrain_weights is not None:
+        fluid.load(eval_prog, pretrain_weights)
+    else:
+        # NOTE(review): exits with status 0 although no weights were given.
+        logger.info("Not find pretrain_weights:%s" % pretrain_weights)
+        sys.exit(0)
+
+    save_res_path = config['Global']['save_res_path']
+    # Opened in binary mode; every line is encoded before being written.
+    with open(save_res_path, "wb") as fout:
+        test_reader = reader.test_reader(config=config)
+        tackling_num = 0
+        for data in test_reader():
+            img_num = len(data)
+            tackling_num = tackling_num + img_num
+            logger.info("tackling_num:%d", tackling_num)
+            # Each sample is indexed as (image, ratio, image name).
+            img_list = []
+            ratio_list = []
+            img_name_list = []
+            for ino in range(img_num):
+                img_list.append(data[ino][0])
+                ratio_list.append(data[ino][1])
+                img_name_list.append(data[ino][2])
+            img_list = np.concatenate(img_list, axis=0)
+            outs = exe.run(eval_prog,\
+                feed={'image': img_list},\
+                fetch_list=eval_fetch_list)
+
+            # NOTE(review): the post-process module is rebuilt for every
+            # batch although its parameters do not change inside the loop.
+            global_params = config['Global']
+            postprocess_params = deepcopy(config["PostProcess"])
+            postprocess_params.update(global_params)
+            postprocess = create_module(postprocess_params['function'])\
+                (params=postprocess_params)
+            dt_boxes_list = postprocess(outs, ratio_list)
+            for ino in range(img_num):
+                dt_boxes = dt_boxes_list[ino]
+                img_name = img_name_list[ino]
+                dt_boxes_json = []
+                for box in dt_boxes:
+                    tmp_json = {"transcription": ""}
+                    tmp_json['points'] = box.tolist()
+                    dt_boxes_json.append(tmp_json)
+                otstr = img_name + "\t" + json.dumps(dt_boxes_json) + "\n"
+                fout.write(otstr.encode())
+                #draw_det_res(dt_boxes, config, img_name, ino)
+    logger.info("success!")
+
+
+def test_reader():
+    """Iterate the test reader once and print simple throughput statistics.
+
+    Reads the module-level FLAGS for the config file and overrides. Every
+    10th batch it prints the running sample count, the batch size and the
+    average time per batch; totals are printed at the end.
+    """
+    config = load_config(FLAGS.config)
+    merge_config(FLAGS.opt)
+    print(config)
+    tmp_reader = reader.test_reader(config=config)
+    import time
+    total_samples = 0
+    starttime = time.time()
+    for batch_no, data in enumerate(tmp_reader(), start=1):
+        total_samples += len(data)
+        if batch_no % 10 == 0:
+            batch_time = (time.time() - starttime) / batch_no
+            print("reader:", total_samples, len(data), batch_time)
+    print("finish reader:", total_samples)
+    print("success")
+
+
+if __name__ == '__main__':
+    # FLAGS is a module-level global: main() and test_reader() read
+    # FLAGS.config and FLAGS.opt directly.
+    parser = ArgsParser()
+    FLAGS = parser.parse_args()
+    main()
+#     test_reader()
--- a/tools/tmp/infer_rec.py
+++ b/tools/tmp/infer_rec.py
@ -0,0 +1,116 @@
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import os
+import time
+import multiprocessing
+import numpy as np
+
+
+def set_paddle_flags(**kwargs):
+    """Export each keyword argument as an environment variable if not yet set.
+
+    The value is stored as ``str(value)``. Variables that already exist in
+    ``os.environ`` are left untouched, so settings made by the caller's shell
+    take precedence over the defaults passed here.
+    """
+    for flag_name in kwargs:
+        if flag_name not in os.environ:
+            os.environ[flag_name] = str(kwargs[flag_name])
+
+
+# NOTE(paddle-dev): All of these flags must be
+# set before `import paddle`; otherwise they
+# will not take effect.
+set_paddle_flags(
+    FLAGS_eager_delete_tensor_gb=0,  # enable GC to save memory
+)
+
+from paddle import fluid
+
+from ppocr.utils.utility import load_config, merge_config
+from ppocr.data.rec.reader_main import test_reader
+
+from ppocr.utils.utility import ArgsParser
+from ppocr.utils.character import CharacterOps, cal_predicts_accuracy
+from ppocr.utils.check import check_gpu
+from ppocr.utils.utility import create_module
+
+from ppocr.utils.utility import initial_logger
+logger = initial_logger()
+
+
+def main():
+    """Run one recognition forward pass and export an inference model.
+
+    Steps, in order: load the config named by FLAGS.config and call
+    merge_config(FLAGS.opt); build the recognition network in "test" mode;
+    run the startup program; load Global.pretrain_weights when it is not
+    None; feed the data returned by test_reader(image_shape, test_img_path)
+    as "image"; decode the first fetched output with CharacterOps; save an
+    inference model under "./output/"; print the raw and decoded predictions.
+    """
+    config = load_config(FLAGS.config)
+    merge_config(FLAGS.opt)
+    char_ops = CharacterOps(config['Global'])
+    # Stored in the config before the model is created from it below.
+    config['Global']['char_num'] = char_ops.get_char_num()
+
+    # check if set use_gpu=True in paddlepaddle cpu version
+    use_gpu = config['Global']['use_gpu']
+    check_gpu(use_gpu)
+
+    place = fluid.CUDAPlace(0) if use_gpu else fluid.CPUPlace()
+    exe = fluid.Executor(place)
+
+    rec_model = create_module(config['Architecture']['function'])(params=config)
+
+    startup_prog = fluid.Program()
+    eval_prog = fluid.Program()
+    with fluid.program_guard(eval_prog, startup_prog):
+        with fluid.unique_name.guard():
+            eval_outputs = rec_model(mode="test")
+            eval_fetch_list = [v.name for v in eval_outputs]
+    eval_prog = eval_prog.clone(for_test=True)
+    exe.run(startup_prog)
+
+    pretrain_weights = config['Global']['pretrain_weights']
+    if pretrain_weights is not None:
+        fluid.load(eval_prog, pretrain_weights)
+
+    # NOTE(review): 'test_img_path' is read from the top level of the config,
+    # unlike the other settings here which live under 'Global' -- confirm.
+    test_img_path = config['test_img_path']
+    image_shape = config['Global']['image_shape']
+    blobs = test_reader(image_shape, test_img_path)
+    # return_numpy=False keeps the fetched tensor object so .lod() can be
+    # called on it below.
+    predict = exe.run(program=eval_prog,
+                      feed={"image": blobs},
+                      fetch_list=eval_fetch_list,
+                      return_numpy=False)
+    preds = np.array(predict[0])
+    if preds.shape[1] == 1:
+        # Single-column output: flatten and decode it as one index sequence.
+        preds = preds.reshape(-1)
+        # NOTE(review): preds_lod is computed but never used afterwards.
+        preds_lod = predict[0].lod()[0]
+        preds_text = char_ops.decode(preds)
+    else:
+        # Multi-column output: only row 0 is decoded. The first element is
+        # skipped and the slice stops before the second occurrence of the
+        # value 1, if there is one (presumably 1 is an end-of-sequence id and
+        # index 0 holds a start token -- TODO confirm).
+        end_pos = np.where(preds[0, :] == 1)[0]
+        if len(end_pos) <= 1:
+            preds_text = preds[0, 1:]
+        else:
+            preds_text = preds[0, 1:end_pos[1]]
+        preds_text = preds_text.reshape(-1)
+        preds_text = char_ops.decode(preds_text)
+
+    # Export the test program with "image" as its only feed variable.
+    fluid.io.save_inference_model(
+        "./output/",
+        feeded_var_names=['image'],
+        target_vars=eval_outputs,
+        executor=exe,
+        main_program=eval_prog,
+        model_filename="model",
+        params_filename="params")
+    print(preds)
+    print(preds_text)
+
+
+# Script entry point: parse the command line into the module-level FLAGS
+# that main() reads (FLAGS.config, FLAGS.opt), then run main().
+if __name__ == '__main__':
+    parser = ArgsParser()
+    FLAGS = parser.parse_args()
+    main()
--- a/tools/tmp/test_rec_benchmark.py
+++ b/tools/tmp/test_rec_benchmark.py
@ -0,0 +1,128 @@
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import os
+import time
+import multiprocessing
+import numpy as np
+
+
+def set_paddle_flags(**kwargs):
+    """Export each keyword argument as an environment variable if not yet set.
+
+    The value is stored as ``str(value)``. Variables that already exist in
+    ``os.environ`` are left untouched, so settings made by the caller's shell
+    take precedence over the defaults passed here.
+    """
+    for flag_name in kwargs:
+        if flag_name not in os.environ:
+            os.environ[flag_name] = str(kwargs[flag_name])
+
+
+# NOTE(paddle-dev): All of these flags must be
+# set before `import paddle`; otherwise they
+# will not take effect.
+set_paddle_flags(
+    FLAGS_eager_delete_tensor_gb=0,  # enable GC to save memory
+)
+
+from paddle import fluid
+
+from ppocr.utils.utility import load_config, merge_config
+import ppocr.data.rec.reader_main as reader
+
+from ppocr.utils.utility import ArgsParser
+from ppocr.utils.character import CharacterOps, cal_predicts_accuracy
+from ppocr.utils.check import check_gpu
+from ppocr.utils.utility import create_module
+
+from ppocr.utils.eval_utils import eval_run
+
+from ppocr.utils.utility import initial_logger
+logger = initial_logger()
+
+
+def main():
+    """Evaluate a recognition model on a fixed list of benchmark datasets.
+
+    Builds the network in "eval" mode, loads Global.pretrain_weights when it
+    is not None, then for each name in eval_data_list points
+    TestReader.lmdb_sets_dir at "<original dir>/<name>", runs eval_run and
+    accumulates the returned counts and the wall-clock time. Finally logs
+    the accuracy per dataset plus the overall accuracy and the average time
+    per sample.
+    """
+    config = load_config(FLAGS.config)
+    merge_config(FLAGS.opt)
+    char_ops = CharacterOps(config['Global'])
+    # Stored in the config before the model is created from it below.
+    config['Global']['char_num'] = char_ops.get_char_num()
+
+    # check if set use_gpu=True in paddlepaddle cpu version
+    use_gpu = config['Global']['use_gpu']
+    check_gpu(use_gpu)
+
+    # NOTE(review): devices_num is computed here but never used afterwards.
+    if use_gpu:
+        devices_num = fluid.core.get_cuda_device_count()
+    else:
+        devices_num = int(
+            os.environ.get('CPU_NUM', multiprocessing.cpu_count()))
+
+    place = fluid.CUDAPlace(0) if use_gpu else fluid.CPUPlace()
+    exe = fluid.Executor(place)
+
+    rec_model = create_module(config['Architecture']['function'])(params=config)
+
+    startup_prog = fluid.Program()
+    eval_prog = fluid.Program()
+    with fluid.program_guard(eval_prog, startup_prog):
+        with fluid.unique_name.guard():
+            eval_loader, eval_outputs = rec_model(mode="eval")
+            eval_fetch_list = [v.name for v in eval_outputs]
+    eval_prog = eval_prog.clone(for_test=True)
+
+    exe.run(startup_prog)
+    pretrain_weights = config['Global']['pretrain_weights']
+    if pretrain_weights is not None:
+        fluid.load(eval_prog, pretrain_weights)
+
+    # Sub-directory names appended to TestReader.lmdb_sets_dir, one per
+    # benchmark set.
+    eval_data_list = ['IIIT5k_3000', 'SVT', 'IC03_860', 'IC03_867',\
+        'IC13_857', 'IC13_1015', 'IC15_1811', 'IC15_2077', 'SVTP', 'CUTE80']
+    eval_data_dir = config['TestReader']['lmdb_sets_dir']
+    total_forward_time = 0
+    total_evaluation_data_number = 0
+    total_correct_number = 0
+    eval_data_acc_info = {}
+    for eval_data in eval_data_list:
+        # The shared config is mutated in place so the reader built next
+        # picks up the current dataset directory.
+        config['TestReader']['lmdb_sets_dir'] = \
+            eval_data_dir + "/" + eval_data
+        eval_reader = reader.train_eval_reader(
+            config=config, char_ops=char_ops, mode="test")
+        eval_loader.set_sample_list_generator(eval_reader, places=place)
+
+        # The timed span covers the whole eval_run call, not only the
+        # network forward pass.
+        start_time = time.time()
+        outs = eval_run(exe, eval_prog, eval_loader, eval_fetch_list, char_ops,
+                        "best", "test")
+        infer_time = time.time() - start_time
+        eval_acc, acc_num, sample_num = outs
+        total_forward_time += infer_time
+        total_evaluation_data_number += sample_num
+        total_correct_number += acc_num
+        eval_data_acc_info[eval_data] = outs
+
+    # NOTE(review): both divisions raise ZeroDivisionError if every dataset
+    # reports sample_num == 0.
+    avg_forward_time = total_forward_time / total_evaluation_data_number
+    avg_acc = total_correct_number * 1.0 / total_evaluation_data_number
+    logger.info('-' * 50)
+    strs = ""
+    for eval_data in eval_data_list:
+        eval_acc, acc_num, sample_num = eval_data_acc_info[eval_data]
+        strs += "\n {}, accuracy:{:.6f}".format(eval_data, eval_acc)
+    strs += "\n average, accuracy:{:.6f}, time:{:.6f}".format(avg_acc,
+                                                              avg_forward_time)
+    logger.info(strs)
+    logger.info('-' * 50)
+
+
+# Script entry point: parse the command line into the module-level FLAGS
+# that main() reads (FLAGS.config, FLAGS.opt), then run main().
+if __name__ == '__main__':
+    parser = ArgsParser()
+    FLAGS = parser.parse_args()
+    main()
--- a/tools/tmp/train_det.py
+++ b/tools/tmp/train_det.py
@ -0,0 +1,216 @@
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import os
+import sys
+import time
+import multiprocessing
+import numpy as np
+
+# from paddle.fluid.contrib.model_stat import summary
+
+
+def set_paddle_flags(**kwargs):
+    """Export each keyword argument as an environment variable if not yet set.
+
+    The value is stored as ``str(value)``. Variables that already exist in
+    ``os.environ`` are left untouched, so settings made by the caller's shell
+    take precedence over the defaults passed here.
+    """
+    for flag_name in kwargs:
+        if flag_name not in os.environ:
+            os.environ[flag_name] = str(kwargs[flag_name])
+
+
+# NOTE(paddle-dev): All of these flags must be
+# set before `import paddle`; otherwise they
+# will not take effect.
+set_paddle_flags(
+    FLAGS_eager_delete_tensor_gb=0,  # enable GC to save memory
+)
+
+from paddle import fluid
+from ppocr.utils.utility import create_module
+from ppocr.utils.utility import load_config, merge_config
+import ppocr.data.det.reader_main as reader
+from ppocr.utils.utility import ArgsParser
+from ppocr.utils.character import CharacterOps, cal_predicts_accuracy
+from ppocr.utils.check import check_gpu
+from ppocr.utils.stats import TrainingStats
+from ppocr.utils.checkpoint import load_pretrain, load_checkpoint, save, save_model
+from ppocr.utils.eval_utils import eval_run
+from ppocr.utils.eval_utils import eval_det_run
+
+from ppocr.utils.utility import initial_logger
+logger = initial_logger()
+from ppocr.utils.utility import create_multi_devices_program
+
+
+def main():
+    """Train a text detection model (EAST or DB) with periodic evaluation.
+
+    Loads the config named by FLAGS.config and calls merge_config(FLAGS.opt),
+    builds separate train and eval programs that share one startup program,
+    optionally loads Global.pretrain_weights, then loops over
+    Global.epoch_num epochs. Smoothed losses are logged every
+    Global.print_step batches, eval_det_run is called every Global.eval_step
+    batches (saving "<save_dir>/best_accuracy" when hmean does not get
+    worse), and "<save_dir>/iter_epoch_<n>" is saved every
+    Global.save_epoch_step epochs (never for epoch 0).
+    """
+    config = load_config(FLAGS.config)
+    merge_config(FLAGS.opt)
+    print(config)
+
+    alg = config['Global']['algorithm']
+    assert alg in ['EAST', 'DB']
+
+    # check if set use_gpu=True in paddlepaddle cpu version
+    use_gpu = config['Global']['use_gpu']
+    check_gpu(use_gpu)
+
+    place = fluid.CUDAPlace(0) if use_gpu else fluid.CPUPlace()
+    exe = fluid.Executor(place)
+
+    det_model = create_module(config['Architecture']['function'])(params=config)
+
+    startup_prog = fluid.Program()
+    train_prog = fluid.Program()
+    with fluid.program_guard(train_prog, startup_prog):
+        with fluid.unique_name.guard():
+            train_loader, train_outputs = det_model(mode="train")
+            train_fetch_list = [v.name for v in train_outputs]
+            # The first model output is the loss handed to the optimizer.
+            train_loss = train_outputs[0]
+            opt_params = config['Optimizer']
+            optimizer = create_module(opt_params['function'])(opt_params)
+            optimizer.minimize(train_loss)
+            # The learning-rate variable is appended last, which is why the
+            # training loop reads it back as train_outs[-1].
+            global_lr = optimizer._global_learning_rate()
+            global_lr.persistable = True
+            train_fetch_list.append(global_lr.name)
+
+    eval_prog = fluid.Program()
+    with fluid.program_guard(eval_prog, startup_prog):
+        with fluid.unique_name.guard():
+            # NOTE(review): eval_loader is not used again in this function;
+            # eval_det_run below receives only the program, fetch list and
+            # config.
+            eval_loader, eval_outputs = det_model(mode="eval")
+            eval_fetch_list = [v.name for v in eval_outputs]
+    eval_prog = eval_prog.clone(for_test=True)
+
+    train_reader = reader.train_reader(config=config)
+    train_loader.set_sample_list_generator(train_reader, places=place)
+
+    exe.run(startup_prog)
+
+    # compile program for multi-devices
+    train_compile_program = create_multi_devices_program(train_prog,
+                                                         train_loss.name)
+
+    pretrain_weights = config['Global']['pretrain_weights']
+    if pretrain_weights is not None:
+        load_pretrain(exe, train_prog, pretrain_weights)
+        print("pretrain weights loaded!")
+
+    # train_batch_id counts batches across all epochs; it is never reset.
+    train_batch_id = 0
+    if alg == 'EAST':
+        train_log_keys = ['loss_total', 'loss_cls', 'loss_offset']
+    elif alg == 'DB':
+        train_log_keys = [
+            'loss_total', 'loss_shrink', 'loss_threshold', 'loss_binary'
+        ]
+    log_smooth_window = config['Global']['log_smooth_window']
+    epoch_num = config['Global']['epoch_num']
+    print_step = config['Global']['print_step']
+    eval_step = config['Global']['eval_step']
+    save_epoch_step = config['Global']['save_epoch_step']
+    save_dir = config['Global']['save_dir']
+    train_stats = TrainingStats(log_smooth_window, train_log_keys)
+    best_eval_hmean = -1
+    best_batch_id = 0
+    best_epoch = 0
+    for epoch in range(epoch_num):
+        train_loader.start()
+        try:
+            # Runs until exe.run raises fluid.core.EOFException (handled
+            # below), presumably when the loader has no more batches.
+            while True:
+                t1 = time.time()
+                train_outs = exe.run(program=train_compile_program,
+                                     fetch_list=train_fetch_list,
+                                     return_numpy=False)
+                # Fetch positions follow the order of the model's train
+                # outputs; the indices below assume loss_total comes first,
+                # followed by the per-algorithm component losses -- TODO
+                # confirm against the model definition.
+                loss_total = np.mean(np.array(train_outs[0]))
+                if alg == 'EAST':
+                    loss_cls = np.mean(np.array(train_outs[1]))
+                    loss_offset = np.mean(np.array(train_outs[2]))
+                    stats = {'loss_total':loss_total, 'loss_cls':loss_cls,\
+                        'loss_offset':loss_offset}
+                elif alg == 'DB':
+                    loss_shrink_maps = np.mean(np.array(train_outs[1]))
+                    loss_threshold_maps = np.mean(np.array(train_outs[2]))
+                    loss_binary_maps = np.mean(np.array(train_outs[3]))
+                    stats = {'loss_total':loss_total, 'loss_shrink':loss_shrink_maps, \
+                        'loss_threshold':loss_threshold_maps, 'loss_binary':loss_binary_maps}
+                lr = np.mean(np.array(train_outs[-1]))
+                t2 = time.time()
+                train_batch_elapse = t2 - t1
+
+                # stats = {'loss_total':loss_total, 'loss_cls':loss_cls,\
+                #     'loss_offset':loss_offset}
+                train_stats.update(stats)
+                if train_batch_id > 0 and train_batch_id % print_step == 0:
+                    logs = train_stats.log()
+                    strs = 'epoch: {}, iter: {}, lr: {:.6f}, {}, time: {:.3f}'.format(
+                        epoch, train_batch_id, lr, logs, train_batch_elapse)
+                    logger.info(strs)
+
+                if train_batch_id > 0 and\
+                    train_batch_id % eval_step == 0:
+                    metrics = eval_det_run(exe, eval_prog, eval_fetch_list,
+                                           config, "eval")
+                    hmean = metrics['hmean']
+                    # ">=" means a tie with the current best also overwrites
+                    # the saved best model.
+                    if hmean >= best_eval_hmean:
+                        best_eval_hmean = hmean
+                        best_batch_id = train_batch_id
+                        best_epoch = epoch
+                        save_path = save_dir + "/best_accuracy"
+                        save_model(train_prog, save_path)
+                    strs = 'Test iter: {}, metrics:{}, best_hmean:{:.6f}, best_epoch:{}, best_batch_id:{}'.format(
+                        train_batch_id, metrics, best_eval_hmean, best_epoch,
+                        best_batch_id)
+                    logger.info(strs)
+                train_batch_id += 1
+
+        except fluid.core.EOFException:
+            # End of the epoch: reset the loader so the next epoch can call
+            # start() again.
+            train_loader.reset()
+
+        # Periodic snapshot; epoch 0 is skipped by the "epoch > 0" guard.
+        if epoch > 0 and epoch % save_epoch_step == 0:
+            save_path = save_dir + "/iter_epoch_%d" % (epoch)
+            save_model(train_prog, save_path)
+
+
+def test_reader():
+    """Debug helper: iterate the training reader repeatedly, timing every batch.
+
+    Loads the config named by FLAGS.config, calls merge_config(FLAGS.opt),
+    builds reader.train_reader(config=config) and then loops over it
+    forever, printing the batch index within the current pass, the batch
+    length and the time since the previous batch.
+
+    NOTE(review): the outer `while True` has no exit, so the two final
+    print calls are unreachable, and `print_count` is never incremented,
+    so `print_count % 1 == 0` is always true. Confirm whether the endless
+    loop is intentional (e.g. for stress-testing the reader).
+    """
+    config = load_config(FLAGS.config)
+    merge_config(FLAGS.opt)
+    print(config)
+    tmp_reader = reader.train_reader(config=config)
+    count = 0
+    print_count = 0
+    import time
+    while True:
+        starttime = time.time()
+        # count restarts at 0 for every full pass over the reader.
+        count = 0
+        for data in tmp_reader():
+            count += 1
+            if print_count % 1 == 0:
+                # Time for this batch only: the reference point is moved
+                # forward after each measurement.
+                batch_time = time.time() - starttime
+                starttime = time.time()
+                print("reader:", count, len(data), batch_time)
+    print("finish reader:", count)
+    print("success")
+
+
+# Script entry point: extend the shared ArgsParser with -r/--resume_checkpoint,
+# parse the command line into the module-level FLAGS, then run main().
+# NOTE(review): main() above reads only FLAGS.config and FLAGS.opt;
+# FLAGS.resume_checkpoint is parsed here but never used in this file.
+if __name__ == '__main__':
+    parser = ArgsParser()
+    parser.add_argument(
+        "-r",
+        "--resume_checkpoint",
+        default=None,
+        type=str,
+        help="Checkpoint path for resuming training.")
+    FLAGS = parser.parse_args()
+    main()
+    # Debug alternative to main(), left disabled:
+    # test_reader()
--- a/tools/tmp/train_rec.py
+++ b/tools/tmp/train_rec.py
@ -0,0 +1,222 @@
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import os
+import sys
+import time
+import multiprocessing
+import numpy as np
+
+# from paddle.fluid.contrib.model_stat import summary
+
+
+def set_paddle_flags(**kwargs):
+    """Export each keyword argument as an environment variable if not yet set.
+
+    The value is stored as ``str(value)``. Variables that already exist in
+    ``os.environ`` are left untouched, so settings made by the caller's shell
+    take precedence over the defaults passed here.
+    """
+    for flag_name in kwargs:
+        if flag_name not in os.environ:
+            os.environ[flag_name] = str(kwargs[flag_name])
+
+
+# NOTE(paddle-dev): All of these flags must be
+# set before `import paddle`; otherwise they
+# will not take effect.
+set_paddle_flags(
+    FLAGS_eager_delete_tensor_gb=0,  # enable GC to save memory
+)
+
+from paddle import fluid
+from ppocr.utils.utility import create_module
+from ppocr.utils.utility import load_config, merge_config
+import ppocr.data.rec.reader_main as reader
+from ppocr.utils.utility import ArgsParser
+from ppocr.utils.character import CharacterOps, cal_predicts_accuracy
+from ppocr.utils.check import check_gpu
+from ppocr.utils.stats import TrainingStats
+from ppocr.utils.checkpoint import load_pretrain, load_checkpoint, save, save_model
+from ppocr.utils.eval_utils import eval_run
+
+from ppocr.utils.utility import initial_logger
+logger = initial_logger()
+from ppocr.utils.utility import create_multi_devices_program
+
+
+def main():
+    """Train a text recognition model with periodic evaluation.
+
+    Loads the config named by FLAGS.config and calls merge_config(FLAGS.opt),
+    builds separate train and eval programs that share one startup program
+    (with global-norm gradient clipping when Global.gradient_clip is
+    present), optionally loads Global.pretrain_weights, then loops over
+    Global.epoch_num epochs. Smoothed loss/accuracy are logged every
+    Global.print_step batches, eval_run is called every Global.eval_step
+    batches (saving "<save_dir>/best_accuracy" on a strict improvement),
+    and "<save_dir>/iter_epoch_<n>" is saved every Global.save_epoch_step
+    epochs (never for epoch 0).
+    """
+    config = load_config(FLAGS.config)
+    merge_config(FLAGS.opt)
+    char_ops = CharacterOps(config['Global'])
+    # Stored in the config before the model is created from it below.
+    config['Global']['char_num'] = char_ops.get_char_num()
+    print(config)
+
+    # check if set use_gpu=True in paddlepaddle cpu version
+    use_gpu = config['Global']['use_gpu']
+    check_gpu(use_gpu)
+
+    place = fluid.CUDAPlace(0) if use_gpu else fluid.CPUPlace()
+    exe = fluid.Executor(place)
+
+    rec_model = create_module(config['Architecture']['function'])(params=config)
+
+    startup_prog = fluid.Program()
+    train_prog = fluid.Program()
+    with fluid.program_guard(train_prog, startup_prog):
+        with fluid.unique_name.guard():
+            train_loader, train_outputs = rec_model(mode="train")
+            # NOTE(review): save_var is assigned but never used afterwards.
+            save_var = train_outputs[1]
+
+            # Gradient clipping is registered before optimizer.minimize()
+            # is called below.
+            if "gradient_clip" in config['Global']:
+                gradient_clip = config['Global']['gradient_clip']
+                clip = fluid.clip.GradientClipByGlobalNorm(gradient_clip)
+                fluid.clip.set_gradient_clip(clip, program=train_prog)
+
+            train_fetch_list = [v.name for v in train_outputs]
+            # The first model output is the loss handed to the optimizer.
+            train_loss = train_outputs[0]
+            opt_params = config['Optimizer']
+            optimizer = create_module(opt_params['function'])(opt_params)
+            optimizer.minimize(train_loss)
+            # The learning-rate variable is appended last, which is why the
+            # training loop reads it back as train_outs[-1].
+            global_lr = optimizer._global_learning_rate()
+            global_lr.persistable = True
+            train_fetch_list.append(global_lr.name)
+
+    train_reader = reader.train_eval_reader(
+        config=config, char_ops=char_ops, mode="train")
+    train_loader.set_sample_list_generator(train_reader, places=place)
+
+    eval_prog = fluid.Program()
+    with fluid.program_guard(eval_prog, startup_prog):
+        with fluid.unique_name.guard():
+            eval_loader, eval_outputs = rec_model(mode="eval")
+            eval_fetch_list = [v.name for v in eval_outputs]
+
+    eval_prog = eval_prog.clone(for_test=True)
+    exe.run(startup_prog)
+
+    eval_reader = reader.train_eval_reader(
+        config=config, char_ops=char_ops, mode="eval")
+    eval_loader.set_sample_list_generator(eval_reader, places=place)
+
+    # compile program for multi-devices
+    train_compile_program = create_multi_devices_program(train_prog,
+                                                         train_loss.name)
+
+    pretrain_weights = config['Global']['pretrain_weights']
+    if pretrain_weights is not None:
+        load_pretrain(exe, train_prog, pretrain_weights)
+
+    # train_batch_id counts batches across all epochs; it is never reset.
+    train_batch_id = 0
+    train_log_keys = ['loss', 'acc']
+    log_smooth_window = config['Global']['log_smooth_window']
+    epoch_num = config['Global']['epoch_num']
+    # NOTE(review): loss_type is read (so the key must exist) but is not
+    # used anywhere below.
+    loss_type = config['Global']['loss_type']
+    print_step = config['Global']['print_step']
+    eval_step = config['Global']['eval_step']
+    save_epoch_step = config['Global']['save_epoch_step']
+    save_dir = config['Global']['save_dir']
+    train_stats = TrainingStats(log_smooth_window, train_log_keys)
+    best_eval_acc = -1
+    best_batch_id = 0
+    best_epoch = 0
+    for epoch in range(epoch_num):
+        train_loader.start()
+        try:
+            # Runs until exe.run raises fluid.core.EOFException (handled
+            # below), presumably when the loader has no more batches.
+            while True:
+                t1 = time.time()
+                train_outs = exe.run(program=train_compile_program,
+                                     fetch_list=train_fetch_list,
+                                     return_numpy=False)
+                loss = np.mean(np.array(train_outs[0]))
+                lr = np.mean(np.array(train_outs[-1]))
+
+                # Outputs 1 and 2 are treated as predictions and labels;
+                # both are fetched as tensor objects (return_numpy=False) so
+                # their LoD can be read via .lod().
+                preds = np.array(train_outs[1])
+                preds_lod = train_outs[1].lod()[0]
+                labels = np.array(train_outs[2])
+                labels_lod = train_outs[2].lod()[0]
+
+                acc, acc_num, img_num = cal_predicts_accuracy(
+                    char_ops, preds, preds_lod, labels, labels_lod)
+
+                # The measured time covers the accuracy computation as well
+                # as the exe.run call.
+                t2 = time.time()
+                train_batch_elapse = t2 - t1
+
+                stats = {'loss': loss, 'acc': acc}
+                train_stats.update(stats)
+                if train_batch_id > 0 and train_batch_id % print_step == 0:
+                    logs = train_stats.log()
+                    strs = 'epoch: {}, iter: {}, lr: {:.6f}, {}, time: {:.3f}'.format(
+                        epoch, train_batch_id, lr, logs, train_batch_elapse)
+                    logger.info(strs)
+
+                if train_batch_id > 0 and train_batch_id % eval_step == 0:
+                    outs = eval_run(exe, eval_prog, eval_loader,
+                                    eval_fetch_list, char_ops, train_batch_id,
+                                    "eval")
+                    eval_acc, acc_num, sample_num = outs
+                    # Strict ">" means a tie keeps the previously saved best
+                    # model.
+                    if eval_acc > best_eval_acc:
+                        best_eval_acc = eval_acc
+                        best_batch_id = train_batch_id
+                        best_epoch = epoch
+                        save_path = save_dir + "/best_accuracy"
+                        save_model(train_prog, save_path)
+
+                    strs = 'Test iter: {}, acc:{:.6f}, best_acc:{:.6f}, best_epoch:{}, best_batch_id:{}, sample_num:{}'.format(
+                        train_batch_id, eval_acc, best_eval_acc, best_epoch,
+                        best_batch_id, sample_num)
+                    logger.info(strs)
+                train_batch_id += 1
+
+        except fluid.core.EOFException:
+            # End of the epoch: reset the loader so the next epoch can call
+            # start() again.
+            train_loader.reset()
+
+        # Periodic snapshot; epoch 0 is skipped by the "epoch > 0" guard.
+        if epoch > 0 and epoch % save_epoch_step == 0:
+            save_path = save_dir + "/iter_epoch_%d" % (epoch)
+            save_model(train_prog, save_path)
+
+
+def test_reader():
+    """Debug helper that drains the "eval" reader and reports its throughput.
+
+    Loads the config named by FLAGS.config, calls merge_config(FLAGS.opt),
+    stores the character count in Global.char_num, then iterates over every
+    batch from reader.train_eval_reader(..., mode="eval"). On every 10th
+    batch it prints the running sample count, the size of the current batch
+    and the mean wall-clock time per batch since iteration started.
+    """
+    import time
+
+    config = load_config(FLAGS.config)
+    merge_config(FLAGS.opt)
+    char_ops = CharacterOps(config['Global'])
+    config['Global']['char_num'] = char_ops.get_char_num()
+    print(config)
+    # Change mode to "train" here to exercise the training reader instead.
+    batch_source = reader.train_eval_reader(
+        config=config, char_ops=char_ops, mode="eval")
+    total_samples = 0
+    began_at = time.time()
+    for batch_idx, batch in enumerate(batch_source(), 1):
+        batch_len = len(batch)
+        total_samples += batch_len
+        if batch_idx % 10 == 0:
+            mean_batch_time = (time.time() - began_at) / batch_idx
+            print("reader:", total_samples, batch_len, mean_batch_time)
+    print("finish reader:", total_samples)
+    print("success")
+
+
+# Script entry point: extend the shared ArgsParser with -r/--resume_checkpoint,
+# parse the command line into the module-level FLAGS, then run main().
+# NOTE(review): main() above reads only FLAGS.config and FLAGS.opt;
+# FLAGS.resume_checkpoint is parsed here but never used in this file.
+if __name__ == '__main__':
+    parser = ArgsParser()
+    parser.add_argument(
+        "-r",
+        "--resume_checkpoint",
+        default=None,
+        type=str,
+        help="Checkpoint path for resuming training.")
+    FLAGS = parser.parse_args()
+    main()
+# Debug alternative to main(), left disabled:
+#     test_reader()
--- a/tools/train.py
+++ b/tools/train.py
@ -0,0 +1,113 @@
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import os
+import sys
+import time
+import multiprocessing
+import numpy as np
+
+
+def set_paddle_flags(**kwargs):
+    """Export each keyword argument as an environment variable if not yet set.
+
+    The value is stored as ``str(value)``. Variables that already exist in
+    ``os.environ`` are left untouched, so settings made by the caller's shell
+    take precedence over the defaults passed here.
+    """
+    for flag_name in kwargs:
+        if flag_name not in os.environ:
+            os.environ[flag_name] = str(kwargs[flag_name])
+
+
+# NOTE(paddle-dev): All of these flags must be
+# set before `import paddle`; otherwise they
+# will not take effect.
+set_paddle_flags(
+    FLAGS_eager_delete_tensor_gb=0,  # enable GC to save memory
+)
+
+import program
+from paddle import fluid
+from ppocr.utils.utility import initial_logger
+logger = initial_logger()
+from ppocr.data.reader_main import reader_main
+from ppocr.utils.save_load import init_model
+from ppocr.utils.character import CharacterOps
+
+
+def main():
+    """Build the train and eval programs from the config and start training.
+
+    Loads the config named by FLAGS.config and calls
+    program.merge_config(FLAGS.opt). Global.algorithm must be one of EAST,
+    DB, Rosetta, CRNN, STARNet or RARE; for the last four a CharacterOps
+    instance is stored in Global.char_ops before the programs are built.
+    Train and eval programs are built with program.build on a shared startup
+    program, the model is initialised with init_model, and control is handed
+    to program.train_eval_det_run (EAST, DB) or program.train_eval_rec_run
+    (all other algorithms).
+    """
+    config = program.load_config(FLAGS.config)
+    program.merge_config(FLAGS.opt)
+    logger.info(config)
+
+    # check if set use_gpu=True in paddlepaddle cpu version
+    # The configured value must be passed here: a hard-coded True would run
+    # the GPU check even when the config asks for CPU execution.
+    use_gpu = config['Global']['use_gpu']
+    program.check_gpu(use_gpu)
+
+    alg = config['Global']['algorithm']
+    assert alg in ['EAST', 'DB', 'Rosetta', 'CRNN', 'STARNet', 'RARE']
+    if alg in ['Rosetta', 'CRNN', 'STARNet', 'RARE']:
+        config['Global']['char_ops'] = CharacterOps(config['Global'])
+
+    place = fluid.CUDAPlace(0) if use_gpu else fluid.CPUPlace()
+    startup_program = fluid.Program()
+    train_program = fluid.Program()
+    # program.build returns a sequence; the positions used below are
+    # 0: data loader, 1: fetch names, 2: fetch variable names,
+    # 3: name of the loss being optimised (train mode only).
+    train_build_outputs = program.build(
+        config, train_program, startup_program, mode='train')
+    train_loader = train_build_outputs[0]
+    train_fetch_name_list = train_build_outputs[1]
+    train_fetch_varname_list = train_build_outputs[2]
+    train_opt_loss_name = train_build_outputs[3]
+
+    eval_program = fluid.Program()
+    eval_build_outputs = program.build(
+        config, eval_program, startup_program, mode='eval')
+    eval_fetch_name_list = eval_build_outputs[1]
+    eval_fetch_varname_list = eval_build_outputs[2]
+    eval_program = eval_program.clone(for_test=True)
+
+    train_reader = reader_main(config=config, mode="train")
+    train_loader.set_sample_list_generator(train_reader, places=place)
+
+    # The eval reader is passed on as-is; it is not attached to a loader here.
+    eval_reader = reader_main(config=config, mode="eval")
+
+    exe = fluid.Executor(place)
+    exe.run(startup_program)
+
+    # compile program for multi-devices
+    train_compile_program = program.create_multi_devices_program(
+        train_program, train_opt_loss_name)
+    init_model(config, train_program, exe)
+
+    train_info_dict = {
+        'compile_program': train_compile_program,
+        'train_program': train_program,
+        'reader': train_loader,
+        'fetch_name_list': train_fetch_name_list,
+        'fetch_varname_list': train_fetch_varname_list,
+    }
+
+    eval_info_dict = {
+        'program': eval_program,
+        'reader': eval_reader,
+        'fetch_name_list': eval_fetch_name_list,
+        'fetch_varname_list': eval_fetch_varname_list,
+    }
+
+    if alg in ['EAST', 'DB']:
+        program.train_eval_det_run(config, exe, train_info_dict, eval_info_dict)
+    else:
+        program.train_eval_rec_run(config, exe, train_info_dict, eval_info_dict)
+
+
+# Script entry point: parse the command line into the module-level FLAGS
+# that main() reads (FLAGS.config, FLAGS.opt), then run main().
+if __name__ == '__main__':
+    parser = program.ArgsParser()
+    FLAGS = parser.parse_args()
+    main()
+# NOTE(review): no test_reader() is defined in this file; the disabled call
+# below looks like a leftover from the other training scripts.
+#     test_reader()
				`@ -0,0 +1 @@`
				`<paddle.fluid.core_avx.ProgramDesc object at 0x10d15fab0>`