diff --git a/mindspore/lite/nnacl/fp16/activation_fp16.c b/mindspore/lite/nnacl/fp16/activation_fp16.c
index cb09c4c742b..3ff5bf05c5e 100644
--- a/mindspore/lite/nnacl/fp16/activation_fp16.c
+++ b/mindspore/lite/nnacl/fp16/activation_fp16.c
@@ -99,6 +99,8 @@ int TanhFp16(const float16_t *src, float16_t *dst, int ele_num) {
                                  {28.0f, 28.0f, 28.0f, 28.0f},
                                  {3150.0f, 3150.0f, 3150.0f, 3150.0f},
                                  {62370.0f, 62370.0f, 62370.0f, 62370.0f}};
+  float32x4_t neg_one = {-1.0f, -1.0f, -1.0f, -1.0f};
+  float32x4_t pos_one = {1.0f, 1.0f, 1.0f, 1.0f};
   int count = (ele_num / C4NUM) * C4NUM;
   for (; i < count; i += C4NUM) {
     float32x4_t input = vcvt_f32_f16(vld1_f16(src + i));
@@ -109,7 +111,7 @@ int TanhFp16(const float16_t *src, float16_t *dst, int ele_num) {
     float32x4_t b = vaddq_f32(
       vmulq_f32(vaddq_f32(vmulq_f32(vaddq_f32(vmulq_f32(paramv[3], square), paramv[4]), square), paramv[5]), square),
       paramv[2]);
-    vst1_f16(dst + i, vcvt_f16_f32(vdivq_f32(a, b)));
+    vst1_f16(dst + i, vcvt_f16_f32(vminq_f32(vmaxq_f32(vdivq_f32(a, b), neg_one), pos_one)));
   }
 #endif
   for (; i < ele_num; ++i) {
@@ -118,6 +120,8 @@ int TanhFp16(const float16_t *src, float16_t *dst, int ele_num) {
     float a = (((square + 378.0f) * square + 17325.0f) * square + 135135.0f) * input;
     float b = ((28.0f * square + 3150.0f) * square + 62370.0f) * square + 135135.0f;
     dst[i] = a / b;
+    dst[i] = MSMAX(dst[i], -1);
+    dst[i] = MSMIN(dst[i], 1);
   }
   return NNACL_OK;
 }
diff --git a/mindspore/lite/nnacl/fp32/activation_fp32.c b/mindspore/lite/nnacl/fp32/activation_fp32.c
index a20d55cf4bc..b3bb0d75399 100644
--- a/mindspore/lite/nnacl/fp32/activation_fp32.c
+++ b/mindspore/lite/nnacl/fp32/activation_fp32.c
@@ -109,6 +109,8 @@ int Tanh(const float *src, int length, float *dst) {
                                  {28.0f, 28.0f, 28.0f, 28.0f},
                                  {3150.0f, 3150.0f, 3150.0f, 3150.0f},
                                  {62370.0f, 62370.0f, 62370.0f, 62370.0f}};
+  float32x4_t neg_one = {-1.0f, -1.0f, -1.0f, -1.0f};
+  float32x4_t pos_one = {1.0f, 1.0f, 1.0f, 1.0f};
   int count = (length / C4NUM) * C4NUM;
   for (; i < count; i += C4NUM) {
     float32x4_t input = vld1q_f32(src + i);
@@ -119,7 +121,7 @@ int Tanh(const float *src, int length, float *dst) {
     float32x4_t b = vaddq_f32(
       vmulq_f32(vaddq_f32(vmulq_f32(vaddq_f32(vmulq_f32(paramv[3], square), paramv[4]), square), paramv[5]), square),
       paramv[2]);
-    vst1q_f32(dst + i, vdivq_f32(a, b));
+    vst1q_f32(dst + i, vminq_f32(vmaxq_f32(vdivq_f32(a, b), neg_one), pos_one));
   }
 #endif
   for (; i < length; ++i) {
@@ -128,6 +130,8 @@ int Tanh(const float *src, int length, float *dst) {
     float a = (((square + 378.0f) * square + 17325.0f) * square + 135135.0f) * input;
     float b = ((28.0f * square + 3150.0f) * square + 62370.0f) * square + 135135.0f;
     dst[i] = a / b;
+    dst[i] = MSMAX(dst[i], -1);
+    dst[i] = MSMIN(dst[i], 1);
   }
   return NNACL_OK;
 }
diff --git a/mindspore/lite/nnacl/fp32/reduce_fp32.c b/mindspore/lite/nnacl/fp32/reduce_fp32.c
index a1493865f6a..7363f4cdfa0 100644
--- a/mindspore/lite/nnacl/fp32/reduce_fp32.c
+++ b/mindspore/lite/nnacl/fp32/reduce_fp32.c
@@ -81,6 +81,43 @@ int ReduceSum(int outer_size, int inner_size, int axis_size, const float *src_da
   }
   return NNACL_OK;
 }
+int IntReduceSum(int outer_size, int inner_size, int axis_size, const int *src_data, int *dst_data, int tid,
+                 int thread_num) {  // dst[j][k] = sum over i of src[j][i][k], for outer rows j = tid, tid + thread_num, ...
+  if (src_data == NULL || dst_data == NULL) {  // reject missing buffers before any pointer arithmetic
+    return NNACL_NULL_PTR;
+  }
+  int i, j;  // i walks the reduced axis, j walks the outer dimension
+#ifdef ENABLE_NEON
+  int block_mod = inner_size % C4NUM;  // leftover columns that do not fill a whole C4NUM group
+  int block_c4 = inner_size - block_mod;  // column count covered by whole C4NUM groups (vector path bound)
+#endif
+  for (j = tid; j < outer_size; j += thread_num) {  // rows are interleaved across callers by tid / thread_num
+    const int *outer_src = src_data + j * axis_size * inner_size;  // row j of src, indexed as [axis_size][inner_size]
+    int *outer_dst = dst_data + j * inner_size;  // row j of dst, inner_size outputs
+    int k = 0;  // column index, shared by the vector loop and the scalar tail loop below
+#ifdef ENABLE_NEON
+    for (; k < block_c4; k += C4NUM) {  // vector path: C4NUM adjacent columns per iteration
+      const int *inner_src = outer_src + k;
+      int *inner_dst = outer_dst + k;
+      int32x4_t tmp = {0, 0, 0, 0};  // per-lane running sums
+      for (i = 0; i < axis_size; i++) {
+        tmp = vaddq_s32(tmp, vld1q_s32(inner_src + i * inner_size));  // stride inner_size steps to the next axis slice
+      }
+      vst1q_s32(inner_dst, tmp);  // write the accumulated lanes to dst
+    }
+#endif
+    for (; k < inner_size; k++) {  // scalar path: remaining columns (every column when ENABLE_NEON is undefined)
+      const int *inner_src = outer_src + k;
+      int *inner_dst = outer_dst + k;
+      int tmp = 0;  // running sum for column k
+      for (i = 0; i < axis_size; i++) {
+        tmp += inner_src[i * inner_size];  // same inner_size stride as the vector path
+      }
+      *inner_dst = tmp;
+    }
+  }
+  return NNACL_OK;  // reached whenever both buffers are non-NULL, even if this tid had no rows to process
+}
 int ReduceMax(int outer_size, int inner_size, int axis_size, const float *src_data, float *dst_data, int tid,
               int thread_num) {
   if (src_data == NULL || dst_data == NULL) {
diff --git a/mindspore/lite/nnacl/fp32/reduce_fp32.h b/mindspore/lite/nnacl/fp32/reduce_fp32.h
index c8b484d7893..9a6a8789364 100644
--- a/mindspore/lite/nnacl/fp32/reduce_fp32.h
+++ b/mindspore/lite/nnacl/fp32/reduce_fp32.h
@@ -26,6 +26,8 @@ int ReduceMean(int outer_size, int inner_size, int axis_size, const float *src_d
                int thread_num);
 int ReduceSum(int outer_size, int inner_size, int axis_size, const float *src_data, float *dst_data, int tid,
               int thread_num);
+int IntReduceSum(int outer_size, int inner_size, int axis_size, const int *src_data, int *dst_data, int tid,
+                 int thread_num);
 int ReduceMax(int outer_size, int inner_size, int axis_size, const float *src_data, float *dst_data, int tid,
               int thread_num);
 int IntReduceMax(int outer_size, int inner_size, int axis_size, const int *src_data, int *dst_data, int tid,
diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/reduce_fp32.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/reduce_fp32.cc
index 2f56a6dfd30..4c3982d9464 100644
--- a/mindspore/lite/src/runtime/kernel/arm/fp32/reduce_fp32.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/fp32/reduce_fp32.cc
@@ -50,6 +50,7 @@ int ReduceCPUKernel::Init() {
   switch (mode_) {
     case static_cast<int>(ReduceMode_ReduceSum): {
       reducer_ = ReduceSum;
+      int_reducer_ = IntReduceSum;
       break;
     }
     case static_cast<int>(ReduceMode_ReduceMean): {
diff --git a/mindspore/lite/src/runtime/kernel/npu/convolution_npu.cc b/mindspore/lite/src/runtime/kernel/npu/convolution_npu.cc
index ca7383c88e1..bbbaee98887 100644
--- a/mindspore/lite/src/runtime/kernel/npu/convolution_npu.cc
+++ b/mindspore/lite/src/runtime/kernel/npu/convolution_npu.cc
@@ -23,10 +23,6 @@ using mindspore::schema::PrimitiveType_Conv2D;
 namespace mindspore::kernel {
 int ConvolutionNPUKernel::IsSupport(const std::vector<lite::Tensor *> &inputs,
                                     const std::vector<lite::Tensor *> &outputs, OpParameter *opParameter) {
-  if (conv_param_->group_ != 1) {
-    MS_LOG(WARNING) << "Only support group equals 1 for npu convolution op";
-    return RET_ERROR;
-  }
   return RET_OK;
 }
 
diff --git a/mindspore/lite/src/runtime/kernel/npu/pooling_npu.cc b/mindspore/lite/src/runtime/kernel/npu/pooling_npu.cc
index 12587c15ca7..7cc37bf49a9 100644
--- a/mindspore/lite/src/runtime/kernel/npu/pooling_npu.cc
+++ b/mindspore/lite/src/runtime/kernel/npu/pooling_npu.cc
@@ -56,10 +56,11 @@ int PoolingNPUKernel::SetPoolingParam() {
 
   if (pooling_param_->round_mode_ == RoundMode_Floor) {  // no use in cpu
     pooling_->set_attr_ceil_mode(0);
+    pooling_->set_attr_data_mode(1);
   } else {
     pooling_->set_attr_ceil_mode(1);
+    pooling_->set_attr_data_mode(0);
   }
-  // todo data mode
   return RET_OK;
 }
 
diff --git a/mindspore/lite/test/models_caffe.cfg b/mindspore/lite/test/models_caffe.cfg
index 7dcb5f461b6..51178bb4d1d 100644
--- a/mindspore/lite/test/models_caffe.cfg
+++ b/mindspore/lite/test/models_caffe.cfg
@@ -73,3 +73,4 @@ ml_video_edit_video_segment_gauss_adaptis_part1
 ml_video_edit_Mnet
 ml_video_edit_hairSeg_have_imageProcessLayer_interpTo145
 ml_video_edit_person_divison_video
+hdc_Face_Aesthetic_MTI_Aesthetic
diff --git a/mindspore/lite/test/models_npu.cfg b/mindspore/lite/test/models_npu.cfg
index 6ded1823b4d..1aca6948b0d 100644
--- a/mindspore/lite/test/models_npu.cfg
+++ b/mindspore/lite/test/models_npu.cfg
@@ -41,3 +41,12 @@ ml_video_edit_img_segment_adaptise.pb 0.5 2
 ml_video_edit_video_segment_gauss_adaptis_part2.pb 3 2
 ml_video_edit_person_divison_pic 8 2
 ml_video_edit_person_divison_video 0.5
+ml_video_edit_imitate_filter.onnx 230
+ml_video_edit_judge.onnx 5
+ml_video_edit_vignet.onnx 0.5
+hdc_Face_Aesthetic_MTI_Aesthetic 0.5
+hdc_Face_Emotion_MTI_Aesthetic.onnx 30
+hdc_Face_Landmark5_MTI_Aesthetic.onnx 0.5
+hdc_Image_Aesthetic_MTI_Aesthetic.onnx 0.5
+hdc_mobilenet_1w_class.onnx 10
+hdc_resnet_1w_class.onnx 5
diff --git a/mindspore/lite/test/models_onnx.cfg b/mindspore/lite/test/models_onnx.cfg
index 9439d0a4230..b612ac93abf 100644
--- a/mindspore/lite/test/models_onnx.cfg
+++ b/mindspore/lite/test/models_onnx.cfg
@@ -45,3 +45,9 @@ ml_video_edit_style_transfer_starry.onnx
 ml_video_edit_judge.onnx
 ml_video_edit_vignet.onnx
 ssd_mobilenet_v1_10.onnx;1,383,640,3
+hdc_Face_Emotion_MTI_Aesthetic.onnx
+hdc_Face_Landmark5_MTI_Aesthetic.onnx
+hdc_Image_Aesthetic_MTI_Aesthetic.onnx
+hdc_mobilenet_1w_class.onnx
+hdc_resnet_1w_class.onnx
+ml_video_edit_imitate_filter.onnx
diff --git a/mindspore/lite/test/models_onnx_fp16.cfg b/mindspore/lite/test/models_onnx_fp16.cfg
index 5f568f46ed8..03213a338cf 100644
--- a/mindspore/lite/test/models_onnx_fp16.cfg
+++ b/mindspore/lite/test/models_onnx_fp16.cfg
@@ -26,7 +26,7 @@ crnn_lite_lstm_v2.onnx;32,32,32,1 0.3
 psenet_lite_mbv2.onnx;1,32,32,3 0.6
 super-resolution-10.onnx;1,224,224,1 4.5
 tinyyolov2-8.onnx;1,416,416,3 5.5
-ml_2012_ocr_cn.onnx 200
+ml_2012_ocr_cn.onnx -1
 #ml_2012_ocr_cn_noLSTM.onnx 1
 candy-9.onnx 5
 mosaic-9.onnx 4
diff --git a/mindspore/lite/test/models_with_multiple_inputs.cfg b/mindspore/lite/test/models_with_multiple_inputs.cfg
index 89ff1ae84e6..88fd55f55a9 100644
--- a/mindspore/lite/test/models_with_multiple_inputs.cfg
+++ b/mindspore/lite/test/models_with_multiple_inputs.cfg
@@ -10,3 +10,4 @@ ml_video_edit_video_segment_gauss_adaptis_part2_pb2tflite.tflite;2
 decoder.onnx;2;1,7,512:1,7
 fasterrcnn_crop.pb;1;420,630,3
 ml_video_edit_person_divison_pic;2
+hdc_tb_cn_neg.tflite;3
diff --git a/mindspore/lite/test/run_benchmark_nets.sh b/mindspore/lite/test/run_benchmark_nets.sh
index c4dfb59f12b..512d27abed1 100755
--- a/mindspore/lite/test/run_benchmark_nets.sh
+++ b/mindspore/lite/test/run_benchmark_nets.sh
@@ -1547,8 +1547,11 @@ function Run_arm64() {
 
         echo 'cd  /data/local/tmp/benchmark_test' > adb_run_cmd.txt
         echo 'export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/data/local/tmp/benchmark_test' >> adb_run_cmd.txt
-        echo './benchmark --modelFile='${model_name}'.fp16.ms --inDataFile=/data/local/tmp/input_output/input/'${model_name}'.ms.bin --benchmarkDataFile=/data/local/tmp/input_output/output/'${model_name}'.ms.out --enableFp16=true --accuracyThreshold='${accuracy_limit} ' --inputShapes='${input_shapes} >> adb_run_cmd.txt
-
+        if [[ $accuracy_limit == "-1" ]]; then
+          echo './benchmark --modelFile='${model_name}'.fp16.ms --inDataFile=/data/local/tmp/input_output/input/'${model_name}'.ms.bin --benchmarkDataFile=/data/local/tmp/input_output/output/'${model_name}'.ms.out --enableFp16=true --inputShapes='${input_shapes} >> adb_run_cmd.txt
+        else
+          echo './benchmark --modelFile='${model_name}'.fp16.ms --inDataFile=/data/local/tmp/input_output/input/'${model_name}'.ms.bin --benchmarkDataFile=/data/local/tmp/input_output/output/'${model_name}'.ms.out --enableFp16=true --accuracyThreshold='${accuracy_limit} ' --inputShapes='${input_shapes} >> adb_run_cmd.txt
+        fi
         cat adb_run_cmd.txt >> "${run_arm64_log_file}"
         adb -s ${device_id} shell < adb_run_cmd.txt >> "${run_arm64_log_file}"
         if [ $? = 0 ]; then
diff --git a/mindspore/lite/tools/converter/parser/caffe/caffe_convolution_parser.cc b/mindspore/lite/tools/converter/parser/caffe/caffe_convolution_parser.cc
index d4fde02cf52..0edbeb208f6 100644
--- a/mindspore/lite/tools/converter/parser/caffe/caffe_convolution_parser.cc
+++ b/mindspore/lite/tools/converter/parser/caffe/caffe_convolution_parser.cc
@@ -19,8 +19,8 @@
 
 namespace mindspore {
 namespace lite {
-STATUS CaffeConvolutionParser::ParseGroupConvolution(schema::PrimitiveT *primitiveT, schema::Conv2DT *attr) {
-  if (attr->group == 1) {
+STATUS CaffeConvolutionParser::ParseDepthwiseConvolution(schema::PrimitiveT *primitiveT, schema::Conv2DT *attr) {
+  if (attr->group == 1 || attr->group != attr->channelOut) {
     return RET_OK;
   }
   std::unique_ptr<schema::DepthwiseConv2DT> depthwiseConv2DParam = std::make_unique<schema::DepthwiseConv2DT>();
@@ -125,9 +125,9 @@ PrimitiveC *CaffeConvolutionParser::ParseLitePrimitive(const caffe::LayerParamet
   primitive->value.type = schema::PrimitiveType_Conv2D;
   primitive->value.value = attr.release();
 
-  status = ParseGroupConvolution(primitive.get(), static_cast<schema::Conv2DT *>(primitive->value.value));
+  status = ParseDepthwiseConvolution(primitive.get(), static_cast<schema::Conv2DT *>(primitive->value.value));
   if (status != RET_OK) {
-    MS_LOG(ERROR) << "Parse group convolution failed";
+    MS_LOG(ERROR) << "Parse depthwise convolution failed";
     return nullptr;
   }
 
diff --git a/mindspore/lite/tools/converter/parser/caffe/caffe_convolution_parser.h b/mindspore/lite/tools/converter/parser/caffe/caffe_convolution_parser.h
index 19cb6eab28e..dd104f99b6d 100644
--- a/mindspore/lite/tools/converter/parser/caffe/caffe_convolution_parser.h
+++ b/mindspore/lite/tools/converter/parser/caffe/caffe_convolution_parser.h
@@ -32,7 +32,7 @@ class CaffeConvolutionParser : public CaffeNodeParser {
   PrimitiveC *ParseLitePrimitive(const caffe::LayerParameter &proto, const caffe::LayerParameter &weight) override;
 
  private:
-  static STATUS ParseGroupConvolution(schema::PrimitiveT *primitiveT, schema::Conv2DT *attr);
+  static STATUS ParseDepthwiseConvolution(schema::PrimitiveT *primitiveT, schema::Conv2DT *attr);
 };
 }  // namespace lite
 }  // namespace mindspore