!10008 [MSLITE] argmin max support mindir

From: @ling_qiao_min Reviewed-by: @zhang_xue_tong,@zhanghaibo5 Signed-off-by: @zhang_xue_tong
2020-12-16 16:07:41 +08:00 · 2020-12-16 16:07:41 +08:00 · 2ac483c7a3
parent c624b04b24 7454b9688c
commit 2ac483c7a3
18 changed files with 280 additions and 1413 deletions
--- a/mindspore/lite/nnacl/arg_min_max.c
+++ b/mindspore/lite/nnacl/arg_min_max.c
@ -1,102 +0,0 @@
-/**
- * Copyright 2020 Huawei Technologies Co., Ltd
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#include "nnacl/arg_min_max.h"
-#include "nnacl/fp32/arg_min_max_fp32.h"
-
-#define FLOAT_DATA_TYPE 43
-
-void GetCalcParameter(const int *shape, int dims_number, int axis, int *pre_axis_count, int *axis_count,
-                      int *after_axis_count) {
-  *pre_axis_count = 1;
-  for (int i = 0; i < axis; ++i) {
-    *pre_axis_count = (*pre_axis_count) * shape[i];
-  }
-
-  *axis_count = shape[axis];
-
-  *after_axis_count = 1;
-  for (int i = axis + 1; i < dims_number; ++i) {
-    *after_axis_count = (*after_axis_count) * shape[i];
-  }
-}
-
-void ArgMinMaxTopk1(const void *input, void *output, const int *shape, const ArgMinMaxParameter *param) {
-  int pre_axis_count = 1;
-  int axis_count = 1;
-  int after_axis_count = 1;
-  GetCalcParameter(shape, param->dims_size_, param->axis_, &pre_axis_count, &axis_count, &after_axis_count);
-  if (param->data_type_ != FLOAT_DATA_TYPE) {
-    return;
-  }
-  if (param->get_max_) {
-    ArgMax(input, output, param, pre_axis_count, axis_count, after_axis_count);
-  } else {
-    ArgMin(input, output, param, pre_axis_count, axis_count, after_axis_count);
-  }
-}
-
-void ArgMinMaxTopknFp32(const float *input, float *output, const int *in_shape, const ArgMinMaxParameter *param) {
-  if (param->get_max_) {
-    switch (param->axis_) {
-      case 0:
-        ArgMaxDim0(input, output, in_shape, param);
-        break;
-      case 1:
-        ArgMaxDim1(input, output, in_shape, param);
-        break;
-      case 2:
-        ArgMaxDim2(input, output, in_shape, param);
-        break;
-      case 3:
-        ArgMaxDim3(input, output, in_shape, param);
-        break;
-    }
-  } else {
-    switch (param->axis_) {
-      case 0:
-        ArgMinDim0(input, output, in_shape, param);
-        break;
-      case 1:
-        ArgMinDim1(input, output, in_shape, param);
-        break;
-      case 2:
-        ArgMinDim2(input, output, in_shape, param);
-        break;
-      case 3:
-        ArgMinDim3(input, output, in_shape, param);
-        break;
-    }
-  }
-}
-
-void ArgMinMax(const void *input, void *output, const int *in_shape, const ArgMinMaxParameter *param) {
-  if (param->topk_ == 1) {
-    ArgMinMaxTopk1(input, output, in_shape, param);
-    return;
-  }
-
-  switch (param->data_type_) {
-    case FLOAT_DATA_TYPE: {
-      ArgMinMaxTopknFp32(input, output, in_shape, param);
-      return;
-    }
-    default:
-      break;
-  }
-}
-
-#undef FLOAT_DATA_TYPE
-#undef INT8_DATA_TYPE
--- a/mindspore/lite/nnacl/arg_min_max.h
+++ b/mindspore/lite/nnacl/arg_min_max.h
@ -1,29 +0,0 @@
-/**
- * Copyright 2020 Huawei Technologies Co., Ltd
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#ifndef MINDSPORE_LITE_NNACL_ARG_MIN_MAX_H_
-#define MINDSPORE_LITE_NNACL_ARG_MIN_MAX_H_
-
-#include "nnacl/arg_min_max_parameter.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-void ArgMinMax(const void *input, void *output, const int *in_shape, const ArgMinMaxParameter *param);
-#ifdef __cplusplus
-}
-#endif
-
-#endif  // MINDSPORE_LITE_NNACL_ARG_MIN_MAX_H_
--- a/mindspore/lite/nnacl/fp32/arg_min_max_fp32.c
+++ b/mindspore/lite/nnacl/fp32/arg_min_max_fp32.c
@ -43,424 +43,8 @@ int ArgCompareDescFp32(const void *a, const void *b) {
  return 0;
 }

-void ArgMaxDim0OutValue(const float *input, float *output, const int *in_shape, const ArgMinMaxParameter *param) {
-  for (int32_t i = 0; i < param->in_strides_[0]; ++i) {
-    for (int j = 0; j < in_shape[0]; ++j) {
-      size_t offset = param->in_strides_[0] * j + i;
-      param->arg_elements_[j].index_ = j;
-      param->arg_elements_[j].data_.f_data_ = input[offset];
-    }
-    qsort(param->arg_elements_, in_shape[0], sizeof(ArgElement), ArgCompareDescFp32);
-    for (int j = 0; j < param->topk_; ++j) {
-      size_t out_offset = j * param->out_strides_[0] + i;
-      output[out_offset] = param->arg_elements_[j].data_.f_data_;
-    }
-  }
-}
-
-void ArgMaxDim0OutIndex(const float *input, float *output, const int *in_shape, const ArgMinMaxParameter *param) {
-  for (int32_t i = 0; i < param->in_strides_[0]; ++i) {
-    for (int j = 0; j < in_shape[0]; ++j) {
-      size_t offset = param->in_strides_[0] * j + i;
-      param->arg_elements_[j].index_ = j;
-      param->arg_elements_[j].data_.f_data_ = input[offset];
-    }
-    qsort(param->arg_elements_, in_shape[0], sizeof(ArgElement), ArgCompareDescFp32);
-    for (int j = 0; j < param->topk_; ++j) {
-      size_t out_offset = j * param->out_strides_[0] + i;
-      output[out_offset] = param->arg_elements_[j].index_;
-    }
-  }
-}
-
-void ArgMinDim0OutValue(const float *input, float *output, const int *in_shape, const ArgMinMaxParameter *param) {
-  for (int32_t i = 0; i < param->in_strides_[0]; ++i) {
-    for (int j = 0; j < in_shape[0]; ++j) {
-      size_t offset = param->in_strides_[0] * j + i;
-      param->arg_elements_[j].index_ = j;
-      param->arg_elements_[j].data_.f_data_ = input[offset];
-    }
-    qsort(param->arg_elements_, in_shape[0], sizeof(ArgElement), ArgCompareAscFp32);
-    for (int j = 0; j < param->topk_; ++j) {
-      size_t out_offset = j * param->out_strides_[0] + i;
-      output[out_offset] = param->arg_elements_[j].data_.f_data_;
-    }
-  }
-}
-
-void ArgMinDim0OutIndex(const float *input, float *output, const int *in_shape, const ArgMinMaxParameter *param) {
-  for (int32_t i = 0; i < param->in_strides_[0]; ++i) {
-    for (int j = 0; j < in_shape[0]; ++j) {
-      size_t offset = param->in_strides_[0] * j + i;
-      param->arg_elements_[j].index_ = j;
-      param->arg_elements_[j].data_.f_data_ = input[offset];
-    }
-    qsort(param->arg_elements_, in_shape[0], sizeof(ArgElement), ArgCompareAscFp32);
-    for (int j = 0; j < param->topk_; ++j) {
-      size_t out_offset = j * param->out_strides_[0] + i;
-      output[out_offset] = param->arg_elements_[j].index_;
-    }
-  }
-}
-
-void ArgMaxDim1OutValue(const float *input, float *output, const int *in_shape, const ArgMinMaxParameter *param) {
-  int in_shape1 = in_shape[1];
-  for (int i = 0; i < in_shape[0]; ++i) {
-    size_t in_dim0_offset = i * param->in_strides_[0];
-    size_t out_dim0_offset = i * param->out_strides_[0];
-    for (int j = 0; j < param->in_strides_[1]; ++j) {
-      for (int k = 0; k < in_shape1; ++k) {
-        size_t offset = param->in_strides_[1] * k + in_dim0_offset + j;
-        param->arg_elements_[k].index_ = k;
-        param->arg_elements_[k].data_.f_data_ = input[offset];
-      }
-      qsort(param->arg_elements_, in_shape1, sizeof(ArgElement), ArgCompareDescFp32);
-      for (int k = 0; k < param->topk_; ++k) {
-        size_t out_offset = out_dim0_offset + j + k * param->out_strides_[1];
-        output[out_offset] = param->arg_elements_[k].data_.f_data_;
-      }
-    }
-  }
-}
-
-void ArgMaxDim1OutIndex(const float *input, float *output, const int *in_shape, const ArgMinMaxParameter *param) {
-  int in_shape1 = in_shape[1];
-  for (int i = 0; i < in_shape[0]; ++i) {
-    size_t in_dim0_offset = i * param->in_strides_[0];
-    size_t out_dim0_offset = i * param->out_strides_[0];
-    for (int j = 0; j < param->in_strides_[1]; ++j) {
-      for (int k = 0; k < in_shape1; ++k) {
-        size_t offset = param->in_strides_[1] * k + in_dim0_offset + j;
-        param->arg_elements_[k].index_ = k;
-        param->arg_elements_[k].data_.f_data_ = input[offset];
-      }
-      qsort(param->arg_elements_, in_shape1, sizeof(ArgElement), ArgCompareDescFp32);
-      for (int k = 0; k < param->topk_; ++k) {
-        size_t out_offset = out_dim0_offset + j + k * param->out_strides_[1];
-        output[out_offset] = param->arg_elements_[k].index_;
-      }
-    }
-  }
-}
-
-void ArgMinDim1OutValue(const float *input, float *output, const int *in_shape, const ArgMinMaxParameter *param) {
-  int in_shape1 = in_shape[1];
-  for (int i = 0; i < in_shape[0]; ++i) {
-    size_t in_dim0_offset = i * param->in_strides_[0];
-    size_t out_dim0_offset = i * param->out_strides_[0];
-    for (int j = 0; j < param->in_strides_[1]; ++j) {
-      for (int k = 0; k < in_shape1; ++k) {
-        size_t offset = param->in_strides_[1] * k + in_dim0_offset + j;
-        param->arg_elements_[k].index_ = k;
-        param->arg_elements_[k].data_.f_data_ = input[offset];
-      }
-      qsort(param->arg_elements_, in_shape1, sizeof(ArgElement), ArgCompareAscFp32);
-      for (int k = 0; k < param->topk_; ++k) {
-        size_t out_offset = out_dim0_offset + j + k * param->out_strides_[1];
-        output[out_offset] = param->arg_elements_[k].data_.f_data_;
-      }
-    }
-  }
-}
-
-void ArgMinDim1OutIndex(const float *input, float *output, const int *in_shape, const ArgMinMaxParameter *param) {
-  int in_shape1 = in_shape[1];
-  for (int i = 0; i < in_shape[0]; ++i) {
-    size_t in_dim0_offset = i * param->in_strides_[0];
-    size_t out_dim0_offset = i * param->out_strides_[0];
-    for (int j = 0; j < param->in_strides_[1]; ++j) {
-      for (int k = 0; k < in_shape1; ++k) {
-        size_t offset = param->in_strides_[1] * k + in_dim0_offset + j;
-        param->arg_elements_[k].index_ = k;
-        param->arg_elements_[k].data_.f_data_ = input[offset];
-      }
-      qsort(param->arg_elements_, in_shape1, sizeof(ArgElement), ArgCompareAscFp32);
-      for (int k = 0; k < param->topk_; ++k) {
-        size_t out_offset = out_dim0_offset + j + k * param->out_strides_[1];
-        output[out_offset] = param->arg_elements_[k].index_;
-      }
-    }
-  }
-}
-
-void ArgMaxDim2OutValue(const float *input, float *output, const int *in_shape, const ArgMinMaxParameter *param) {
-  int in_shape1 = in_shape[1];
-  int in_shape2 = in_shape[2];
-  for (int i = 0; i < in_shape[0]; ++i) {
-    size_t in_dim0_offset = i * param->in_strides_[0];
-    size_t out_dim0_offset = i * param->out_strides_[0];
-    for (int j = 0; j < in_shape1; ++j) {
-      size_t in_dim1_offset = j * param->in_strides_[1] + in_dim0_offset;
-      size_t out_dim1_offset = j * param->out_strides_[1] + out_dim0_offset;
-      for (int k = 0; k < param->in_strides_[2]; ++k) {
-        for (int l = 0; l < in_shape2; ++l) {
-          size_t offset = param->in_strides_[2] * l + k + in_dim1_offset;
-          param->arg_elements_[l].index_ = l;
-          param->arg_elements_[l].data_.f_data_ = input[offset];
-        }
-        qsort(param->arg_elements_, in_shape2, sizeof(ArgElement), ArgCompareDescFp32);
-        for (int l = 0; l < param->topk_; ++l) {
-          size_t out_offset = out_dim1_offset + k + l * param->out_strides_[2];
-          output[out_offset] = param->arg_elements_[l].data_.f_data_;
-        }
-      }
-    }
-  }
-}
-
-void ArgMaxDim2OutIndex(const float *input, float *output, const int *in_shape, const ArgMinMaxParameter *param) {
-  int in_shape1 = in_shape[1];
-  int in_shape2 = in_shape[2];
-  for (int i = 0; i < in_shape[0]; ++i) {
-    size_t in_dim0_offset = i * param->in_strides_[0];
-    size_t out_dim0_offset = i * param->out_strides_[0];
-    for (int j = 0; j < in_shape1; ++j) {
-      size_t in_dim1_offset = j * param->in_strides_[1] + in_dim0_offset;
-      size_t out_dim1_offset = j * param->out_strides_[1] + out_dim0_offset;
-      for (int k = 0; k < param->in_strides_[2]; ++k) {
-        for (int l = 0; l < in_shape2; ++l) {
-          size_t offset = param->in_strides_[2] * l + k + in_dim1_offset;
-          param->arg_elements_[l].index_ = l;
-          param->arg_elements_[l].data_.f_data_ = input[offset];
-        }
-        qsort(param->arg_elements_, in_shape2, sizeof(ArgElement), ArgCompareDescFp32);
-        for (int l = 0; l < param->topk_; ++l) {
-          size_t out_offset = out_dim1_offset + k + l * param->out_strides_[2];
-          output[out_offset] = param->arg_elements_[l].index_;
-        }
-      }
-    }
-  }
-}
-
-void ArgMinDim2OutValue(const float *input, float *output, const int *in_shape, const ArgMinMaxParameter *param) {
-  int in_shape1 = in_shape[1];
-  int in_shape2 = in_shape[2];
-  for (int i = 0; i < in_shape[0]; ++i) {
-    size_t in_dim0_offset = i * param->in_strides_[0];
-    size_t out_dim0_offset = i * param->out_strides_[0];
-    for (int j = 0; j < in_shape1; ++j) {
-      size_t in_dim1_offset = j * param->in_strides_[1] + in_dim0_offset;
-      size_t out_dim1_offset = j * param->out_strides_[1] + out_dim0_offset;
-      for (int k = 0; k < param->in_strides_[2]; ++k) {
-        for (int l = 0; l < in_shape2; ++l) {
-          size_t offset = param->in_strides_[2] * l + k + in_dim1_offset;
-          param->arg_elements_[l].index_ = l;
-          param->arg_elements_[l].data_.f_data_ = input[offset];
-        }
-        qsort(param->arg_elements_, in_shape2, sizeof(ArgElement), ArgCompareAscFp32);
-        for (int l = 0; l < param->topk_; ++l) {
-          size_t out_offset = out_dim1_offset + k + l * param->out_strides_[2];
-          output[out_offset] = param->arg_elements_[l].data_.f_data_;
-        }
-      }
-    }
-  }
-}
-
-void ArgMinDim2OutIndex(const float *input, float *output, const int *in_shape, const ArgMinMaxParameter *param) {
-  int in_shape1 = in_shape[1];
-  int in_shape2 = in_shape[2];
-  for (int i = 0; i < in_shape[0]; ++i) {
-    size_t in_dim0_offset = i * param->in_strides_[0];
-    size_t out_dim0_offset = i * param->out_strides_[0];
-    for (int j = 0; j < in_shape1; ++j) {
-      size_t in_dim1_offset = j * param->in_strides_[1] + in_dim0_offset;
-      size_t out_dim1_offset = j * param->out_strides_[1] + out_dim0_offset;
-      for (int k = 0; k < param->in_strides_[2]; ++k) {
-        for (int l = 0; l < in_shape2; ++l) {
-          size_t offset = param->in_strides_[2] * l + k + in_dim1_offset;
-          param->arg_elements_[l].index_ = l;
-          param->arg_elements_[l].data_.f_data_ = input[offset];
-        }
-        qsort(param->arg_elements_, in_shape2, sizeof(ArgElement), ArgCompareAscFp32);
-        for (int l = 0; l < param->topk_; ++l) {
-          size_t out_offset = out_dim1_offset + k + l * param->out_strides_[2];
-          output[out_offset] = param->arg_elements_[l].index_;
-        }
-      }
-    }
-  }
-}
-
-void ArgMaxDim3OutValue(const float *input, float *output, const int *in_shape, const ArgMinMaxParameter *param) {
-  int in_shape1 = in_shape[1];
-  int in_shape2 = in_shape[2];
-  int in_shape3 = in_shape[3];
-  for (int i = 0; i < in_shape[0]; ++i) {
-    size_t in_dim0_offset = i * param->in_strides_[0];
-    size_t out_dim0_offset = i * param->out_strides_[0];
-    for (int j = 0; j < in_shape1; ++j) {
-      size_t in_dim1_offset = j * param->in_strides_[1] + in_dim0_offset;
-      size_t out_dim1_offset = j * param->out_strides_[1] + out_dim0_offset;
-      for (int k = 0; k < in_shape2; ++k) {
-        size_t in_dim2_offset = k * param->in_strides_[2] + in_dim1_offset;
-        size_t out_dim2_offset = k * param->out_strides_[2] + out_dim1_offset;
-        for (int l = 0; l < in_shape3; ++l) {
-          size_t offset = l + in_dim2_offset;
-          param->arg_elements_[l].index_ = l;
-          param->arg_elements_[l].data_.f_data_ = input[offset];
-        }
-        qsort(param->arg_elements_, in_shape3, sizeof(ArgElement), ArgCompareDescFp32);
-        for (int l = 0; l < param->topk_; ++l) {
-          size_t out_offset = out_dim2_offset + l;
-          output[out_offset] = param->arg_elements_[l].data_.f_data_;
-        }
-      }
-    }
-  }
-}
-
-void ArgMaxDim3OutIndex(const float *input, float *output, const int *in_shape, const ArgMinMaxParameter *param) {
-  int in_shape1 = in_shape[1];
-  int in_shape2 = in_shape[2];
-  int in_shape3 = in_shape[3];
-  for (int i = 0; i < in_shape[0]; ++i) {
-    size_t in_dim0_offset = i * param->in_strides_[0];
-    size_t out_dim0_offset = i * param->out_strides_[0];
-    for (int j = 0; j < in_shape1; ++j) {
-      size_t in_dim1_offset = j * param->in_strides_[1] + in_dim0_offset;
-      size_t out_dim1_offset = j * param->out_strides_[1] + out_dim0_offset;
-      for (int k = 0; k < in_shape2; ++k) {
-        size_t in_dim2_offset = k * param->in_strides_[2] + in_dim1_offset;
-        size_t out_dim2_offset = k * param->out_strides_[2] + out_dim1_offset;
-        for (int l = 0; l < in_shape3; ++l) {
-          size_t offset = l + in_dim2_offset;
-          param->arg_elements_[l].index_ = l;
-          param->arg_elements_[l].data_.f_data_ = input[offset];
-        }
-        qsort(param->arg_elements_, in_shape3, sizeof(ArgElement), ArgCompareDescFp32);
-        for (int l = 0; l < param->topk_; ++l) {
-          size_t out_offset = out_dim2_offset + l;
-          output[out_offset] = param->arg_elements_[l].index_;
-        }
-      }
-    }
-  }
-}
-
-void ArgMinDim3OutValue(const float *input, float *output, const int *in_shape, const ArgMinMaxParameter *param) {
-  int in_shape1 = in_shape[1];
-  int in_shape2 = in_shape[2];
-  int in_shape3 = in_shape[3];
-  for (int i = 0; i < in_shape[0]; ++i) {
-    size_t in_dim0_offset = i * param->in_strides_[0];
-    size_t out_dim0_offset = i * param->out_strides_[0];
-    for (int j = 0; j < in_shape1; ++j) {
-      size_t in_dim1_offset = j * param->in_strides_[1] + in_dim0_offset;
-      size_t out_dim1_offset = j * param->out_strides_[1] + out_dim0_offset;
-      for (int k = 0; k < in_shape2; ++k) {
-        size_t in_dim2_offset = k * param->in_strides_[2] + in_dim1_offset;
-        size_t out_dim2_offset = k * param->out_strides_[2] + out_dim1_offset;
-        for (int l = 0; l < in_shape3; ++l) {
-          size_t offset = l + in_dim2_offset;
-          param->arg_elements_[l].index_ = l;
-          param->arg_elements_[l].data_.f_data_ = input[offset];
-        }
-        qsort(param->arg_elements_, in_shape3, sizeof(ArgElement), ArgCompareAscFp32);
-        for (int l = 0; l < param->topk_; ++l) {
-          size_t out_offset = out_dim2_offset + l;
-          output[out_offset] = param->arg_elements_[l].data_.f_data_;
-        }
-      }
-    }
-  }
-}
-
-void ArgMinDim3OutIndex(const float *input, float *output, const int *in_shape, const ArgMinMaxParameter *param) {
-  int in_shape1 = in_shape[1];
-  int in_shape2 = in_shape[2];
-  int in_shape3 = in_shape[3];
-  for (int i = 0; i < in_shape[0]; ++i) {
-    size_t in_dim0_offset = i * param->in_strides_[0];
-    size_t out_dim0_offset = i * param->out_strides_[0];
-    for (int j = 0; j < in_shape1; ++j) {
-      size_t in_dim1_offset = j * param->in_strides_[1] + in_dim0_offset;
-      size_t out_dim1_offset = j * param->out_strides_[1] + out_dim0_offset;
-      for (int k = 0; k < in_shape2; ++k) {
-        size_t in_dim2_offset = k * param->in_strides_[2] + in_dim1_offset;
-        size_t out_dim2_offset = k * param->out_strides_[2] + out_dim1_offset;
-        for (int l = 0; l < in_shape3; ++l) {
-          size_t offset = l + in_dim2_offset;
-          param->arg_elements_[l].index_ = l;
-          param->arg_elements_[l].data_.f_data_ = input[offset];
-        }
-        qsort(param->arg_elements_, in_shape3, sizeof(ArgElement), ArgCompareAscFp32);
-        for (int l = 0; l < param->topk_; ++l) {
-          size_t out_offset = out_dim2_offset + l;
-          output[out_offset] = param->arg_elements_[l].index_;
-        }
-      }
-    }
-  }
-}
-
-void ArgMaxDim0(const float *input, float *output, const int *in_shape, const ArgMinMaxParameter *param) {
-  if (param->out_value_) {
-    ArgMaxDim0OutValue(input, output, in_shape, param);
-  } else {
-    ArgMaxDim0OutIndex(input, output, in_shape, param);
-  }
-}
-
-void ArgMinDim0(const float *input, float *output, const int *in_shape, const ArgMinMaxParameter *param) {
-  if (param->out_value_) {
-    ArgMinDim0OutValue(input, output, in_shape, param);
-  } else {
-    ArgMinDim0OutIndex(input, output, in_shape, param);
-  }
-}
-
-void ArgMaxDim1(const float *input, float *output, const int *in_shape, const ArgMinMaxParameter *param) {
-  if (param->out_value_) {
-    ArgMaxDim1OutValue(input, output, in_shape, param);
-  } else {
-    ArgMaxDim1OutIndex(input, output, in_shape, param);
-  }
-}
-
-void ArgMinDim1(const float *input, float *output, const int *in_shape, const ArgMinMaxParameter *param) {
-  if (param->out_value_) {
-    ArgMinDim1OutValue(input, output, in_shape, param);
-  } else {
-    ArgMinDim1OutIndex(input, output, in_shape, param);
-  }
-}
-
-void ArgMaxDim2(const float *input, float *output, const int *in_shape, const ArgMinMaxParameter *param) {
-  if (param->out_value_) {
-    ArgMaxDim2OutValue(input, output, in_shape, param);
-  } else {
-    ArgMaxDim2OutIndex(input, output, in_shape, param);
-  }
-}
-
-void ArgMinDim2(const float *input, float *output, const int *in_shape, const ArgMinMaxParameter *param) {
-  if (param->out_value_) {
-    ArgMinDim2OutValue(input, output, in_shape, param);
-  } else {
-    ArgMinDim2OutIndex(input, output, in_shape, param);
-  }
-}
-
-void ArgMaxDim3(const float *input, float *output, const int *in_shape, const ArgMinMaxParameter *param) {
-  if (param->out_value_) {
-    ArgMaxDim3OutValue(input, output, in_shape, param);
-  } else {
-    ArgMaxDim3OutIndex(input, output, in_shape, param);
-  }
-}
-
-void ArgMinDim3(const float *input, float *output, const int *in_shape, const ArgMinMaxParameter *param) {
-  if (param->out_value_) {
-    ArgMinDim3OutValue(input, output, in_shape, param);
-  } else {
-    ArgMinDim3OutIndex(input, output, in_shape, param);
-  }
-}
-
-void ArgMax(const float *input, float *output, const ArgMinMaxParameter *param, int pre_axis_count, int axis_count,
-            int after_axis_count) {
+void ArgMaxTopK1(const float *input, float *output, float *output_value, const ArgMinMaxParameter *param,
+                 int pre_axis_count, int axis_count, int after_axis_count) {
  bool out_value = param->out_value_;
  for (int i = 0; i < pre_axis_count; ++i) {
    size_t output_offset = i * after_axis_count;
@ -476,12 +60,15 @@ void ArgMax(const float *input, float *output, const ArgMinMaxParameter *param,
        }
      }
      output[output_offset + j] = out_value ? value : index;
+      if (output_value != NULL) {
+        output_value[output_offset + j] = value;
+      }
    }
  }
 }

-void ArgMin(const float *input, float *output, const ArgMinMaxParameter *param, int pre_axis_count, int axis_count,
-            int after_axis_count) {
+void ArgMinTopK1(const float *input, float *output, float *output_value, const ArgMinMaxParameter *param,
+                 int pre_axis_count, int axis_count, int after_axis_count) {
  bool out_value = param->out_value_;
  for (int i = 0; i < pre_axis_count; ++i) {
    size_t output_offset = i * after_axis_count;
@ -497,6 +84,173 @@ void ArgMin(const float *input, float *output, const ArgMinMaxParameter *param,
        }
      }
      output[output_offset + j] = out_value ? value : index;
+      if (output_value != NULL) {
+        output_value[output_offset + j] = value;
+      }
    }
  }
 }
+
+// Splits `shape` (with `dims_number` entries) around `axis` into three extents:
+//   *pre_axis_count   - product of the dimensions in front of `axis`
+//   *axis_count       - the dimension at `axis` itself
+//   *after_axis_count - product of the dimensions behind `axis`
+// Either product is 1 when there are no dimensions on that side.
+void GetCalcParameter(const int *shape, int dims_number, int axis, int *pre_axis_count, int *axis_count,
+                      int *after_axis_count) {
+  int dim = 0;
+
+  *pre_axis_count = 1;
+  while (dim < axis) {
+    *pre_axis_count *= shape[dim];
+    ++dim;
+  }
+
+  *axis_count = shape[axis];
+
+  *after_axis_count = 1;
+  for (dim = axis + 1; dim < dims_number; ++dim) {
+    *after_axis_count *= shape[dim];
+  }
+}
+
+// Top-k arg-min/arg-max along axis 0.
+// For every position below axis 0 (param->in_strides_[0] of them) the in_shape[0] values along the axis
+// are copied into the scratch buffer param->arg_elements_ together with their index, sorted with
+// `compare_func`, and the first param->topk_ entries are emitted. `output` receives the value when
+// param->out_value_ is set and the index otherwise; `output_value`, when non-NULL, always receives the value.
+void ArgMinMaxDim0(const float *input, float *output, float *output_value, const int *in_shape,
+                   const ArgMinMaxParameter *param, COMPARE_FUNCTION compare_func) {
+  ArgElement *candidates = param->arg_elements_;
+  for (int32_t inner = 0; inner < param->in_strides_[0]; ++inner) {
+    // Gather the values that share this inner position, remembering where each came from.
+    for (int pos = 0; pos < in_shape[0]; ++pos) {
+      size_t src = param->in_strides_[0] * pos + inner;
+      candidates[pos].index_ = pos;
+      candidates[pos].data_.f_data_ = input[src];
+    }
+    qsort(candidates, in_shape[0], sizeof(ArgElement), compare_func);
+    // Emit the leading topk_ entries of the sorted scratch buffer.
+    for (int rank = 0; rank < param->topk_; ++rank) {
+      size_t dst = rank * param->out_strides_[0] + inner;
+      if (param->out_value_) {
+        output[dst] = candidates[rank].data_.f_data_;
+      } else {
+        output[dst] = candidates[rank].index_;
+      }
+      if (output_value != NULL) {
+        output_value[dst] = candidates[rank].data_.f_data_;
+      }
+    }
+  }
+}
+
+// Top-k arg-min/arg-max along axis 1.
+// For every index of axis 0 and every position below axis 1 (param->in_strides_[1] of them) the in_shape[1]
+// values along the axis are copied into the scratch buffer param->arg_elements_ together with their index,
+// sorted with `compare_func`, and the first param->topk_ entries are emitted. `output` receives the value
+// when param->out_value_ is set and the index otherwise; `output_value`, when non-NULL, always receives
+// the value.
+void ArgMinMaxDim1(const float *input, float *output, float *output_value, const int *in_shape,
+                   const ArgMinMaxParameter *param, COMPARE_FUNCTION compare_func) {
+  ArgElement *candidates = param->arg_elements_;
+  int axis_len = in_shape[1];
+  for (int outer = 0; outer < in_shape[0]; ++outer) {
+    size_t in_base = outer * param->in_strides_[0];
+    size_t out_base = outer * param->out_strides_[0];
+    for (int inner = 0; inner < param->in_strides_[1]; ++inner) {
+      // Gather the values along axis 1 for this (outer, inner) pair.
+      for (int pos = 0; pos < axis_len; ++pos) {
+        size_t src = param->in_strides_[1] * pos + in_base + inner;
+        candidates[pos].index_ = pos;
+        candidates[pos].data_.f_data_ = input[src];
+      }
+      qsort(candidates, axis_len, sizeof(ArgElement), compare_func);
+      // Emit the leading topk_ entries of the sorted scratch buffer.
+      for (int rank = 0; rank < param->topk_; ++rank) {
+        size_t dst = out_base + inner + rank * param->out_strides_[1];
+        if (param->out_value_) {
+          output[dst] = candidates[rank].data_.f_data_;
+        } else {
+          output[dst] = candidates[rank].index_;
+        }
+        if (output_value != NULL) {
+          output_value[dst] = candidates[rank].data_.f_data_;
+        }
+      }
+    }
+  }
+}
+
+// Top-k arg-min/arg-max along axis 2.
+// For every (axis 0, axis 1) index pair and every position below axis 2 (param->in_strides_[2] of them) the
+// in_shape[2] values along the axis are copied into the scratch buffer param->arg_elements_ together with
+// their index, sorted with `compare_func`, and the first param->topk_ entries are emitted. `output`
+// receives the value when param->out_value_ is set and the index otherwise; `output_value`, when non-NULL,
+// always receives the value.
+void ArgMinMaxDim2(const float *input, float *output, float *output_value, const int *in_shape,
+                   const ArgMinMaxParameter *param, COMPARE_FUNCTION compare_func) {
+  ArgElement *candidates = param->arg_elements_;
+  int dim1_len = in_shape[1];
+  int axis_len = in_shape[2];
+  for (int d0 = 0; d0 < in_shape[0]; ++d0) {
+    size_t in_base0 = d0 * param->in_strides_[0];
+    size_t out_base0 = d0 * param->out_strides_[0];
+    for (int d1 = 0; d1 < dim1_len; ++d1) {
+      size_t in_base1 = d1 * param->in_strides_[1] + in_base0;
+      size_t out_base1 = d1 * param->out_strides_[1] + out_base0;
+      for (int inner = 0; inner < param->in_strides_[2]; ++inner) {
+        // Gather the values along axis 2 for this (d0, d1, inner) triple.
+        for (int pos = 0; pos < axis_len; ++pos) {
+          size_t src = param->in_strides_[2] * pos + inner + in_base1;
+          candidates[pos].index_ = pos;
+          candidates[pos].data_.f_data_ = input[src];
+        }
+        qsort(candidates, axis_len, sizeof(ArgElement), compare_func);
+        // Emit the leading topk_ entries of the sorted scratch buffer.
+        for (int rank = 0; rank < param->topk_; ++rank) {
+          size_t dst = out_base1 + inner + rank * param->out_strides_[2];
+          if (param->out_value_) {
+            output[dst] = candidates[rank].data_.f_data_;
+          } else {
+            output[dst] = candidates[rank].index_;
+          }
+          if (output_value != NULL) {
+            output_value[dst] = candidates[rank].data_.f_data_;
+          }
+        }
+      }
+    }
+  }
+}
+
+// Top-k arg-min/arg-max along axis 3.
+// For every (axis 0, axis 1, axis 2) index triple the in_shape[3] consecutive values along the axis are
+// copied into the scratch buffer param->arg_elements_ together with their index, sorted with
+// `compare_func`, and the first param->topk_ entries are written to consecutive output slots. `output`
+// receives the value when param->out_value_ is set and the index otherwise; `output_value`, when non-NULL,
+// always receives the value.
+void ArgMinMaxDim3(const float *input, float *output, float *output_value, const int *in_shape,
+                   const ArgMinMaxParameter *param, COMPARE_FUNCTION compare_func) {
+  ArgElement *candidates = param->arg_elements_;
+  int dim1_len = in_shape[1];
+  int dim2_len = in_shape[2];
+  int axis_len = in_shape[3];
+  for (int d0 = 0; d0 < in_shape[0]; ++d0) {
+    size_t in_base0 = d0 * param->in_strides_[0];
+    size_t out_base0 = d0 * param->out_strides_[0];
+    for (int d1 = 0; d1 < dim1_len; ++d1) {
+      size_t in_base1 = d1 * param->in_strides_[1] + in_base0;
+      size_t out_base1 = d1 * param->out_strides_[1] + out_base0;
+      for (int d2 = 0; d2 < dim2_len; ++d2) {
+        size_t in_base2 = d2 * param->in_strides_[2] + in_base1;
+        size_t out_base2 = d2 * param->out_strides_[2] + out_base1;
+        // Gather the values along axis 3 for this (d0, d1, d2) triple.
+        for (int pos = 0; pos < axis_len; ++pos) {
+          size_t src = pos + in_base2;
+          candidates[pos].index_ = pos;
+          candidates[pos].data_.f_data_ = input[src];
+        }
+        qsort(candidates, axis_len, sizeof(ArgElement), compare_func);
+        // Emit the leading topk_ entries of the sorted scratch buffer.
+        for (int rank = 0; rank < param->topk_; ++rank) {
+          size_t dst = out_base2 + rank;
+          if (param->out_value_) {
+            output[dst] = candidates[rank].data_.f_data_;
+          } else {
+            output[dst] = candidates[rank].index_;
+          }
+          if (output_value != NULL) {
+            output_value[dst] = candidates[rank].data_.f_data_;
+          }
+        }
+      }
+    }
+  }
+}
+
+// Float32 arg-min/arg-max entry point.
+// param->topk_ == 1 is routed to the single-result kernels (ArgMaxTopK1 / ArgMinTopK1) using the extents
+// computed by GetCalcParameter. Any other topk_ goes to the sort-based kernel for param->axis_, with a
+// descending comparator when param->get_max_ is set and an ascending one otherwise. On that path only
+// axes 0-3 are dispatched; any other axis value returns without writing to the outputs.
+void ArgMinMaxFp32(const float *input, float *output, float *output_value, const int *in_shape,
+                   const ArgMinMaxParameter *param) {
+  if (param->topk_ != 1) {
+    COMPARE_FUNCTION comparator = param->get_max_ ? ArgCompareDescFp32 : ArgCompareAscFp32;
+    if (param->axis_ == 0) {
+      ArgMinMaxDim0(input, output, output_value, in_shape, param, comparator);
+    } else if (param->axis_ == 1) {
+      ArgMinMaxDim1(input, output, output_value, in_shape, param, comparator);
+    } else if (param->axis_ == 2) {
+      ArgMinMaxDim2(input, output, output_value, in_shape, param, comparator);
+    } else if (param->axis_ == 3) {
+      ArgMinMaxDim3(input, output, output_value, in_shape, param, comparator);
+    }
+    return;
+  }
+
+  // Single-result path: collapse the shape into (before axis, axis, after axis) extents.
+  int outer_count = 1;
+  int axis_len = 1;
+  int inner_count = 1;
+  GetCalcParameter(in_shape, param->dims_size_, param->axis_, &outer_count, &axis_len, &inner_count);
+  if (param->get_max_) {
+    ArgMaxTopK1(input, output, output_value, param, outer_count, axis_len, inner_count);
+  } else {
+    ArgMinTopK1(input, output, output_value, param, outer_count, axis_len, inner_count);
+  }
+}
--- a/mindspore/lite/nnacl/fp32/arg_min_max_fp32.h
+++ b/mindspore/lite/nnacl/fp32/arg_min_max_fp32.h
@ -18,21 +18,13 @@

 #include "nnacl/arg_min_max_parameter.h"

+// Comparator callback type: takes two untyped element pointers and returns an int.
+// This is the signature the arg-min/arg-max kernels hand to qsort.
+typedef int (*COMPARE_FUNCTION)(const void *a, const void *b);
+
 #ifdef __cplusplus
 extern "C" {
 #endif
-void ArgMax(const float *input, float *output, const ArgMinMaxParameter *param, int pre_axis_count, int axis_count,
-            int after_axis_count);
-void ArgMin(const float *input, float *output, const ArgMinMaxParameter *param, int pre_axis_count, int axis_count,
-            int after_axis_count);
-void ArgMaxDim0(const float *input, float *output, const int *in_shape, const ArgMinMaxParameter *param);
-void ArgMinDim0(const float *input, float *output, const int *in_shape, const ArgMinMaxParameter *param);
-void ArgMaxDim1(const float *input, float *output, const int *in_shape, const ArgMinMaxParameter *param);
-void ArgMinDim1(const float *input, float *output, const int *in_shape, const ArgMinMaxParameter *param);
-void ArgMaxDim2(const float *input, float *output, const int *in_shape, const ArgMinMaxParameter *param);
-void ArgMinDim2(const float *input, float *output, const int *in_shape, const ArgMinMaxParameter *param);
-void ArgMaxDim3(const float *input, float *output, const int *in_shape, const ArgMinMaxParameter *param);
-void ArgMinDim3(const float *input, float *output, const int *in_shape, const ArgMinMaxParameter *param);
+// Float32 arg-min/arg-max over `input`, whose dimensions are given by `in_shape`.
+// `output` receives values when param->out_value_ is set and indices otherwise; `output_value`, when
+// non-NULL, additionally receives the values. param->topk_ == 1 takes a dedicated single-result path;
+// other topk_ values sort along param->axis_ (axes 0-3 only).
+void ArgMinMaxFp32(const float *input, float *output, float *output_value, const int *in_shape,
+                   const ArgMinMaxParameter *param);
 #ifdef __cplusplus
 }
 #endif
--- a/mindspore/lite/nnacl/fp32/instance_norm_fp32.c
+++ b/mindspore/lite/nnacl/fp32/instance_norm_fp32.c
@ -1,47 +0,0 @@
-/**
- * Copyright 2020 Huawei Technologies Co., Ltd
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#include "nnacl/fp32/instance_norm_fp32.h"
-#include <math.h>
-#include "nnacl/errorcode.h"
-#include "nnacl/op_base.h"
-
-int InstanceNorm(int outer_size, int inner_size, const float *src_data, const float *scale_data, const float *bias_data,
-                 const InstanceNormParameter *param, float *dst_data, int task_id, int thread_num) {
-  if (src_data == NULL || dst_data == NULL || scale_data == NULL || bias_data == NULL) {
-    return NNACL_NULL_PTR;
-  }
-  for (int j = task_id; j < outer_size; j += thread_num) {
-    int offset = (j / param->channel_) * inner_size * param->channel_;
-    const float *src = src_data + offset;
-    float *dst = dst_data + offset;
-    float mean = 0.0f;
-    float square_mean = 0.0f;
-    for (int i = 0; i < inner_size; i++) {
-      int idx = j % param->channel_ + i * param->channel_;
-      mean += src[idx];
-      square_mean += src[idx] * src[idx];
-    }
-    mean /= (float)inner_size;
-    square_mean /= (float)inner_size;
-    const float deno = 1 / sqrtf(square_mean - mean * mean + param->epsilon_);
-    for (int i = 0; i < inner_size; ++i) {
-      int idx = j % param->channel_ + i * param->channel_;
-      int scale_idx = (j / param->channel_) * param->channel_ + j % param->channel_;
-      dst[idx] = ((src[idx] - mean) * deno) * scale_data[scale_idx] + bias_data[scale_idx];
-    }
-  }
-  return NNACL_OK;
-}
--- a/mindspore/lite/nnacl/fp32/instance_norm_fp32.h
+++ b/mindspore/lite/nnacl/fp32/instance_norm_fp32.h
@ -1,32 +0,0 @@
-/**
- * Copyright 2020 Huawei Technologies Co., Ltd
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#ifndef MINDSPORE_LITE_NNACL_FP32_INSTANCE_NORM_H_
-#define MINDSPORE_LITE_NNACL_FP32_INSTANCE_NORM_H_
-
-#include "nnacl/op_base.h"
-#include "nnacl/instance_norm_parameter.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-int InstanceNorm(int outer_size, int inner_size, const float *src_data, const float *scale_data, const float *bias_data,
-                 const InstanceNormParameter *param, float *dst_data, int task_id, int thread_num);
-#ifdef __cplusplus
-}
-#endif
-
-#endif  // MINDSPORE_LITE_NNACL_FP32_INSTANCE_NORM_H_
--- a/mindspore/lite/src/ops/populate/argmax_populate.cc
+++ b/mindspore/lite/src/ops/populate/argmax_populate.cc
@ -35,6 +35,7 @@ OpParameter *PopulateArgMaxParameter(const mindspore::lite::PrimitiveC *primitiv
  arg_param->axis_type_ = param->GetAxisType();
  arg_param->out_value_ = param->GetOutMaxValue();
  arg_param->keep_dims_ = param->GetKeepDims();
+  arg_param->get_max_ = true;
  return reinterpret_cast<OpParameter *>(arg_param);
 }

--- a/mindspore/lite/src/ops/populate/argmin_populate.cc
+++ b/mindspore/lite/src/ops/populate/argmin_populate.cc
@ -35,6 +35,7 @@ OpParameter *PopulateArgMinParameter(const mindspore::lite::PrimitiveC *primitiv
  arg_param->axis_type_ = param->GetAxisType();
  arg_param->out_value_ = param->GetOutMaxValue();
  arg_param->keep_dims_ = param->GetKeepDims();
+  arg_param->get_max_ = false;
  return reinterpret_cast<OpParameter *>(arg_param);
 }

--- a/mindspore/lite/src/runtime/kernel/arm/base/arg_min_max_base.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/base/arg_min_max_base.cc
@ -1,118 +0,0 @@
-/**
- * Copyright 2020 Huawei Technologies Co., Ltd
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#include "src/runtime/kernel/arm/base/arg_min_max_base.h"
-#include "nnacl/arg_min_max.h"
-#include "src/runtime/kernel/arm/fp32/argminmax_fp32.h"
-#include "nnacl/arithmetic_common.h"
-#include "schema/model_generated.h"
-#include "src/kernel_registry.h"
-#include "include/errorcode.h"
-#include "include/context.h"
-
-using mindspore::lite::KernelRegistrar;
-using mindspore::lite::RET_ERROR;
-using mindspore::lite::RET_FORMAT_ERR;
-using mindspore::lite::RET_OK;
-using mindspore::lite::RET_PARAM_INVALID;
-using mindspore::schema::PrimitiveType_ArgMax;
-using mindspore::schema::PrimitiveType_ArgMin;
-
-namespace mindspore::kernel {
-int ArgMinMaxBaseCPUKernel::Init() {
-  auto param = reinterpret_cast<ArgMinMaxParameter *>(op_parameter_);
-  switch (op_parameter_->type_) {
-    case PrimitiveType_ArgMax:
-      param->get_max_ = true;
-      break;
-    case PrimitiveType_ArgMin:
-      param->get_max_ = false;
-      break;
-    default:
-      MS_LOG(ERROR) << "Unexpected type " << op_parameter_->type_;
-      return RET_ERROR;
-  }
-
-  return RET_OK;
-}
-
-int ArgMinMaxBaseCPUKernel::ReSize() {
-  auto in_shape = in_tensors_.at(0)->shape();
-  auto dims_size = in_shape.size();
-  auto param = reinterpret_cast<ArgMinMaxParameter *>(op_parameter_);
-  int axis = param->axis_ < 0 ? param->axis_ + dims_size : param->axis_;
-  param->axis_ = axis;
-  param->dims_size_ = dims_size;
-  if (param->topk_ <= 0) {
-    MS_LOG(ERROR) << "Invalid topk " << param->topk_;
-    return RET_PARAM_INVALID;
-  }
-  param->topk_ = MSMIN(param->topk_, in_shape.at(axis));
-  ComputeStrides(in_shape.data(), param->in_strides_, in_shape.size());
-  auto out_shape = out_tensors_.at(0)->shape();
-  ComputeStrides(out_shape.data(), param->out_strides_, out_shape.size());
-  return RET_OK;
-}
-
-int ArgMinMaxBaseCPUKernel::Run() {
-  auto input_data = in_tensors_.at(0)->MutableData();
-  auto output_data = out_tensors_.at(0)->MutableData();
-
-  auto shape = in_tensors_.at(0)->shape();
-
-  auto param = reinterpret_cast<ArgMinMaxParameter *>(op_parameter_);
-  MS_ASSERT(context_->allocator != nullptr);
-  if (param->topk_ > 1 || param->keep_dims_) {
-    param->arg_elements_ =
-      reinterpret_cast<ArgElement *>(context_->allocator->Malloc(sizeof(ArgElement) * shape[param->axis_]));
-    if (param->arg_elements_ == nullptr) {
-      MS_LOG(ERROR) << "malloc memroy fail!";
-      return RET_ERROR;
-    }
-  }
-  ArgMinMax(input_data, output_data, reinterpret_cast<const int *>(shape.data()), param);
-  context_->allocator->Free(param->arg_elements_);
-  param->arg_elements_ = nullptr;
-  return RET_OK;
-}
-
-kernel::LiteKernel *CpuArgMinMaxFp32KernelCreator(const std::vector<lite::Tensor *> &inputs,
-                                                  const std::vector<lite::Tensor *> &outputs, OpParameter *op_parameter,
-                                                  const lite::InnerContext *ctx, const kernel::KernelKey &desc,
-                                                  const mindspore::lite::PrimitiveC *primitive) {
-  if (op_parameter == nullptr) {
-    MS_LOG(ERROR) << "Input op_parameter is nullptr!";
-    return nullptr;
-  }
-  auto kernel = new (std::nothrow) ArgMinMaxCPUKernel(op_parameter, inputs, outputs, ctx, primitive);
-  if (kernel == nullptr) {
-    MS_LOG(ERROR) << "new ArgMinMaxCPUKernel fail!";
-    free(op_parameter);
-    return nullptr;
-  }
-  auto ret = kernel->Init();
-  if (ret != RET_OK) {
-    MS_LOG(ERROR) << "Init kernel failed, name: " << op_parameter->name_ << ", type: "
-                  << schema::EnumNamePrimitiveType(static_cast<schema::PrimitiveType>(op_parameter->type_));
-    delete kernel;
-    return nullptr;
-  }
-  return kernel;
-}
-
-REG_KERNEL(kCPU, kNumberTypeFloat32, PrimitiveType_ArgMax, CpuArgMinMaxFp32KernelCreator)
-REG_KERNEL(kCPU, kNumberTypeFloat32, PrimitiveType_ArgMin, CpuArgMinMaxFp32KernelCreator)
-
-}  // namespace mindspore::kernel
--- a/mindspore/lite/src/runtime/kernel/arm/base/arg_min_max_base.h
+++ b/mindspore/lite/src/runtime/kernel/arm/base/arg_min_max_base.h
@ -1,41 +0,0 @@
-/**
- * Copyright 2020 Huawei Technologies Co., Ltd
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_BASE_ARG_MIN_MAX_BASE_H_
-#define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_BASE_ARG_MIN_MAX_BASE_H_
-
-#include <vector>
-#include "src/lite_kernel.h"
-
-namespace mindspore::kernel {
-class ArgMinMaxBaseCPUKernel : public LiteKernel {
- public:
-  ArgMinMaxBaseCPUKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs,
-                         const std::vector<lite::Tensor *> &outputs, const lite::InnerContext *ctx,
-                         const mindspore::lite::PrimitiveC *primitive)
-      : LiteKernel(parameter, inputs, outputs, ctx, primitive) {}
-
-  virtual ~ArgMinMaxBaseCPUKernel() = default;
-
-  int Init() override;
-  int ReSize() override;
-  int Run() override;
-
- private:
-};
-}  // namespace mindspore::kernel
-
-#endif  // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_BASE_ARG_MIN_MAX_BASE_H_
--- a/mindspore/lite/src/runtime/kernel/arm/fp32/argminmax_fp32.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/fp32/argminmax_fp32.cc
@ -15,11 +15,8 @@
 */

 #include "src/runtime/kernel/arm/fp32/argminmax_fp32.h"
-#include <vector>
 #include "schema/model_generated.h"
 #include "src/kernel_registry.h"
-#include "nnacl/arg_min_max.h"
-#include "include/errorcode.h"

 using mindspore::kernel::KERNEL_ARCH::kCPU;
 using mindspore::lite::KernelRegistrar;
@ -30,22 +27,79 @@ using mindspore::schema::PrimitiveType_ArgMin;

 namespace mindspore::kernel {
+// Marks the parameter as float32, then runs ReSize() if shape inference is already done.
+// Returns RET_OK without resizing when shapes are not yet inferred; otherwise returns ReSize()'s result.
 int ArgMinMaxCPUKernel::Init() {
-  auto ret = ArgMinMaxBaseCPUKernel::Init();
-  if (ret != RET_OK) {
-    return ret;
-  }
-  auto param = reinterpret_cast<ArgMinMaxParameter *>(op_parameter_);
-  param->data_type_ = kNumberTypeFloat32;
+  arg_param_->data_type_ = kNumberTypeFloat32;
  if (!InferShapeDone()) {
    return RET_OK;
  }
  return ReSize();
 }

-int ArgMinMaxCPUKernel::ReSize() { return ArgMinMaxBaseCPUKernel::ReSize(); }
+// Refreshes the shape-dependent fields of the ArgMin/ArgMax parameter:
+// normalized axis, dimension count, clamped topk and the input/output strides.
+// Returns RET_OK on success, RET_ERROR when axis or topk is invalid.
+int ArgMinMaxCPUKernel::ReSize() {
+  auto in_shape = in_tensors_.at(0)->shape();
+  auto dims_size = in_shape.size();
+  // A negative axis counts back from the last dimension.
+  int axis = arg_param_->axis_ < 0 ? arg_param_->axis_ + dims_size : arg_param_->axis_;
+  // Validate before indexing: in_shape.at(axis) below would otherwise throw on an out-of-range axis
+  // (this also covers an empty input shape).
+  if (axis < 0 || static_cast<size_t>(axis) >= dims_size) {
+    MS_LOG(ERROR) << "Invalid axis " << arg_param_->axis_ << ", input dims " << dims_size;
+    return RET_ERROR;
+  }
+  arg_param_->axis_ = axis;
+  arg_param_->dims_size_ = dims_size;
+  if (arg_param_->topk_ <= 0) {
+    MS_LOG(ERROR) << "Invalid topk " << arg_param_->topk_;
+    return RET_ERROR;
+  }
+  // topk cannot exceed the number of elements along the selected axis.
+  arg_param_->topk_ = MSMIN(arg_param_->topk_, in_shape.at(axis));
+  ComputeStrides(in_shape.data(), arg_param_->in_strides_, in_shape.size());
+  auto out_shape = out_tensors_.at(0)->shape();
+  ComputeStrides(out_shape.data(), arg_param_->out_strides_, out_shape.size());
+  return RET_OK;
+}

+// Runs the float32 ArgMin/ArgMax computation on the first input tensor.
+// Returns RET_OK on success, RET_ERROR when the scratch buffer cannot be allocated.
 int ArgMinMaxCPUKernel::Run() {
-  auto ret = ArgMinMaxBaseCPUKernel::Run();
-  return ret;
+  float *input_data = reinterpret_cast<float *>(in_tensors_.at(0)->data_c());
+  float *output_data = reinterpret_cast<float *>(out_tensors_.at(0)->data_c());
+  float *output_value = nullptr;
+  // A second output tensor is optional; when present its buffer is forwarded as output_value.
+  if (out_tensors_.size() == 2) {
+    output_value = reinterpret_cast<float *>(out_tensors_.at(1)->data_c());
+  }
+
+  auto shape = in_tensors_.at(0)->shape();
+
+  MS_ASSERT(context_->allocator != nullptr);
+  // Scratch buffer with one ArgElement per entry along axis_; only allocated for topk > 1 or keep_dims.
+  if (arg_param_->topk_ > 1 || arg_param_->keep_dims_) {
+    arg_param_->arg_elements_ =
+      reinterpret_cast<ArgElement *>(context_->allocator->Malloc(sizeof(ArgElement) * shape[arg_param_->axis_]));
+    if (arg_param_->arg_elements_ == nullptr) {
+      MS_LOG(ERROR) << "malloc memroy fail!";
+      return RET_ERROR;
+    }
+  }
+  ArgMinMaxFp32(input_data, output_data, output_value, reinterpret_cast<const int *>(shape.data()), arg_param_);
+  // Release the scratch buffer and clear the pointer so the parameter keeps no stale reference.
+  // NOTE(review): Free() is also reached when nothing was allocated in this call; this presumably relies on
+  // arg_elements_ being null in that case and on Free() tolerating null - confirm.
+  context_->allocator->Free(arg_param_->arg_elements_);
+  arg_param_->arg_elements_ = nullptr;
+  return RET_OK;
 }
+
+// Factory for the float32 ArgMin/ArgMax CPU kernel.
+// Returns the initialized kernel, or nullptr when op_parameter is null, the kernel cannot be allocated,
+// or Init() fails. The desc argument is not used by this creator.
+kernel::LiteKernel *CpuArgMinMaxFp32KernelCreator(const std::vector<lite::Tensor *> &inputs,
+                                                  const std::vector<lite::Tensor *> &outputs, OpParameter *op_parameter,
+                                                  const lite::InnerContext *ctx, const kernel::KernelKey &desc,
+                                                  const mindspore::lite::PrimitiveC *primitive) {
+  if (op_parameter == nullptr) {
+    MS_LOG(ERROR) << "Input op_parameter is nullptr!";
+    return nullptr;
+  }
+  auto kernel = new (std::nothrow) ArgMinMaxCPUKernel(op_parameter, inputs, outputs, ctx, primitive);
+  if (kernel == nullptr) {
+    MS_LOG(ERROR) << "new ArgMinMaxCPUKernel fail!";
+    // No kernel object exists to take the parameter, so it is released here.
+    free(op_parameter);
+    return nullptr;
+  }
+  auto ret = kernel->Init();
+  if (ret != RET_OK) {
+    // Log before deleting the kernel: the message still reads fields of op_parameter.
+    MS_LOG(ERROR) << "Init kernel failed, name: " << op_parameter->name_ << ", type: "
+                  << schema::EnumNamePrimitiveType(static_cast<schema::PrimitiveType>(op_parameter->type_));
+    // NOTE(review): op_parameter is not freed explicitly on this path; presumably the kernel destructor
+    // releases it - confirm against LiteKernel.
+    delete kernel;
+    return nullptr;
+  }
+  return kernel;
+}
+
+// The same creator is registered for both the ArgMax and the ArgMin primitive on CPU with float32 data.
+REG_KERNEL(kCPU, kNumberTypeFloat32, PrimitiveType_ArgMax, CpuArgMinMaxFp32KernelCreator)
+REG_KERNEL(kCPU, kNumberTypeFloat32, PrimitiveType_ArgMin, CpuArgMinMaxFp32KernelCreator)
 }  // namespace mindspore::kernel
--- a/mindspore/lite/src/runtime/kernel/arm/fp32/argminmax_fp32.h
+++ b/mindspore/lite/src/runtime/kernel/arm/fp32/argminmax_fp32.h
@ -17,21 +17,29 @@
 #define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP32_ARGMINMAX_H_

 #include <vector>
-#include "src/runtime/kernel/arm/base/arg_min_max_base.h"
+#include "include/errorcode.h"
+#include "nnacl/fp32/arg_min_max_fp32.h"
+#include "nnacl/arithmetic_common.h"
+#include "src/lite_kernel.h"

 namespace mindspore::kernel {
-class ArgMinMaxCPUKernel : public ArgMinMaxBaseCPUKernel {
+// Float32 ArgMin/ArgMax CPU kernel; derives directly from LiteKernel and implements Init/ReSize/Run itself.
+class ArgMinMaxCPUKernel : public LiteKernel {
 public:
  ArgMinMaxCPUKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs,
                     const std::vector<lite::Tensor *> &outputs, const lite::InnerContext *ctx,
                     const mindspore::lite::PrimitiveC *primitive)
-      : ArgMinMaxBaseCPUKernel(parameter, inputs, outputs, ctx, primitive) {}
+      : LiteKernel(parameter, inputs, outputs, ctx, primitive) {
+    // Cache the typed view of the operator parameter once so the methods need no repeated casts.
+    arg_param_ = reinterpret_cast<ArgMinMaxParameter *>(op_parameter_);
+  }

  ~ArgMinMaxCPUKernel() = default;

  int Init() override;
  int ReSize() override;
  int Run() override;
+
+ private:
+  // op_parameter_ reinterpreted as ArgMinMaxParameter; assigned in the constructor.
+  ArgMinMaxParameter *arg_param_;
 };
 }  // namespace mindspore::kernel

--- a/mindspore/lite/src/runtime/kernel/arm/fp32/instance_norm_fp32.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/fp32/instance_norm_fp32.cc
@ -1,107 +0,0 @@
-/**
- * Copyright 2020 Huawei Technologies Co., Ltd
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#include "src/runtime/kernel/arm/fp32/instance_norm_fp32.h"
-#include <vector>
-#include "schema/model_generated.h"
-#include "src/kernel_registry.h"
-#include "include/errorcode.h"
-
-using mindspore::kernel::KERNEL_ARCH::kCPU;
-using mindspore::lite::KernelRegistrar;
-using mindspore::lite::RET_ERROR;
-using mindspore::lite::RET_OK;
-using mindspore::schema::PrimitiveType_InstanceNorm;
-
-namespace mindspore::kernel {
-int InstanceNormCPUKernel::Init() {
-  if (!InferShapeDone()) {
-    return RET_OK;
-  }
-  return ReSize();
-}
-
-int InstanceNormCPUKernel::ReSize() {
-  auto input_shapes = in_tensors_.front()->shape();
-  auto n_dim = input_shapes.size();
-  outer_size_ = input_shapes.at(0) * input_shapes.at(n_dim - 1);
-  inner_size_ = 1;
-  for (size_t i = 0; i < n_dim - 1; ++i) {
-    inner_size_ *= input_shapes.at(i);
-  }
-  param_->channel_ = input_shapes.at(n_dim - 1);
-  return RET_OK;
-}
-
-int InstanceNormCPUKernel::DoInstanceNorm(int task_id) {
-  int ret = InstanceNorm(outer_size_, inner_size_, src_data_, scale_data_, bias_data_, param_, dst_data_, task_id,
-                         op_parameter_->thread_num_);
-  if (ret != RET_OK) {
-    MS_LOG(ERROR) << "DoInstanceNorm error error_code[" << ret << "]";
-    return ret;
-  }
-  return RET_OK;
-}
-
-int InstanceNormRun(void *cdata, int task_id) {
-  auto kernel = reinterpret_cast<InstanceNormCPUKernel *>(cdata);
-  auto ret = kernel->DoInstanceNorm(task_id);
-  if (ret != RET_OK) {
-    MS_LOG(ERROR) << "InstanceNormRun error task_id[" << task_id << "] error_code[" << ret << "]";
-  }
-  return ret;
-}
-
-int InstanceNormCPUKernel::Run() {
-  src_data_ = reinterpret_cast<float *>(in_tensors_.at(0)->MutableData());
-  scale_data_ = reinterpret_cast<float *>(in_tensors_.at(1)->MutableData());
-  bias_data_ = reinterpret_cast<float *>(in_tensors_.at(2)->MutableData());
-  dst_data_ = reinterpret_cast<float *>(out_tensors_.at(0)->MutableData());
-  auto ret = ParallelLaunch(this->context_->thread_pool_, InstanceNormRun, this, op_parameter_->thread_num_);
-  if (ret != RET_OK) {
-    MS_LOG(ERROR) << "FillRun error error_code[" << ret << "]";
-    return ret;
-  }
-  return RET_OK;
-}
-
-kernel::LiteKernel *CpuInstanceNormFp32KernelCreator(const std::vector<lite::Tensor *> &inputs,
-                                                     const std::vector<lite::Tensor *> &outputs,
-                                                     OpParameter *opParameter, const lite::InnerContext *ctx,
-                                                     const kernel::KernelKey &desc,
-                                                     const mindspore::lite::PrimitiveC *primitive) {
-  if (opParameter == nullptr) {
-    MS_LOG(ERROR) << "Create kernel failed, opParameter is nullptr, type: PrimitiveType_InstanceNorm. ";
-    return nullptr;
-  }
-  MS_ASSERT(desc.type == schema::PrimitiveType_InstanceNorm);
-  auto *kernel = new (std::nothrow) InstanceNormCPUKernel(opParameter, inputs, outputs, ctx, primitive);
-  if (kernel == nullptr) {
-    MS_LOG(ERROR) << "new InstanceNormCPUKernel fail!";
-    free(opParameter);
-    return nullptr;
-  }
-  auto ret = kernel->Init();
-  if (ret != RET_OK) {
-    MS_LOG(ERROR) << "Init kernel failed, name: " << opParameter->name_ << ", type: "
-                  << schema::EnumNamePrimitiveType(static_cast<schema::PrimitiveType>(opParameter->type_));
-    delete kernel;
-    return nullptr;
-  }
-  return kernel;
-}
-
-REG_KERNEL(kCPU, kNumberTypeFloat32, PrimitiveType_InstanceNorm, CpuInstanceNormFp32KernelCreator)
-}  // namespace mindspore::kernel
--- a/mindspore/lite/src/runtime/kernel/arm/fp32/instance_norm_fp32.h
+++ b/mindspore/lite/src/runtime/kernel/arm/fp32/instance_norm_fp32.h
@ -1,53 +0,0 @@
-/**
- * Copyright 2020 Huawei Technologies Co., Ltd
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#ifndef MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP32_INSTANCE_NORM_H_
-#define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP32_INSTANCE_NORM_H_
-
-#include <vector>
-#include "src/lite_kernel.h"
-#include "include/context.h"
-#include "nnacl/fp32/instance_norm_fp32.h"
-
-using mindspore::lite::InnerContext;
-
-namespace mindspore::kernel {
-class InstanceNormCPUKernel : public LiteKernel {
- public:
-  InstanceNormCPUKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs,
-                        const std::vector<lite::Tensor *> &outputs, const lite::InnerContext *ctx,
-                        const mindspore::lite::PrimitiveC *primitive)
-      : LiteKernel(parameter, inputs, outputs, ctx, primitive) {
-    param_ = reinterpret_cast<InstanceNormParameter *>(parameter);
-  }
-  ~InstanceNormCPUKernel() override{};
-
-  int Init() override;
-  int ReSize() override;
-  int Run() override;
-  int DoInstanceNorm(int thread_id);
-
- private:
-  InstanceNormParameter *param_ = nullptr;
-  int outer_size_;
-  int inner_size_;
-  float *src_data_ = nullptr;
-  float *dst_data_ = nullptr;
-  float *scale_data_ = nullptr;
-  float *bias_data_ = nullptr;
-};
-}  // namespace mindspore::kernel
-
-#endif  // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP32_INSTANCE_NORM_H_
--- a/mindspore/lite/src/runtime/kernel/arm/int8/argminmax_int8.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/int8/argminmax_int8.cc
@ -14,11 +14,8 @@
 * limitations under the License.
 */
 #include "src/runtime/kernel/arm/int8/argminmax_int8.h"
-#include <vector>
 #include "schema/model_generated.h"
 #include "src/kernel_registry.h"
-#include "nnacl/int8/arg_min_max_int8.h"
-#include "include/errorcode.h"

 using mindspore::lite::RET_ERROR;
 using mindspore::lite::RET_OK;
@ -31,10 +28,6 @@ using mindspore::schema::PrimitiveType_ArgMin;

 namespace mindspore::kernel {
 int ArgMinMaxInt8CPUKernel::Init() {
-  auto ret = ArgMinMaxBaseCPUKernel::Init();
-  if (ret != RET_OK) {
-    return ret;
-  }
  auto param = reinterpret_cast<ArgMinMaxParameter *>(op_parameter_);
  param->data_type_ = kNumberTypeInt8;
  auto *input_tensor = in_tensors_.at(kInputIndex);
@ -52,7 +45,23 @@ int ArgMinMaxInt8CPUKernel::Init() {
  return ReSize();
 }

-int ArgMinMaxInt8CPUKernel::ReSize() { return ArgMinMaxBaseCPUKernel::ReSize(); }
+// Refreshes the shape-dependent fields of the ArgMin/ArgMax parameter:
+// normalized axis, dimension count, clamped topk and the input/output strides.
+// Returns RET_OK on success, RET_ERROR when axis or topk is invalid.
+int ArgMinMaxInt8CPUKernel::ReSize() {
+  auto in_shape = in_tensors_.at(0)->shape();
+  auto dims_size = in_shape.size();
+  auto param = reinterpret_cast<ArgMinMaxParameter *>(op_parameter_);
+  // A negative axis counts back from the last dimension.
+  int axis = param->axis_ < 0 ? param->axis_ + dims_size : param->axis_;
+  // Validate before indexing: in_shape.at(axis) below would otherwise throw on an out-of-range axis
+  // (this also covers an empty input shape).
+  if (axis < 0 || static_cast<size_t>(axis) >= dims_size) {
+    MS_LOG(ERROR) << "Invalid axis " << param->axis_ << ", input dims " << dims_size;
+    return RET_ERROR;
+  }
+  param->axis_ = axis;
+  param->dims_size_ = dims_size;
+  if (param->topk_ <= 0) {
+    MS_LOG(ERROR) << "Invalid topk " << param->topk_;
+    return RET_ERROR;
+  }
+  // topk cannot exceed the number of elements along the selected axis.
+  param->topk_ = MSMIN(param->topk_, in_shape.at(axis));
+  ComputeStrides(in_shape.data(), param->in_strides_, in_shape.size());
+  auto out_shape = out_tensors_.at(0)->shape();
+  ComputeStrides(out_shape.data(), param->out_strides_, out_shape.size());
+  return RET_OK;
+}

 int ArgMinMaxInt8CPUKernel::Run() {
  auto input = in_tensors_.at(0);
@ -110,5 +119,4 @@ kernel::LiteKernel *CpuArgMinMaxInt8KernelCreator(const std::vector<lite::Tensor

 REG_KERNEL(kCPU, kNumberTypeInt8, PrimitiveType_ArgMax, CpuArgMinMaxInt8KernelCreator)
 REG_KERNEL(kCPU, kNumberTypeInt8, PrimitiveType_ArgMin, CpuArgMinMaxInt8KernelCreator)
-
 }  // namespace mindspore::kernel
--- a/mindspore/lite/src/runtime/kernel/arm/int8/argminmax_int8.h
+++ b/mindspore/lite/src/runtime/kernel/arm/int8/argminmax_int8.h
@ -17,16 +17,19 @@
 #define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_INT8_ARGMINMAX_INT8_H_

 #include <vector>
-#include "src/runtime/kernel/arm/base/arg_min_max_base.h"
 #include "nnacl/quantization/quantize.h"
+#include "nnacl/int8/arg_min_max_int8.h"
+#include "nnacl/arithmetic_common.h"
+#include "include/errorcode.h"
+#include "src/lite_kernel.h"

 namespace mindspore::kernel {
-class ArgMinMaxInt8CPUKernel : public ArgMinMaxBaseCPUKernel {
+class ArgMinMaxInt8CPUKernel : public LiteKernel {
 public:
  ArgMinMaxInt8CPUKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs,
                         const std::vector<lite::Tensor *> &outputs, const lite::InnerContext *ctx,
                         const mindspore::lite::PrimitiveC *primitive)
-      : ArgMinMaxBaseCPUKernel(parameter, inputs, outputs, ctx, primitive) {}
+      : LiteKernel(parameter, inputs, outputs, ctx, primitive) {}

  ~ArgMinMaxInt8CPUKernel() = default;

--- a/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/argminmax_fp32_test.cc
+++ b/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/argminmax_fp32_test.cc
@ -1,291 +0,0 @@
-/**
- * Copyright 2020 Huawei Technologies Co., Ltd
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#include "src/common/log_adapter.h"
-#include "common/common_test.h"
-#include "mindspore/lite/nnacl/fp32/arg_min_max_fp32.h"
-#include "mindspore/lite/nnacl/arg_min_max.h"
-#include "mindspore/lite/nnacl/arithmetic_common.h"
-
-namespace mindspore {
-
-class TestArgMinMaxTestFp32 : public mindspore::CommonTest {
- public:
-  TestArgMinMaxTestFp32() = default;
-};
-
-TEST_F(TestArgMinMaxTestFp32, ArgMaxTest1) {
-  std::vector<float> in = {10, 20, 30, 40, 90, 20, 11, 15, 1, 50, 30, 45, 25, 50, 30};
-  std::vector<float> except_out = {2, 2, 0, 2, 0};
-  std::vector<int> shape = {3, 5};
-  float out[5];
-  ArgMinMaxParameter param;
-  param.topk_ = 1;
-  param.out_value_ = false;
-  param.axis_ = 0;
-  param.data_type_ = 43;
-  param.dims_size_ = 2;
-  param.get_max_ = true;
-  param.keep_dims_ = false;
-  ArgMinMax(in.data(), out, shape.data(), &param);
-  for (size_t i = 0; i < except_out.size(); ++i) {
-    std::cout << out[i] << " ";
-  }
-  std::cout << "\n";
-  ASSERT_EQ(0, CompareOutputData(out, except_out.data(), except_out.size(), 0.000001));
-}
-
-TEST_F(TestArgMinMaxTestFp32, ArgMaxTest1_keep_dim) {
-  std::vector<float> in = {10, 20, 30, 40, 90, 20, 11, 15, 1, 50, 30, 45, 25, 50, 30};
-  std::vector<float> except_out = {2, 2, 0, 2, 0};
-  std::vector<int> shape = {3, 5};
-  float out[5];
-  ArgMinMaxParameter param;
-  param.topk_ = 1;
-  param.out_value_ = false;
-  param.axis_ = 0;
-  param.data_type_ = 43;
-  param.dims_size_ = 2;
-  param.get_max_ = true;
-  param.keep_dims_ = true;
-  param.arg_elements_ = reinterpret_cast<ArgElement *>(malloc(shape[param.axis_] * sizeof(ArgElement)));
-  std::vector<int> out_shape = {1, 5};
-  ComputeStrides(shape.data(), param.in_strides_, shape.size());
-  ComputeStrides(out_shape.data(), param.out_strides_, out_shape.size());
-  ArgMinMax(in.data(), out, shape.data(), &param);
-  for (size_t i = 0; i < except_out.size(); ++i) {
-    std::cout << out[i] << " ";
-  }
-  std::cout << "\n";
-  ASSERT_EQ(0, CompareOutputData(out, except_out.data(), except_out.size(), 0.000001));
-}
-
-TEST_F(TestArgMinMaxTestFp32, ArgMaxTest_axis2_keep_dim) {
-  std::vector<float> in = {10, 20, 30, 11, 15, 10, 5,  10, 12, 10, 20, 30, 11, 15,
-                           10, 5,  10, 12, 10, 20, 30, 11, 15, 10, 5,  10, 12};
-  std::vector<float> except_out = {1, 0, 0, 1, 0, 0, 1, 0, 0};
-  std::vector<int> shape = {1, 3, 3, 3};
-  float out[9];
-  ArgMinMaxParameter param;
-  param.topk_ = 1;
-  param.out_value_ = false;
-  param.axis_ = 2;
-  param.data_type_ = 43;
-  param.dims_size_ = 4;
-  param.get_max_ = true;
-  param.keep_dims_ = true;
-  param.arg_elements_ = reinterpret_cast<ArgElement *>(malloc(shape[param.axis_] * sizeof(ArgElement)));
-  std::vector<int> out_shape = {1, 3, 1, 3};
-  ComputeStrides(shape.data(), param.in_strides_, shape.size());
-  ComputeStrides(out_shape.data(), param.out_strides_, out_shape.size());
-  ArgMinMax(in.data(), out, shape.data(), &param);
-  for (size_t i = 0; i < except_out.size(); ++i) {
-    std::cout << out[i] << " ";
-  }
-  std::cout << "\n";
-  ASSERT_EQ(0, CompareOutputData(out, except_out.data(), except_out.size(), 0.000001));
-}
-
-TEST_F(TestArgMinMaxTestFp32, ArgMaxTest2) {
-  std::vector<float> in = {10, 20, 30, 40, 90, 20, 11, 15, 1, 50, 30, 45, 25, 50, 30};
-  std::vector<float> except_out = {30, 45, 30, 50, 90};
-  std::vector<int> shape = {3, 5};
-  float out[5];
-  ArgMinMaxParameter param;
-  param.topk_ = 1;
-  param.out_value_ = true;
-  param.axis_ = 0;
-  param.data_type_ = 43;
-  param.dims_size_ = 2;
-  param.get_max_ = true;
-  param.keep_dims_ = false;
-  ArgMinMax(in.data(), out, shape.data(), &param);
-  ASSERT_EQ(0, CompareOutputData(out, except_out.data(), except_out.size(), 0.000001));
-}
-
-TEST_F(TestArgMinMaxTestFp32, ArgMinTest2) {
-  std::vector<float> in = {10, 20, 30, 40, 90, 20, 11, 15, 1, 50, 30, 45, 25, 50, 30};
-  std::vector<float> except_out = {10, 11, 15, 1, 30};
-  std::vector<int> shape = {3, 5};
-  float out[5];
-  ArgMinMaxParameter param;
-  param.topk_ = 1;
-  param.out_value_ = true;
-  param.axis_ = 0;
-  param.data_type_ = 43;
-  param.dims_size_ = 2;
-  param.get_max_ = false;
-  param.keep_dims_ = false;
-  ArgMinMax(in.data(), out, shape.data(), &param);
-  ASSERT_EQ(0, CompareOutputData(out, except_out.data(), except_out.size(), 0.000001));
-}
-
-TEST_F(TestArgMinMaxTestFp32, ArgMaxTest3_axis2_out_data) {
-  std::vector<float> in = {10, 20, 30, 40, 90, 20, 11, 15, 1, 50, 30, 45, 25, 50, 30};
-  std::vector<float> except_out = {30, 45, 30, 50, 90, 20, 20, 25, 40, 50};
-  ArgMinMaxParameter param;
-  param.axis_ = 2;
-  std::vector<int> in_shape = {1, 1, 3, 5};
-  param.arg_elements_ = reinterpret_cast<ArgElement *>(malloc(in_shape[param.axis_] * sizeof(ArgElement)));
-  param.out_value_ = true;
-  param.topk_ = 2;
-  std::vector<int> out_shape = {1, 1, 2, 5};
-  ComputeStrides(in_shape.data(), param.in_strides_, in_shape.size());
-  ComputeStrides(out_shape.data(), param.out_strides_, out_shape.size());
-  float out[10];
-  ArgMaxDim2(in.data(), out, in_shape.data(), &param);
-  ASSERT_EQ(0, CompareOutputData(out, except_out.data(), except_out.size(), 0.00001));
-}
-
-// Top-2 maximum indices (out_value_ = false) along axis 2 of a {1, 1, 3, 5} input; output shape {1, 1, 2, 5}.
-TEST_F(TestArgMinMaxTestFp32, ArgMaxTest3_axis2_out_index) {
-  std::vector<float> in = {10, 20, 30, 40, 90, 20, 11, 15, 1, 50, 30, 45, 25, 50, 30};
-  std::vector<float> except_out = {2, 2, 0, 2, 0, 1, 0, 2, 0, 1};
-  ArgMinMaxParameter param;
-  param.axis_ = 2;
-  std::vector<int> in_shape = {1, 1, 3, 5};
-  // Buffer sized for one ArgElement per entry along param.axis_.
-  param.arg_elements_ = reinterpret_cast<ArgElement *>(malloc(in_shape[param.axis_] * sizeof(ArgElement)));
-  ASSERT_NE(param.arg_elements_, nullptr);
-  param.out_value_ = false;
-  param.topk_ = 2;
-  std::vector<int> out_shape = {1, 1, 2, 5};
-  ComputeStrides(in_shape.data(), param.in_strides_, in_shape.size());
-  ComputeStrides(out_shape.data(), param.out_strides_, out_shape.size());
-  float out[10];
-  ArgMaxDim2(in.data(), out, in_shape.data(), &param);
-  // Release the buffer before asserting: a failing ASSERT_* returns from the test body immediately.
-  free(param.arg_elements_);
-  param.arg_elements_ = nullptr;
-  ASSERT_EQ(0, CompareOutputData(out, except_out.data(), except_out.size(), 0.00001));
-}
-
-// Top-2 maximum values along axis 3 of a {1, 1, 3, 5} input; the expected output has shape {1, 1, 3, 2}.
-TEST_F(TestArgMinMaxTestFp32, ArgMaxTest4_axis3_out_data) {
-  std::vector<float> in = {10, 20, 30, 40, 90, 20, 11, 15, 1, 50, 30, 45, 25, 50, 30};
-  std::vector<float> except_out = {90, 40, 50, 20, 50, 45};
-  ArgMinMaxParameter param;
-  param.axis_ = 3;
-  std::vector<int> in_shape = {1, 1, 3, 5};
-  // Buffer sized for one ArgElement per entry along param.axis_.
-  param.arg_elements_ = reinterpret_cast<ArgElement *>(malloc(in_shape[param.axis_] * sizeof(ArgElement)));
-  ASSERT_NE(param.arg_elements_, nullptr);
-  param.out_value_ = true;
-  param.topk_ = 2;
-  std::vector<int> out_shape = {1, 1, 3, 2};
-  ComputeStrides(in_shape.data(), param.in_strides_, in_shape.size());
-  ComputeStrides(out_shape.data(), param.out_strides_, out_shape.size());
-  float out[6];
-  ArgMaxDim3(in.data(), out, in_shape.data(), &param);
-  // Release the buffer before asserting: a failing ASSERT_* returns from the test body immediately.
-  free(param.arg_elements_);
-  param.arg_elements_ = nullptr;
-  ASSERT_EQ(0, CompareOutputData(out, except_out.data(), except_out.size(), 0.00001));
-}
-
-// Top-2 maximum indices (out_value_ = false) along axis 3 of a {1, 1, 3, 5} input; output shape {1, 1, 3, 2}.
-TEST_F(TestArgMinMaxTestFp32, ArgMaxTest4_axis3_out_index) {
-  std::vector<float> in = {10, 20, 30, 40, 90, 20, 11, 15, 1, 50, 30, 45, 25, 50, 30};
-  std::vector<float> except_out = {4, 3, 4, 0, 3, 1};
-  ArgMinMaxParameter param;
-  param.axis_ = 3;
-  std::vector<int> in_shape = {1, 1, 3, 5};
-  // Buffer sized for one ArgElement per entry along param.axis_.
-  param.arg_elements_ = reinterpret_cast<ArgElement *>(malloc(in_shape[param.axis_] * sizeof(ArgElement)));
-  ASSERT_NE(param.arg_elements_, nullptr);
-  param.out_value_ = false;
-  param.topk_ = 2;
-  std::vector<int> out_shape = {1, 1, 3, 2};
-  ComputeStrides(in_shape.data(), param.in_strides_, in_shape.size());
-  ComputeStrides(out_shape.data(), param.out_strides_, out_shape.size());
-  float out[6];
-  ArgMaxDim3(in.data(), out, in_shape.data(), &param);
-  // Release the buffer before asserting: a failing ASSERT_* returns from the test body immediately.
-  free(param.arg_elements_);
-  param.arg_elements_ = nullptr;
-  ASSERT_EQ(0, CompareOutputData(out, except_out.data(), except_out.size(), 0.00001));
-}
-
-// Top-2 maximum indices (out_value_ = false) along axis 1 of a {2, 3, 2, 3} input; output shape {2, 2, 2, 3}.
-TEST_F(TestArgMinMaxTestFp32, ArgMaxTest5_axis1_out_index) {
-  std::vector<float> in = {100, 2,  300, 4,  50, 6,  11, 12, 13, 34, 35, 36,  9,  6, 17, 10, 20, 30,
-                           10,  20, 30,  40, 5,  60, 7,  80, 90, 10, 11, 120, 18, 5, 16, 9,  22, 23};
-  std::vector<float> except_out = {0, 1, 0, 1, 0, 1, 1, 2, 2, 2, 1, 2, 2, 1, 1, 0, 2, 1, 0, 0, 0, 1, 1, 0};
-  ArgMinMaxParameter param;
-  param.axis_ = 1;
-  std::vector<int> in_shape = {2, 3, 2, 3};
-  // Buffer sized for one ArgElement per entry along param.axis_.
-  param.arg_elements_ = reinterpret_cast<ArgElement *>(malloc(in_shape[param.axis_] * sizeof(ArgElement)));
-  ASSERT_NE(param.arg_elements_, nullptr);
-  param.out_value_ = false;
-  param.topk_ = 2;
-  std::vector<int> out_shape = {2, 2, 2, 3};
-  ComputeStrides(in_shape.data(), param.in_strides_, in_shape.size());
-  ComputeStrides(out_shape.data(), param.out_strides_, out_shape.size());
-  float out[24];
-  ArgMaxDim1(in.data(), out, in_shape.data(), &param);
-  // Release the buffer before asserting: a failing ASSERT_* returns from the test body immediately.
-  free(param.arg_elements_);
-  param.arg_elements_ = nullptr;
-  ASSERT_EQ(0, CompareOutputData(out, except_out.data(), except_out.size(), 0.00001));
-}
-
-// Top-2 maximum values along axis 1 of a {2, 3, 2, 3} input; the expected output has shape {2, 2, 2, 3}.
-TEST_F(TestArgMinMaxTestFp32, ArgMaxTest5_axis1_out_data) {
-  std::vector<float> in = {100, 2,  300, 4,  50, 6,  11, 12, 13, 34, 35, 36,  9,  6, 17, 10, 20, 30,
-                           10,  20, 30,  40, 5,  60, 7,  80, 90, 10, 11, 120, 18, 5, 16, 9,  22, 23};
-  std::vector<float> except_out = {100, 12, 300, 34, 50, 36,  11, 6,  17, 10, 35, 30,
-                                   18,  80, 90,  40, 22, 120, 10, 20, 30, 10, 11, 60};
-  ArgMinMaxParameter param;
-  param.axis_ = 1;
-  std::vector<int> in_shape = {2, 3, 2, 3};
-  // Buffer sized for one ArgElement per entry along param.axis_.
-  param.arg_elements_ = reinterpret_cast<ArgElement *>(malloc(in_shape[param.axis_] * sizeof(ArgElement)));
-  ASSERT_NE(param.arg_elements_, nullptr);
-  param.out_value_ = true;
-  param.topk_ = 2;
-  std::vector<int> out_shape = {2, 2, 2, 3};
-  ComputeStrides(in_shape.data(), param.in_strides_, in_shape.size());
-  ComputeStrides(out_shape.data(), param.out_strides_, out_shape.size());
-  float out[24];
-  ArgMaxDim1(in.data(), out, in_shape.data(), &param);
-  // Release the buffer before asserting: a failing ASSERT_* returns from the test body immediately.
-  free(param.arg_elements_);
-  param.arg_elements_ = nullptr;
-  ASSERT_EQ(0, CompareOutputData(out, except_out.data(), except_out.size(), 0.00001));
-}
-
-// Top-2 maximum indices (out_value_ = false) along axis 0 of a {3, 2, 2, 2} input; output shape {2, 2, 2, 2}.
-TEST_F(TestArgMinMaxTestFp32, ArgMaxTest6_axis0_out_index) {
-  std::vector<float> in = {100, 2, 4, 50, 11, 12, 34, 35, 10, 20, 40, 5, 7, 80, 10, 11, 55, 25, 5, 15, 18, 8, 15, 16};
-  std::vector<float> except_out = {0, 2, 1, 0, 2, 1, 0, 0, 2, 1, 2, 2, 0, 0, 2, 2};
-  ArgMinMaxParameter param;
-  // Axis 0 is the one that shrinks from 3 (in_shape) to 2 (out_shape), and ArgMaxDim0 is the kernel under test,
-  // so the buffer below has to be sized from in_shape[0] (3 entries) rather than in_shape[1] (2 entries).
-  // NOTE(review): presumably ArgMaxDim0 stores one ArgElement per axis-0 entry - confirm against the kernel.
-  param.axis_ = 0;
-  std::vector<int> in_shape = {3, 2, 2, 2};
-  param.arg_elements_ = reinterpret_cast<ArgElement *>(malloc(in_shape[param.axis_] * sizeof(ArgElement)));
-  ASSERT_NE(param.arg_elements_, nullptr);
-  param.out_value_ = false;
-  param.topk_ = 2;
-  std::vector<int> out_shape = {2, 2, 2, 2};
-  ComputeStrides(in_shape.data(), param.in_strides_, in_shape.size());
-  ComputeStrides(out_shape.data(), param.out_strides_, out_shape.size());
-  float out[16];
-  ArgMaxDim0(in.data(), out, in_shape.data(), &param);
-  // Release the buffer before asserting: a failing ASSERT_* returns from the test body immediately.
-  free(param.arg_elements_);
-  param.arg_elements_ = nullptr;
-  ASSERT_EQ(0, CompareOutputData(out, except_out.data(), except_out.size(), 0.00001));
-}
-
-// Top-2 maximum values along axis 0 of a {3, 2, 2, 2} input; the expected output has shape {2, 2, 2, 2}.
-TEST_F(TestArgMinMaxTestFp32, ArgMaxTest6_axis0_out_data) {
-  std::vector<float> in = {100, 2, 4, 50, 11, 12, 34, 35, 10, 20, 40, 5, 7, 80, 10, 11, 55, 25, 5, 15, 18, 8, 15, 16};
-  std::vector<float> except_out = {100, 25, 40, 50, 18, 80, 34, 35, 55, 20, 5, 15, 11, 12, 15, 16};
-  ArgMinMaxParameter param;
-  // Axis 0 is the one that shrinks from 3 (in_shape) to 2 (out_shape), and ArgMaxDim0 is the kernel under test,
-  // so the buffer below has to be sized from in_shape[0] (3 entries) rather than in_shape[1] (2 entries).
-  // NOTE(review): presumably ArgMaxDim0 stores one ArgElement per axis-0 entry - confirm against the kernel.
-  param.axis_ = 0;
-  std::vector<int> in_shape = {3, 2, 2, 2};
-  param.arg_elements_ = reinterpret_cast<ArgElement *>(malloc(in_shape[param.axis_] * sizeof(ArgElement)));
-  ASSERT_NE(param.arg_elements_, nullptr);
-  param.out_value_ = true;
-  param.topk_ = 2;
-  std::vector<int> out_shape = {2, 2, 2, 2};
-  ComputeStrides(in_shape.data(), param.in_strides_, in_shape.size());
-  ComputeStrides(out_shape.data(), param.out_strides_, out_shape.size());
-  float out[16];
-  ArgMaxDim0(in.data(), out, in_shape.data(), &param);
-  // Release the buffer before asserting: a failing ASSERT_* returns from the test body immediately.
-  free(param.arg_elements_);
-  param.arg_elements_ = nullptr;
-  ASSERT_EQ(0, CompareOutputData(out, except_out.data(), except_out.size(), 0.00001));
-}
-
-// Top-2 minimum values along axis 3 of a {1, 1, 3, 5} input; the expected output has shape {1, 1, 3, 2}.
-TEST_F(TestArgMinMaxTestFp32, ArgMinTest1_axis3_out_data) {
-  std::vector<float> in = {10, 20, 30, 40, 90, 20, 11, 15, 1, 50, 30, 45, 25, 50, 30};
-  std::vector<float> except_out = {10, 20, 1, 11, 25, 30};
-  ArgMinMaxParameter param;
-  param.axis_ = 3;
-  std::vector<int> in_shape = {1, 1, 3, 5};
-  // Buffer sized for one ArgElement per entry along param.axis_.
-  param.arg_elements_ = reinterpret_cast<ArgElement *>(malloc(in_shape[param.axis_] * sizeof(ArgElement)));
-  ASSERT_NE(param.arg_elements_, nullptr);
-  param.out_value_ = true;
-  param.topk_ = 2;
-  std::vector<int> out_shape = {1, 1, 3, 2};
-  ComputeStrides(in_shape.data(), param.in_strides_, in_shape.size());
-  ComputeStrides(out_shape.data(), param.out_strides_, out_shape.size());
-  float out[6];
-  ArgMinDim3(in.data(), out, in_shape.data(), &param);
-  // Release the buffer before asserting: a failing ASSERT_* returns from the test body immediately.
-  free(param.arg_elements_);
-  param.arg_elements_ = nullptr;
-  ASSERT_EQ(0, CompareOutputData(out, except_out.data(), except_out.size(), 0.00001));
-}
-
-}  // namespace mindspore
--- a/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/instance_norm_fp32_tests.cc
+++ b/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/instance_norm_fp32_tests.cc
@ -1,134 +0,0 @@
-/**
- * Copyright 2020 Huawei Technologies Co., Ltd
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#include <iostream>
-#include "src/common/log_adapter.h"
-#include "common/common_test.h"
-#include "mindspore/lite/nnacl/fp32/instance_norm_fp32.h"
-#include "mindspore/lite/src/kernel_registry.h"
-#include "mindspore/lite/src/lite_kernel.h"
-
-namespace mindspore {
-// Test fixture for the fp32 InstanceNorm kernel cases; it adds no state of its own on top of CommonTest.
-class TestInstanceNormFp32 : public mindspore::CommonTest {
- public:
-  TestInstanceNormFp32() = default;
-};
-
-// Creates the CPU fp32 InstanceNorm kernel through the kernel registry, runs it on a {1, 2, 2, 3} input with two
-// 3-element side inputs, and compares the 12 outputs against precomputed values with tolerance 0.001.
-TEST_F(TestInstanceNormFp32, INTest1) {
-  std::vector<float> in_data = {-11.18675,  11.433986,  11.386012, 11.245945,   -2.7614849, 14.692399,
-                                -1.1983503, -6.6790967, 6.383416,  -13.3213005, -8.693595,  9.476344};
-  std::vector<float> in_data1 = {12.352293, 5.122387, 14.249514};
-  std::vector<float> in_data2 = {14.632595, 0.70900035, 11.179003};
-
-  InstanceNormParameter op_param;
-  op_param.op_parameter_.type_ = schema::PrimitiveType_InstanceNorm;
-  op_param.epsilon_ = 0.001f;
-
-  // The tensors borrow the vectors' storage; it is detached again at the end of the test.
-  lite::Tensor input0_tensor(kNumberTypeFloat32, {1, 2, 2, 3});
-  lite::Tensor input1_tensor(kNumberTypeFloat32, {3});
-  lite::Tensor input2_tensor(kNumberTypeFloat32, {3});
-  input0_tensor.set_data(in_data.data());
-  input1_tensor.set_data(in_data1.data());
-  input2_tensor.set_data(in_data2.data());
-  std::vector<lite::Tensor *> inputs_tensor = {&input0_tensor, &input1_tensor, &input2_tensor};
-
-  std::vector<float> output(12);
-  std::vector<float> corr_out = {5.0145645, 9.248516,   15.439679, 33.51017,  0.0012711287, 31.0666883,
-                                 17.70254,  -2.5507483, -8.204435, 2.3031063, -3.8630369,   6.4138837};
-
-  lite::Tensor output0_tensor(kNumberTypeFloat32, {1, 2, 2, 3});
-  output0_tensor.set_data(output.data());
-  std::vector<lite::Tensor *> outputs_tensor = {&output0_tensor};
-
-  kernel::KernelKey desc = {kernel::KERNEL_ARCH::kCPU, kNumberTypeFloat32, schema::PrimitiveType_InstanceNorm};
-  auto creator = lite::KernelRegistry::GetInstance()->GetCreator(desc);
-  ASSERT_NE(creator, nullptr);
-  lite::InnerContext ctx;
-  ctx.thread_num_ = 4;
-  ASSERT_EQ(lite::RET_OK, ctx.Init());
-  // NOTE(review): the kernel is never deleted here; op_param is a stack object, so confirm who owns the
-  // parameter before adding a delete.
-  kernel::LiteKernel *kernel =
-    creator(inputs_tensor, outputs_tensor, reinterpret_cast<OpParameter *>(&op_param), &ctx, desc, nullptr);
-  ASSERT_NE(kernel, nullptr);
-  kernel->Run();
-
-  printf("==================output data=================\n");
-  for (int i = 0; i < output0_tensor.ElementsNum(); i++) {
-    std::cout << output[i] << " ,";
-  }
-  std::cout << std::endl;
-  // Evaluate the comparison first and assert only after the buffers are detached: a failing ASSERT_* returns
-  // immediately and would otherwise leave the tensors pointing at vector-owned storage when they are destroyed.
-  auto compare_ret = CompareOutputData(output.data(), corr_out.data(), output0_tensor.ElementsNum(), 0.001);
-
-  input0_tensor.set_data(nullptr);
-  input1_tensor.set_data(nullptr);
-  input2_tensor.set_data(nullptr);
-  output0_tensor.set_data(nullptr);
-  ASSERT_EQ(0, compare_ret);
-}
-
-// Same flow as the single-batch case but with a {2, 2, 2, 3} input and {2, 3} side inputs: create the CPU fp32
-// InstanceNorm kernel through the registry, run it, and compare the 24 outputs with tolerance 0.001.
-TEST_F(TestInstanceNormFp32, INTest2) {
-  std::vector<float> in_data = {-11.18675,  11.433986,  11.386012, 11.245945,   -2.7614849, 14.692399,
-                                -1.1983503, -6.6790967, 6.383416,  -13.3213005, -8.693595,  9.476344,
-                                -12.18675,  12.433986,  12.386012, 12.245945,   -3.7614849, 15.692399,
-                                -2.1983503, -7.6790967, 7.383416,  -14.3213005, -9.693595,  10.476344};
-  std::vector<float> in_data1 = {12.352293, 5.122387, 14.249514, 12.352293, 5.122387, 14.249514};
-  std::vector<float> in_data2 = {14.632595, 0.70900035, 11.179003, 14.632595, 0.70900035, 11.179003};
-
-  InstanceNormParameter op_param;
-  op_param.op_parameter_.type_ = schema::PrimitiveType_InstanceNorm;
-  op_param.epsilon_ = 0.001f;
-
-  // The tensors borrow the vectors' storage; it is detached again at the end of the test.
-  lite::Tensor input0_tensor(kNumberTypeFloat32, {2, 2, 2, 3});
-  lite::Tensor input1_tensor(kNumberTypeFloat32, {2, 3});
-  lite::Tensor input2_tensor(kNumberTypeFloat32, {2, 3});
-  input0_tensor.set_data(in_data.data());
-  input1_tensor.set_data(in_data1.data());
-  input2_tensor.set_data(in_data2.data());
-  std::vector<lite::Tensor *> inputs_tensor = {&input0_tensor, &input1_tensor, &input2_tensor};
-
-  std::vector<float> output(24);
-  std::vector<float> corr_out = {5.0145645, 9.248516,   15.439679, 33.51017,  0.0012711287, 31.0666883,
-                                 17.70254,  -2.5507483, -8.204435, 2.3031063, -3.8630369,   6.4138837,
-                                 5.133601,  9.310399,   15.439679, 33.886883, -0.22505027,  31.066883,
-                                 16.888313, -2.5316327, -8.204435, 2.6215858, -3.717714,    6.4138837};
-
-  lite::Tensor output0_tensor(kNumberTypeFloat32, {2, 2, 2, 3});
-  output0_tensor.set_data(output.data());
-  std::vector<lite::Tensor *> outputs_tensor = {&output0_tensor};
-
-  kernel::KernelKey desc = {kernel::KERNEL_ARCH::kCPU, kNumberTypeFloat32, schema::PrimitiveType_InstanceNorm};
-  auto creator = lite::KernelRegistry::GetInstance()->GetCreator(desc);
-  ASSERT_NE(creator, nullptr);
-  lite::InnerContext ctx;
-  ctx.thread_num_ = 4;
-  ASSERT_EQ(lite::RET_OK, ctx.Init());
-  // NOTE(review): the kernel is never deleted here; op_param is a stack object, so confirm who owns the
-  // parameter before adding a delete.
-  kernel::LiteKernel *kernel =
-    creator(inputs_tensor, outputs_tensor, reinterpret_cast<OpParameter *>(&op_param), &ctx, desc, nullptr);
-  ASSERT_NE(kernel, nullptr);
-  kernel->Run();
-
-  printf("==================output data=================\n");
-  for (int i = 0; i < output0_tensor.ElementsNum(); i++) {
-    std::cout << output[i] << " ,";
-  }
-  std::cout << std::endl;
-  // Evaluate the comparison first and assert only after the buffers are detached: a failing ASSERT_* returns
-  // immediately and would otherwise leave the tensors pointing at vector-owned storage when they are destroyed.
-  auto compare_ret = CompareOutputData(output.data(), corr_out.data(), output0_tensor.ElementsNum(), 0.001);
-
-  input0_tensor.set_data(nullptr);
-  input1_tensor.set_data(nullptr);
-  input2_tensor.set_data(nullptr);
-  output0_tensor.set_data(nullptr);
-  ASSERT_EQ(0, compare_ret);
-}
-}  // namespace mindspore