diff --git a/.jenkins/check/config/filter_cppcheck.txt b/.jenkins/check/config/filter_cppcheck.txt
index 2aee69fd0c6..fd233351aba 100644
--- a/.jenkins/check/config/filter_cppcheck.txt
+++ b/.jenkins/check/config/filter_cppcheck.txt
@@ -71,3 +71,6 @@
 "mindspore/mindspore/lite/src/litert/kernel/cpu/fp32/convolution_winograd_fp32.cc" "knownConditionTrueFalse"
 "mindspore/mindspore/lite/src/litert/kernel/cpu/fp32/convolution_winograd_fp32.cc" "shadowVariable"
 "mindspore/mindspore/ccsrc/plugin/device/ascend/kernel/tbe/tbe_utils.cc" "knownConditionTrueFalse"
+"mindspore/mindspore/lite/tools/kernel_builder/ascend/tbe_dsl/sample/op_proto/add_dsl.cc" "syntaxError"
+"mindspore/mindspore/lite/tools/kernel_builder/ascend/tbe_tik/sample/op_proto/matmul_tik.cc" "syntaxError"
+
diff --git a/.jenkins/check/config/filter_cpplint.txt b/.jenkins/check/config/filter_cpplint.txt
index 15c009db00f..b9299119e6e 100644
--- a/.jenkins/check/config/filter_cpplint.txt
+++ b/.jenkins/check/config/filter_cpplint.txt
@@ -86,3 +86,23 @@
 "mindspore/mindspore/lite/src/litert/delegate/nnapi/nnapi_implementation.cc" "build/include_order"
 "mindspore/mindspore/lite/src/extendrt/cxx_api/model/model_impl.cc" "whitespace/parens"
 "mindspore/mindspore/ccsrc/plugin/device/cpu/kernel/nnacl/experimental/HPC-generator/gemm_mask_avx512/" "runtime/int"
+# ascend samples
+"mindspore/mindspore/lite/tools/kernel_builder/ascend/aicpu/sample/" "build/include_subdir"
+"mindspore/mindspore/lite/tools/kernel_builder/ascend/tbe_dsl/sample/" "build/include_subdir"
+"mindspore/mindspore/lite/tools/kernel_builder/ascend/tbe_tik/sample/" "build/include_subdir"
+"mindspore/mindspore/lite/tools/kernel_builder/ascend/aicpu/sample/" "runtime/references"
+"mindspore/mindspore/lite/tools/kernel_builder/ascend/tbe_dsl/sample/" "runtime/references"
+"mindspore/mindspore/lite/tools/kernel_builder/ascend/tbe_tik/sample/" "runtime/references"
+"mindspore/mindspore/lite/tools/kernel_builder/ascend/aicpu/sample/" "whitespace/comments"
+"mindspore/mindspore/lite/tools/kernel_builder/ascend/tbe_dsl/sample/" "whitespace/comments"
+"mindspore/mindspore/lite/tools/kernel_builder/ascend/tbe_tik/sample/" "whitespace/comments"
+"mindspore/mindspore/lite/tools/kernel_builder/ascend/aicpu/sample/" "legal/copyright"
+"mindspore/mindspore/lite/tools/kernel_builder/ascend/tbe_dsl/sample/" "legal/copyright"
+"mindspore/mindspore/lite/tools/kernel_builder/ascend/tbe_tik/sample/" "legal/copyright"
+"mindspore/mindspore/lite/tools/kernel_builder/ascend/aicpu/sample/" "whitespace/ending_newline"
+"mindspore/mindspore/lite/tools/kernel_builder/ascend/tbe_dsl/sample/" "whitespace/ending_newline"
+"mindspore/mindspore/lite/tools/kernel_builder/ascend/tbe_tik/sample/" "whitespace/ending_newline"
+"mindspore/mindspore/lite/tools/kernel_builder/ascend/aicpu/sample/" "build/include"
+"mindspore/mindspore/lite/tools/kernel_builder/ascend/tbe_dsl/sample/" "build/include"
+"mindspore/mindspore/lite/tools/kernel_builder/ascend/tbe_tik/sample/" "build/include"
+
diff --git a/.jenkins/check/config/filter_pylint.txt b/.jenkins/check/config/filter_pylint.txt
index e57051f3be9..14e6c0a7877 100644
--- a/.jenkins/check/config/filter_pylint.txt
+++ b/.jenkins/check/config/filter_pylint.txt
@@ -184,3 +184,10 @@
 "mindspore/mindspore/lite/python/api/tensor.py" "protected-access"
 "mindspore/mindspore/lite/test" "missing-docstring"
 "mindspore/mindspore/lite/test" "unused-variable"
+# ascend samples
+"mindspore/mindspore/lite/tools/kernel_builder/ascend/tbe_dsl/sample/" "wrong-import-order"
+"mindspore/mindspore/lite/tools/kernel_builder/ascend/tbe_tik/sample/" "wrong-import-order" +"mindspore/mindspore/lite/tools/kernel_builder/ascend/tbe_dsl/sample/" "bad-whitespace" +"mindspore/mindspore/lite/tools/kernel_builder/ascend/tbe_tik/sample/" "bad-whitespace" +"mindspore/mindspore/lite/tools/kernel_builder/ascend/tbe_dsl/sample/" "bad-continuation" +"mindspore/mindspore/lite/tools/kernel_builder/ascend/tbe_tik/sample/" "bad-continuation" diff --git a/mindspore/lite/tools/kernel_builder/ascend/CMakeLists.txt b/mindspore/lite/tools/kernel_builder/ascend/CMakeLists.txt new file mode 100644 index 00000000000..23e7c0146c3 --- /dev/null +++ b/mindspore/lite/tools/kernel_builder/ascend/CMakeLists.txt @@ -0,0 +1,2 @@ +cmake_minimum_required(VERSION 3.12) +project(MS_ASCEND_CUSTOM_KERNEL_INSTALLER) diff --git a/mindspore/lite/tools/kernel_builder/ascend/README.md b/mindspore/lite/tools/kernel_builder/ascend/README.md new file mode 100644 index 00000000000..39e790ced94 --- /dev/null +++ b/mindspore/lite/tools/kernel_builder/ascend/README.md @@ -0,0 +1,15 @@ +Build Ascend customized kernel. +More details please refer to https://gitee.com/ascend/samples.git. + +## build + +mkdir build +cd build +cmake ../ +make + +## install + +./ms_ascend_custom_kernel_installer.run + +After install, you can use converter tools to convert model with customized kernel on Ascend developing env. diff --git a/mindspore/lite/tools/kernel_builder/ascend/aicpu/sample/op_info_cfg/aicpu_kernel/reshape_cust.ini b/mindspore/lite/tools/kernel_builder/ascend/aicpu/sample/op_info_cfg/aicpu_kernel/reshape_cust.ini new file mode 100644 index 00000000000..905afc1884a --- /dev/null +++ b/mindspore/lite/tools/kernel_builder/ascend/aicpu/sample/op_info_cfg/aicpu_kernel/reshape_cust.ini @@ -0,0 +1,9 @@ +[ReshapeCust] +opInfo.engine=DNN_VM_AICPU +opInfo.flagPartial=False +opInfo.computeCost=100 +opInfo.flagAsync=False +opInfo.opKernelLib=CUSTAICPUKernel +opInfo.kernelSo=libcust_aicpu_kernels.so +opInfo.functionName=RunCpuKernel +opInfo.workspaceSize=1024 diff --git a/mindspore/lite/tools/kernel_builder/ascend/aicpu/sample/reshape_cust_kernels.cc b/mindspore/lite/tools/kernel_builder/ascend/aicpu/sample/reshape_cust_kernels.cc new file mode 100644 index 00000000000..f67ce3ff01d --- /dev/null +++ b/mindspore/lite/tools/kernel_builder/ascend/aicpu/sample/reshape_cust_kernels.cc @@ -0,0 +1,41 @@ +/* + * Copyright (c) Huawei Technologies Co., Ltd. 2020. All rights reserved. 
+ * Description: implementation of the sample kernel
+ */
+
+#include "reshape_cust_kernels.h"
+#include <cstring>  // for memcpy
+#include "cpu_types.h"
+
+namespace {
+const char *RESHAPE_CUST = "ReshapeCust";
+}
+
+namespace aicpu {
+uint32_t ReshapeCustCpuKernel::Compute(CpuKernelContext &ctx) {
+  Tensor *input_tensor = ctx.Input(0);
+  if (input_tensor == nullptr) {
+    return -1;
+  }
+
+  Tensor *output_tensor = ctx.Output(0);
+  if (output_tensor == nullptr) {
+    return -1;
+  }
+  auto input_data = input_tensor->GetData();
+  if (input_data == nullptr) {
+    return -1;
+  }
+
+  auto output_data = output_tensor->GetData();
+  if (output_data == nullptr) {
+    return -1;
+  }
+
+  uint64_t data_size = input_tensor->GetDataSize();
+  memcpy(output_data, input_data, data_size);
+  return 0;
+}
+
+REGISTER_CPU_KERNEL(RESHAPE_CUST, ReshapeCustCpuKernel);
+}  // namespace aicpu
diff --git a/mindspore/lite/tools/kernel_builder/ascend/aicpu/sample/reshape_cust_kernels.h b/mindspore/lite/tools/kernel_builder/ascend/aicpu/sample/reshape_cust_kernels.h
new file mode 100644
index 00000000000..2bbfcaeb751
--- /dev/null
+++ b/mindspore/lite/tools/kernel_builder/ascend/aicpu/sample/reshape_cust_kernels.h
@@ -0,0 +1,28 @@
+/* Copyright 2020 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef _AICPU_RESHAPE_CUST_KERNELS_H_
+#define _AICPU_RESHAPE_CUST_KERNELS_H_
+
+#include "cpu_kernel.h"
+
+namespace aicpu {
+class ReshapeCustCpuKernel : public CpuKernel {
+ public:
+  ~ReshapeCustCpuKernel() = default;
+  uint32_t Compute(CpuKernelContext &ctx) override;
+};
+}  // namespace aicpu
+#endif  // _AICPU_RESHAPE_CUST_KERNELS_H_
diff --git a/mindspore/lite/tools/kernel_builder/ascend/scripts/installer.sh b/mindspore/lite/tools/kernel_builder/ascend/scripts/installer.sh
new file mode 100644
index 00000000000..983a771245a
--- /dev/null
+++ b/mindspore/lite/tools/kernel_builder/ascend/scripts/installer.sh
@@ -0,0 +1,2 @@
+#!/bin/bash
+# install MindSpore Ascend customized kernels
\ No newline at end of file
diff --git a/mindspore/lite/tools/kernel_builder/ascend/tbe_dsl/sample/op_proto/CMakeLists.txt b/mindspore/lite/tools/kernel_builder/ascend/tbe_dsl/sample/op_proto/CMakeLists.txt
new file mode 100644
index 00000000000..1eebd6abbad
--- /dev/null
+++ b/mindspore/lite/tools/kernel_builder/ascend/tbe_dsl/sample/op_proto/CMakeLists.txt
@@ -0,0 +1,19 @@
+# Copyright (c) Huawei Technologies Co., Ltd. 2020. All rights reserved.
+set(CMAKE_CXX_COMPILER g++)
+set(CMAKE_C_COMPILER gcc)
+# add source files
+aux_source_directory(. SRCS)
+
+if("x${SRCS}" STREQUAL "x")
+  add_custom_target(${OP_PROTO_TARGET}
+    COMMAND mkdir -p ${OP_PROTO_TARGET_OUT_DIR}
+    COMMAND echo "no source to make lib${OP_PROTO_TARGET}.so")
+  return(0)
+endif()
+
+set(LIBRARY_OUTPUT_PATH ${OP_PROTO_TARGET_OUT_DIR})
+
+message(STATUS "OP_PROTO_TARGET=${OP_PROTO_TARGET}")
+add_library(${OP_PROTO_TARGET} SHARED ${SRCS})
+
+target_link_libraries(${OP_PROTO_TARGET} ${ASCEND_INC}/../lib64/libgraph.so)
diff --git a/mindspore/lite/tools/kernel_builder/ascend/tbe_dsl/sample/op_proto/add_dsl.cc b/mindspore/lite/tools/kernel_builder/ascend/tbe_dsl/sample/op_proto/add_dsl.cc
new file mode 100644
index 00000000000..97e6a3f52dc
--- /dev/null
+++ b/mindspore/lite/tools/kernel_builder/ascend/tbe_dsl/sample/op_proto/add_dsl.cc
@@ -0,0 +1,96 @@
+/**
+ * Copyright (C) 2019. Huawei Technologies Co., Ltd. All rights reserved.
+
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the Apache License Version 2.0. You may not use this file except in compliance with the License.
+
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * Apache License for more details at
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * @file add_dsl.cc
+ *
+ * @brief infer-shape and verify functions of the AddDsl custom op
+ *
+ * @version 1.0
+ *
+ */
+#include "./add_dsl.h"
+#include <string>
+#include <vector>
+
+namespace ge {
+
+bool InferShapeAndTypeAdd(Operator &op, const string &input_name1, const string &input_name2,
+                          const string &output_name) {
+  // vOutputDesc.push_back(op.GetInputDesc(0));
+  TensorDesc vOutputDesc = op.GetOutputDescByName(output_name.c_str());
+
+  DataType input_dtype = op.GetInputDescByName(input_name1.c_str()).GetDataType();
+  Format input_format = op.GetInputDescByName(input_name1.c_str()).GetFormat();
+  // Swap the two shapes so that dimsX always holds the higher-rank one
+  ge::Shape shapeX = op.GetInputDescByName(input_name1.c_str()).GetShape();
+  ge::Shape shapeY = op.GetInputDescByName(input_name2.c_str()).GetShape();
+  std::vector<int64_t> dimsX = shapeX.GetDims();
+  std::vector<int64_t> dimsY = shapeY.GetDims();
+  if (dimsX.size() < dimsY.size()) {
+    std::vector<int64_t> dimsTmp = dimsX;
+    dimsX = dimsY;
+    dimsY = dimsTmp;
+  }
+
+  // Pad the smaller shape with leading 1s
+  if (dimsX.size() != dimsY.size()) {
+    int dec = dimsX.size() - dimsY.size();
+    for (int i = 0; i < dec; i++) {
+      dimsY.insert(dimsY.begin(), (int64_t)1);
+    }
+  }
+
+  // Set the output shape dimensions
+  std::vector<int64_t> dimVec;
+  for (size_t i = 0; i < dimsX.size(); i++) {
+    if ((dimsX[i] != dimsY[i]) && (dimsX[i] != 1) && (dimsY[i] != 1)) {
+      return false;
+    }
+
+    int64_t dims = dimsX[i] > dimsY[i] ? dimsX[i] : dimsY[i];
+    dimVec.push_back(dims);
+  }
+  ge::Shape outputShape = ge::Shape(dimVec);
+
+  vOutputDesc.SetShape(outputShape);
+  vOutputDesc.SetDataType(input_dtype);
+  vOutputDesc.SetFormat(input_format);
+  op.UpdateOutputDesc(output_name.c_str(), vOutputDesc);
+
+  return true;
+}
+
+//----------------Add-------------------
+IMPLEMT_VERIFIER(AddDsl, AddVerify) {
+  if (op.GetInputDescByName("x1").GetDataType() != op.GetInputDescByName("x2").GetDataType()) {
+    return GRAPH_FAILED;
+  }
+  return GRAPH_SUCCESS;
+}
+
+// Obtains the processing function of the output tensor description.
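+// AddInferShape delegates to InferShapeAndTypeAdd() above, so the output
+// descriptor of AddDsl gets the broadcast shape and the dtype/format of x1.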
+IMPLEMT_COMMON_INFERFUNC(AddInferShape) {
+  if (InferShapeAndTypeAdd(op, "x1", "x2", "y")) {
+    return GRAPH_SUCCESS;
+  }
+  return GRAPH_FAILED;
+}
+
+// Registered inferfunction
+COMMON_INFER_FUNC_REG(AddDsl, AddInferShape);
+
+// Registered verify function
+VERIFY_FUNC_REG(AddDsl, AddVerify);
+//----------------Add-------------------
+}  // namespace ge
diff --git a/mindspore/lite/tools/kernel_builder/ascend/tbe_dsl/sample/op_proto/add_dsl.h b/mindspore/lite/tools/kernel_builder/ascend/tbe_dsl/sample/op_proto/add_dsl.h
new file mode 100644
index 00000000000..769c30025a0
--- /dev/null
+++ b/mindspore/lite/tools/kernel_builder/ascend/tbe_dsl/sample/op_proto/add_dsl.h
@@ -0,0 +1,35 @@
+/**
+ * Copyright (C) 2020. Huawei Technologies Co., Ltd. All rights reserved.
+
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the Apache License Version 2.0. You may not use this file except in compliance with the License.
+
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * Apache License for more details at
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * @file add_dsl.h
+ *
+ * @brief operator registration of the AddDsl custom op
+ *
+ * @version 1.0
+ *
+ */
+
+#ifndef GE_OPS_OP_PROTO_ADDDSL_H_
+#define GE_OPS_OP_PROTO_ADDDSL_H_
+#include "graph/operator_reg.h"
+namespace ge {
+REG_OP(AddDsl)
+  .INPUT(x1, TensorType({DT_FLOAT, DT_INT32, DT_INT64, DT_FLOAT16, DT_INT16, DT_INT8, DT_UINT8, DT_DOUBLE,
+                         DT_COMPLEX128, DT_COMPLEX64, DT_STRING}))
+  .INPUT(x2, TensorType({DT_FLOAT, DT_INT32, DT_INT64, DT_FLOAT16, DT_INT16, DT_INT8, DT_UINT8, DT_DOUBLE,
+                         DT_COMPLEX128, DT_COMPLEX64, DT_STRING}))
+  .OUTPUT(y, TensorType({DT_FLOAT, DT_INT32, DT_INT64, DT_FLOAT16, DT_INT16, DT_INT8, DT_UINT8, DT_DOUBLE,
+                         DT_COMPLEX128, DT_COMPLEX64, DT_STRING}))
+  .OP_END_FACTORY_REG(AddDsl)
+}  // namespace ge
+
+#endif  // GE_OPS_OP_PROTO_ADDDSL_H_
diff --git a/mindspore/lite/tools/kernel_builder/ascend/tbe_dsl/sample/tbe/impl/__init__.py b/mindspore/lite/tools/kernel_builder/ascend/tbe_dsl/sample/tbe/impl/__init__.py
new file mode 100644
index 00000000000..e69de29bb2d
diff --git a/mindspore/lite/tools/kernel_builder/ascend/tbe_dsl/sample/tbe/impl/add_dsl.py b/mindspore/lite/tools/kernel_builder/ascend/tbe_dsl/sample/tbe/impl/add_dsl.py
new file mode 100755
index 00000000000..8f28d0dc383
--- /dev/null
+++ b/mindspore/lite/tools/kernel_builder/ascend/tbe_dsl/sample/tbe/impl/add_dsl.py
@@ -0,0 +1,117 @@
+#!/usr/bin/env python
+# -*- coding:utf-8 -*-
+"""
+Copyright (C) 2019. Huawei Technologies Co., Ltd. All rights reserved.
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the Apache License Version 2.0. You may not use this file
+except in compliance with the License.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+Apache License for more details at
+http://www.apache.org/licenses/LICENSE-2.0
+
+add
+"""
+from __future__ import absolute_import
+
+from functools import reduce
+import tbe.dsl as tbe
+from tbe import tvm
+from tbe.common.register import register_op_compute
+from tbe.common.utils import para_check
+from tbe.common.utils import shape_util
+
+# General limitation of the reduce size for input shape: 2**31
+SHAPE_SIZE_LIMIT = 2147483648
+
+
+# pylint: disable=locally-disabled,too-many-arguments,unused-argument
+@register_op_compute("Add", op_mode="dynamic", support_fusion=True)
+def add_compute(input_x, input_y, output_z, kernel_name="add"):
+    """
+    calculating data's add, c = a + b
+
+    Parameters
+    ----------
+    input_x: TVM tensor
+        the placeholder of first input data
+    input_y: TVM tensor
+        the placeholder of second input data
+    output_z: dict
+        shape and dtype of output, should be broadcast shape and type as input
+    kernel_name: str
+        cce kernel name, default value is add
+
+    Returns
+    -------
+    res : output of the data's add
+    """
+    shape_x = shape_util.shape_to_list(input_x.shape)
+    shape_y = shape_util.shape_to_list(input_y.shape)
+
+    shape_x, shape_y, shape_max = shape_util.broadcast_shapes(shape_x, shape_y,
+                                                              param_name_input1="input_x",
+                                                              param_name_input2="input_y")
+    shape_size = reduce(lambda x, y: x * y, shape_max[:])
+    if shape_size > SHAPE_SIZE_LIMIT:
+        raise RuntimeError("the shape is too large to calculate")
+
+    input_x = tbe.broadcast(input_x, shape_max)
+    input_y = tbe.broadcast(input_y, shape_max)
+    res = tbe.vadd(input_x, input_y)
+
+    return res
+
+
+@para_check.check_op_params(para_check.REQUIRED_INPUT, para_check.REQUIRED_INPUT,
+                            para_check.REQUIRED_OUTPUT, para_check.KERNEL_NAME)
+def add_dsl(input_x, input_y, output_z, kernel_name="add_dsl"):
+    """
+    algorithm: add
+    calculating data's add, c = a + b
+
+    Parameters
+    ----------
+    input_x : dict
+        shape and dtype of first input, only support float16, float32, int32
+    input_y : dict
+        shape and dtype of second input, only support float16, float32, int32
+    output_z: dict
+        shape and dtype of output, should be broadcast shape and type as input
+    kernel_name : str
+        cce kernel name, default value is add
+
+    Returns
+    -------
+    None
+    """
+    shape_x = input_x.get("shape")
+    shape_y = input_y.get("shape")
+
+    check_tuple = ("float16", "float32", "int32")
+    input_data_type = input_x.get("dtype").lower()
+    para_check.check_dtype(input_data_type, check_tuple, param_name="input_x")
+
+    shape_x, shape_y, shape_max = shape_util.broadcast_shapes(shape_x, shape_y,
+                                                              param_name_input1="input_x",
+                                                              param_name_input2="input_y")
+
+    if shape_x[-1] == 1 and shape_y[-1] == 1 and shape_max[-1] == 1:
+        shape_x = shape_x if len(shape_x) == 1 else shape_x[:-1]
+        shape_y = shape_y if len(shape_y) == 1 else shape_y[:-1]
+        shape_max = shape_max if len(shape_max) == 1 else shape_max[:-1]
+
+    data_x = tvm.placeholder(shape_x, name="data_1", dtype=input_data_type)
+    data_y = tvm.placeholder(shape_y, name="data_2", dtype=input_data_type)
+
+    res = add_compute(data_x, data_y, output_z, kernel_name)
+
+    with tvm.target.cce():
+        schedule = tbe.auto_schedule(res)
+
+    config = {"name": kernel_name,
+              "tensor_list": (data_x, data_y, res)}
+    tbe.build(schedule, config)
diff --git a/mindspore/lite/tools/kernel_builder/ascend/tbe_dsl/sample/tbe/op_info_cfg/ai_core/ascend310/add_dsl.ini b/mindspore/lite/tools/kernel_builder/ascend/tbe_dsl/sample/tbe/op_info_cfg/ai_core/ascend310/add_dsl.ini
new file mode 100644
index 00000000000..5d91fb6996c
--- /dev/null
+++ b/mindspore/lite/tools/kernel_builder/ascend/tbe_dsl/sample/tbe/op_info_cfg/ai_core/ascend310/add_dsl.ini
@@ -0,0 +1,18 @@
+[AddDsl]
+input0.name=x1
+input0.dtype=float16,float16,float16,float16,float,float,float,float,int32,int32,int32,int32
+input0.shape=all
+input0.paramType=required
+input0.format=NCHW,NC1HWC0,NHWC,ND,NCHW,NC1HWC0,NHWC,ND,NCHW,NC1HWC0,NHWC,ND
+input1.name=x2
+input1.dtype=float16,float16,float16,float16,float,float,float,float,int32,int32,int32,int32
+input1.shape=all
+input1.paramType=required
+input1.format=NCHW,NC1HWC0,NHWC,ND,NCHW,NC1HWC0,NHWC,ND,NCHW,NC1HWC0,NHWC,ND
+output0.name=y
+output0.dtype=float16,float16,float16,float16,float,float,float,float,int32,int32,int32,int32
+output0.shape=all
+output0.paramType=required
+output0.format=NCHW,NC1HWC0,NHWC,ND,NCHW,NC1HWC0,NHWC,ND,NCHW,NC1HWC0,NHWC,ND
+opFile.value=add_dsl
+opInterface.value=add_dsl
diff --git a/mindspore/lite/tools/kernel_builder/ascend/tbe_dsl/sample/tbe/op_info_cfg/ai_core/ascend310p/add_dsl.ini b/mindspore/lite/tools/kernel_builder/ascend/tbe_dsl/sample/tbe/op_info_cfg/ai_core/ascend310p/add_dsl.ini
new file mode 100644
index 00000000000..5d91fb6996c
--- /dev/null
+++ b/mindspore/lite/tools/kernel_builder/ascend/tbe_dsl/sample/tbe/op_info_cfg/ai_core/ascend310p/add_dsl.ini
@@ -0,0 +1,18 @@
+[AddDsl]
+input0.name=x1
+input0.dtype=float16,float16,float16,float16,float,float,float,float,int32,int32,int32,int32
+input0.shape=all
+input0.paramType=required
+input0.format=NCHW,NC1HWC0,NHWC,ND,NCHW,NC1HWC0,NHWC,ND,NCHW,NC1HWC0,NHWC,ND
+input1.name=x2
+input1.dtype=float16,float16,float16,float16,float,float,float,float,int32,int32,int32,int32
+input1.shape=all
+input1.paramType=required
+input1.format=NCHW,NC1HWC0,NHWC,ND,NCHW,NC1HWC0,NHWC,ND,NCHW,NC1HWC0,NHWC,ND
+output0.name=y
+output0.dtype=float16,float16,float16,float16,float,float,float,float,int32,int32,int32,int32
+output0.shape=all
+output0.paramType=required
+output0.format=NCHW,NC1HWC0,NHWC,ND,NCHW,NC1HWC0,NHWC,ND,NCHW,NC1HWC0,NHWC,ND
+opFile.value=add_dsl
+opInterface.value=add_dsl
diff --git a/mindspore/lite/tools/kernel_builder/ascend/tbe_dsl/sample/tbe/op_info_cfg/ai_core/ascend910/add_dsl.ini b/mindspore/lite/tools/kernel_builder/ascend/tbe_dsl/sample/tbe/op_info_cfg/ai_core/ascend910/add_dsl.ini
new file mode 100644
index 00000000000..5d91fb6996c
--- /dev/null
+++ b/mindspore/lite/tools/kernel_builder/ascend/tbe_dsl/sample/tbe/op_info_cfg/ai_core/ascend910/add_dsl.ini
@@ -0,0 +1,18 @@
+[AddDsl]
+input0.name=x1
+input0.dtype=float16,float16,float16,float16,float,float,float,float,int32,int32,int32,int32
+input0.shape=all
+input0.paramType=required
+input0.format=NCHW,NC1HWC0,NHWC,ND,NCHW,NC1HWC0,NHWC,ND,NCHW,NC1HWC0,NHWC,ND
+input1.name=x2
+input1.dtype=float16,float16,float16,float16,float,float,float,float,int32,int32,int32,int32
+input1.shape=all
+input1.paramType=required
+input1.format=NCHW,NC1HWC0,NHWC,ND,NCHW,NC1HWC0,NHWC,ND,NCHW,NC1HWC0,NHWC,ND
+output0.name=y
+output0.dtype=float16,float16,float16,float16,float,float,float,float,int32,int32,int32,int32
+output0.shape=all
+output0.paramType=required
+output0.format=NCHW,NC1HWC0,NHWC,ND,NCHW,NC1HWC0,NHWC,ND,NCHW,NC1HWC0,NHWC,ND
+opFile.value=add_dsl
+opInterface.value=add_dsl
diff --git a/mindspore/lite/tools/kernel_builder/ascend/tbe_tik/sample/op_proto/CMakeLists.txt b/mindspore/lite/tools/kernel_builder/ascend/tbe_tik/sample/op_proto/CMakeLists.txt
new file mode 100644
index 00000000000..1eebd6abbad
--- /dev/null
+++ b/mindspore/lite/tools/kernel_builder/ascend/tbe_tik/sample/op_proto/CMakeLists.txt
@@ -0,0 +1,19 @@
+# Copyright (c) Huawei Technologies Co., Ltd. 2020. All rights reserved.
+set(CMAKE_CXX_COMPILER g++)
+set(CMAKE_C_COMPILER gcc)
+# add source files
+aux_source_directory(. SRCS)
+
+if("x${SRCS}" STREQUAL "x")
+  add_custom_target(${OP_PROTO_TARGET}
+    COMMAND mkdir -p ${OP_PROTO_TARGET_OUT_DIR}
+    COMMAND echo "no source to make lib${OP_PROTO_TARGET}.so")
+  return(0)
+endif()
+
+set(LIBRARY_OUTPUT_PATH ${OP_PROTO_TARGET_OUT_DIR})
+
+message(STATUS "OP_PROTO_TARGET=${OP_PROTO_TARGET}")
+add_library(${OP_PROTO_TARGET} SHARED ${SRCS})
+
+target_link_libraries(${OP_PROTO_TARGET} ${ASCEND_INC}/../lib64/libgraph.so)
diff --git a/mindspore/lite/tools/kernel_builder/ascend/tbe_tik/sample/op_proto/matmul_tik.cc b/mindspore/lite/tools/kernel_builder/ascend/tbe_tik/sample/op_proto/matmul_tik.cc
new file mode 100644
index 00000000000..82a5dd444f8
--- /dev/null
+++ b/mindspore/lite/tools/kernel_builder/ascend/tbe_tik/sample/op_proto/matmul_tik.cc
@@ -0,0 +1,42 @@
+#include "matmul_tik.h"
+#include <string>
+#include <vector>
+
+namespace ge {
+
+IMPLEMT_VERIFIER(MatmulTik, MatmulTikVerify) {
+  std::vector<DataType> support_list;
+  support_list.reserve(5);
+  support_list.push_back(DT_FLOAT16);
+  support_list.push_back(DT_FLOAT);
+  support_list.push_back(DT_INT32);
+  support_list.push_back(DT_INT8);
+  support_list.push_back(DT_UINT8);
+
+  return GRAPH_SUCCESS;
+}
+
+// Obtains the processing function of the output tensor description.
+IMPLEMT_COMMON_INFERFUNC(MatmulTikInferShape) {
+  TensorDesc tensordesc_output = op.GetOutputDescByName("y");
+  ge::TensorDesc inputTensorDescX = op.GetInputDescByName("x1");
+  ge::TensorDesc inputTensorDescY = op.GetInputDescByName("x2");
+  ge::Shape shapeX = inputTensorDescX.GetShape();
+  ge::Shape shapeY = inputTensorDescY.GetShape();
+  DataType dtype = inputTensorDescX.GetDataType();
+  std::vector<int64_t> dimVector;
+  dimVector.push_back(shapeX.GetDim(0));
+  dimVector.push_back(shapeY.GetDim(1));
+  ge::Shape outputShape(dimVector);
+  tensordesc_output.SetShape(outputShape);
+  tensordesc_output.SetDataType(dtype);
+  (void)op.UpdateOutputDesc("y", tensordesc_output);
+  return GRAPH_SUCCESS;
+}
+
+// Registered inferfunction
+COMMON_INFER_FUNC_REG(MatmulTik, MatmulTikInferShape);
+
+// Registered verify function
+VERIFY_FUNC_REG(MatmulTik, MatmulTikVerify);
+}  // namespace ge
diff --git a/mindspore/lite/tools/kernel_builder/ascend/tbe_tik/sample/op_proto/matmul_tik.h b/mindspore/lite/tools/kernel_builder/ascend/tbe_tik/sample/op_proto/matmul_tik.h
new file mode 100644
index 00000000000..e2a62b41e35
--- /dev/null
+++ b/mindspore/lite/tools/kernel_builder/ascend/tbe_tik/sample/op_proto/matmul_tik.h
@@ -0,0 +1,14 @@
+#ifndef GE_OP_MATMULTIK_H
+#define GE_OP_MATMULTIK_H
+
+#include "graph/operator_reg.h"
+
+namespace ge {
+REG_OP(MatmulTik)
+  .INPUT(x1, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32}))
+  .INPUT(x2, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32}))
+  .OUTPUT(y, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32}))
+  .OP_END_FACTORY_REG(MatmulTik)
+}  // namespace ge
+
+#endif  // GE_OP_MATMULTIK_H
\ No newline at end of file
diff --git a/mindspore/lite/tools/kernel_builder/ascend/tbe_tik/sample/tbe/impl/__init__.py b/mindspore/lite/tools/kernel_builder/ascend/tbe_tik/sample/tbe/impl/__init__.py
new file mode 100644
index 00000000000..e69de29bb2d
diff --git a/mindspore/lite/tools/kernel_builder/ascend/tbe_tik/sample/tbe/impl/matmul_tik.py b/mindspore/lite/tools/kernel_builder/ascend/tbe_tik/sample/tbe/impl/matmul_tik.py
new file mode 100755
index 00000000000..d374832e817
--- /dev/null
+++ b/mindspore/lite/tools/kernel_builder/ascend/tbe_tik/sample/tbe/impl/matmul_tik.py
@@ -0,0 +1,212 @@
+"""
+Copyright 2020 Huawei Technologies Co., Ltd. All rights reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+
+matmul_tik
+"""
+
+from tbe import tik
+from tbe.common.platform import get_soc_spec
+
+DTYPE_SIZE = {
+    'bool': 1,
+    'uint8': 1,
+    'int8': 1,
+    'uint16': 2,
+    'int16': 2,
+    'int24': 3,
+    'uint32': 4,
+    'int32': 4,
+    'float16': 2,
+    'float32': 4,
+    'int48': 6,
+    'int64': 8,
+    'uint64': 8,
+    'float64': 8
+}
+
+
+def MK_TO_K1MK0(tik_instance, mk_input_tensor, k1mk0_tensor, dtype, k1, m, k0):
+    """data move mk to k1mk0"""
+    src_ub = tik_instance.Tensor(dtype, (k1, m, k0), name='src_ub', scope=tik.scope_ubuf)
+
+    # data_move(m, k) ---> (k1, m, k0)
+    with tik_instance.for_range(0, k1) as i:
+        tik_instance.data_move(src_ub[i * m * k0:], mk_input_tensor[i * k0:], 0, m, k0 * DTYPE_SIZE[dtype] // 32,
+                               (k1 - 1) * k0 * DTYPE_SIZE[dtype] // 32, 0)
+
+    tik_instance.data_move(k1mk0_tensor, src_ub, 0, 1, k1 * m * k0 * DTYPE_SIZE[dtype] // 32, 0, 0)
+
+
+def KN_TO_K1NK0(tik_instance, kn_input_tensor, k1nk0_tensor, dtype, k1, n, k0):
+    """data move kn to k1nk0"""
+
+    with tik_instance.for_range(0, k1) as index:
+        k1nk0_ub = tik_instance.Tensor(dtype, (n, k0), tik.scope_ubuf, "k1nk0_ub")
+        src_ub = tik_instance.Tensor(dtype, (k0, n), tik.scope_ubuf, "src_ub")
+        burst_len = k0 * n * DTYPE_SIZE[dtype] // 32
+        tik_instance.data_move(src_ub, kn_input_tensor[index * k0 * n], 0, 1, burst_len, 0, 0)
+        dst_list = [k1nk0_ub[16 * i] for i in range(16)]
+        src_list = [src_ub[n * i] for i in range(16)]
+        rep_times = n // k0
+        dst_rep_stride = k0
+        src_rep_stride = 1
+        tik_instance.vec_trans_scatter(False, False, dst_list, src_list, rep_times, dst_rep_stride, src_rep_stride)
+        tik_instance.data_move(k1nk0_tensor[index * k0 * n], k1nk0_ub, 0, 1, burst_len, 0, 0)
+
+
+def N1MN0_TO_MN(tik_instance, mn_output_tensor, n1mn0_tensor, dtype, n1, m, n0):
+    """data move n1mn0 to mn"""
+    src_ub = tik_instance.Tensor(dtype, (m, n1 * n0), name='src_ub', scope=tik.scope_ubuf)
+
+    # data_move(n1, m, n0) ---> (m, n)
+    with tik_instance.for_range(0, n1) as i:
+        tik_instance.data_move(src_ub[i * n0:], n1mn0_tensor[i * m * n0:], 0, m,
+                               n0 * DTYPE_SIZE[dtype] // 32, 0, (n1 - 1) * n0 * DTYPE_SIZE[dtype] // 32)
+
+    tik_instance.data_move(mn_output_tensor, src_ub, 0, 1, m * n1 * n0 * DTYPE_SIZE[dtype] // 32, 0, 0)
+
+
+def matmul_tik_compute(params, kernel_name):
+    """
+    matmul tik compute
+    @param params: matmul data
+    @param kernel_name: kernel name
+    @return: tik instance
+    """
+    tik_instance = tik.Tik()
+    if not isinstance(params, dict):
+        params = params.__dict__
+    m_size, k_size, n_size = params['M'], params['K'], params['N']
+    data_type = params["data_type"]
+    m_tiling_size = int(params["m_tiling_size"])
+    n_tiling_size = int(params["n_tiling_size"])
+    k_tiling_size = int(params['k_tiling_size'])
+
+    m_cycle_times = params["m_cycle_times"]
params["n_cycle_times"] + k_cycle_times = params["k_cycle_times"] + + # Determine the output type + if data_type == "float16": + if get_soc_spec("SOC_VERSION") in ["SD3403", "OPTG", "Hi3796CV300CS", "TsnsC"]: + C_loc_out_type = "float16" + else: + C_loc_out_type = "float32" + K0 = 16 + else: + C_loc_out_type = "int32" + K0 = 32 + block_size = 16 + + n_thread_num = params['n_thread_num'] + m_thread_num = params['m_thread_num'] + k_thread_num = params['k_thread_num'] + + mk_gm_input = tik_instance.Tensor(data_type, (m_size, k_size), name="mk_input_gm", scope=tik.scope_gm) + kn_gm_input = tik_instance.Tensor(data_type, (k_size, n_size), name="kn_input_gm", scope=tik.scope_gm) + + k1mk0_workspace = tik_instance.Tensor(data_type, (k_size // K0, m_size, K0), name="k1mk0_workspace", + scope=tik.scope_gm, is_workspace=True) + + k1nk0_workspace = tik_instance.Tensor(data_type, (k_size // K0, n_size, K0), name="k1nk0_workspace", + scope=tik.scope_gm, is_workspace=True) + + mn_gm_output = tik_instance.Tensor(C_loc_out_type, (m_size, n_size), tik.scope_gm, name="mn_output_gm") + nmk0_workspace = tik_instance.Tensor(C_loc_out_type, (n_size // block_size, m_size, block_size), + name="nmk0_workspace", scope=tik.scope_gm, is_workspace=True) + + MK_TO_K1MK0(tik_instance, mk_gm_input, k1mk0_workspace, data_type, k_size // K0, m_size, K0) + KN_TO_K1NK0(tik_instance, kn_gm_input, k1nk0_workspace, data_type, k_size // K0, n_size, K0) + + # Tiling is realized through the for_range() loop. + with tik_instance.for_range(0, 2, block_num = 1) as core_id: + with tik_instance.for_range(0, n_cycle_times // 2, thread_num=n_thread_num) as n_idx: + with tik_instance.for_range(0, m_cycle_times, thread_num=m_thread_num) as m_idx: + dst_l0c = tik_instance.Tensor(C_loc_out_type, [n_tiling_size // 16, m_tiling_size, 16], name='dst_l0c', + scope=tik.scope_cbuf_out) + with tik_instance.for_range(0, k_cycle_times, + thread_num=k_thread_num) as k_idx: + # Calculation result data transfer. + inputa_l1 = tik_instance.Tensor(params['data_type'], [k_tiling_size // K0, m_tiling_size, K0], + name="A_tiling_l1", scope=tik.scope_cbuf) + tik_instance.data_move(inputa_l1, + k1mk0_workspace[k_idx * k_tiling_size // K0, m_idx * m_tiling_size, :], + 0, k_tiling_size // K0, m_tiling_size, m_size - m_tiling_size, 0) + inputb_l1 = tik_instance.Tensor(params["data_type"], [k_tiling_size // K0, n_tiling_size, K0], + name="B_tiling_l1", scope=tik.scope_cbuf) + if n_size - n_tiling_size > 65535: + with tik_instance.for_range(0, k_tiling_size // K0) \ + as dma_k_idx: + tik_instance.data_move(inputb_l1[dma_k_idx, :, :], + k1nk0_workspace[k_idx * k_tiling_size // K0 + dma_k_idx, + (core_id * n_cycle_times // 2 + n_idx) * n_tiling_size, :], + 0, 1, n_tiling_size, 0, 0) + else: + tik_instance.data_move(inputb_l1, k1nk0_workspace[k_idx * k_tiling_size // K0, + (core_id * n_cycle_times // 2 + n_idx) * n_tiling_size, :], + 0, k_tiling_size // K0, n_tiling_size, n_size - n_tiling_size, 0) + # Call matmul API to matrix multiplication calculation. 
+                    with tik_instance.if_scope(k_idx == 0):
+                        tik_instance.matmul(dst_l0c, inputa_l1, inputb_l1, m_tiling_size, k_tiling_size, n_tiling_size,
+                                            init_l1out=True)
+                    with tik_instance.else_scope():
+                        tik_instance.matmul(dst_l0c, inputa_l1, inputb_l1, m_tiling_size, k_tiling_size, n_tiling_size,
+                                            init_l1out=False)
+                tik_instance.fixpipe(nmk0_workspace[n_tiling_size // 16 * (core_id * n_cycle_times // 2 + n_idx),
+                                     m_idx * m_tiling_size, :], dst_l0c, n_tiling_size // 16, m_tiling_size * 16 *
+                                     DTYPE_SIZE[C_loc_out_type] // 32,
+                                     (m_size - m_tiling_size) * 16 * DTYPE_SIZE[C_loc_out_type] // 32, 0)
+
+    N1MN0_TO_MN(tik_instance, mn_gm_output, nmk0_workspace, C_loc_out_type, n_size // K0, m_size, K0)
+
+    tik_instance.BuildCCE(kernel_name=kernel_name, inputs=[mk_gm_input, kn_gm_input], outputs=[mn_gm_output])
+    return tik_instance
+
+
+def matmul_tik(input_x1, input_x2, output_y=None, kernel_name="simple_matmul"):
+    """
+    matmul_tik main func
+    Parameters
+    ----------
+    input_x1: input data 1
+    input_x2: input data 2
+    output_y: output data
+    """
+    shape_a = input_x1.get("ori_shape")
+    shape_b = input_x2.get("ori_shape")
+    output_y = output_y
+    m = shape_a[0]
+    k = shape_a[1]
+    n = shape_b[1]
+    data_type = input_x1.get("dtype").lower()
+    params = {
+        'M': m,
+        'K': k,
+        'N': n,
+        'data_type': data_type,
+        'm_tiling_size': 16,
+        'm_cycle_times': 1,
+        'm_thread_num': 1,
+        'n_tiling_size': 64,
+        'n_cycle_times': 16,
+        'n_thread_num': 1,
+        'k_tiling_size': 32,
+        'k_cycle_times': 2,
+        'k_thread_num': 2,
+        'output_y': output_y
+    }
+    return matmul_tik_compute(params, kernel_name)
diff --git a/mindspore/lite/tools/kernel_builder/ascend/tbe_tik/sample/tbe/op_info_cfg/ai_core/ascend310/matmul_tik.ini b/mindspore/lite/tools/kernel_builder/ascend/tbe_tik/sample/tbe/op_info_cfg/ai_core/ascend310/matmul_tik.ini
new file mode 100755
index 00000000000..9f5f156633c
--- /dev/null
+++ b/mindspore/lite/tools/kernel_builder/ascend/tbe_tik/sample/tbe/op_info_cfg/ai_core/ascend310/matmul_tik.ini
@@ -0,0 +1,20 @@
+[MatmulTik]
+input0.name=x1
+input0.dtype=int8,uint8,float16
+input0.shape=all
+input0.needCompile=false
+input0.paramType=required
+input0.format=ND,ND,ND
+input1.name=x2
+input1.dtype=int8,int8,float16
+input1.shape=all
+input1.needCompile=false
+input1.paramType=required
+input1.format=ND,ND,ND
+output0.name=y
+output0.dtype=int32,int32,float
+output0.shape=all
+output0.paramType=required
+output0.format=ND,ND,ND
+opFile.value=matmul_tik
+opInterface.value=matmul_tik
diff --git a/mindspore/lite/tools/kernel_builder/ascend/tbe_tik/sample/tbe/op_info_cfg/ai_core/ascend310p/matmul_tik.ini b/mindspore/lite/tools/kernel_builder/ascend/tbe_tik/sample/tbe/op_info_cfg/ai_core/ascend310p/matmul_tik.ini
new file mode 100755
index 00000000000..9f5f156633c
--- /dev/null
+++ b/mindspore/lite/tools/kernel_builder/ascend/tbe_tik/sample/tbe/op_info_cfg/ai_core/ascend310p/matmul_tik.ini
@@ -0,0 +1,20 @@
+[MatmulTik]
+input0.name=x1
+input0.dtype=int8,uint8,float16
+input0.shape=all
+input0.needCompile=false
+input0.paramType=required
+input0.format=ND,ND,ND
+input1.name=x2
+input1.dtype=int8,int8,float16
+input1.shape=all
+input1.needCompile=false
+input1.paramType=required
+input1.format=ND,ND,ND
+output0.name=y
+output0.dtype=int32,int32,float
+output0.shape=all
+output0.paramType=required
+output0.format=ND,ND,ND
+opFile.value=matmul_tik
+opInterface.value=matmul_tik
diff --git a/mindspore/lite/tools/kernel_builder/ascend/tbe_tik/sample/tbe/op_info_cfg/ai_core/ascend910/matmul_tik.ini b/mindspore/lite/tools/kernel_builder/ascend/tbe_tik/sample/tbe/op_info_cfg/ai_core/ascend910/matmul_tik.ini
new file mode 100755
index 00000000000..9f5f156633c
--- /dev/null
+++ b/mindspore/lite/tools/kernel_builder/ascend/tbe_tik/sample/tbe/op_info_cfg/ai_core/ascend910/matmul_tik.ini
@@ -0,0 +1,20 @@
+[MatmulTik]
+input0.name=x1
+input0.dtype=int8,uint8,float16
+input0.shape=all
+input0.needCompile=false
+input0.paramType=required
+input0.format=ND,ND,ND
+input1.name=x2
+input1.dtype=int8,int8,float16
+input1.shape=all
+input1.needCompile=false
+input1.paramType=required
+input1.format=ND,ND,ND
+output0.name=y
+output0.dtype=int32,int32,float
+output0.shape=all
+output0.paramType=required
+output0.format=ND,ND,ND
+opFile.value=matmul_tik
+opInterface.value=matmul_tik