Fuse Select and Greater ops to improve BERT performance on GPU

This commit is contained in:
zengzitao 2020-09-14 10:56:16 +08:00
parent 6240189190
commit a38d6139fa
6 changed files with 84 additions and 9 deletions

2
akg

@ -1 +1 @@
Subproject commit d237aa7d8e9d3fb709bda9f30205b02129bc2b59
Subproject commit 4d897c23fc41c5f7013efd0c517796233671518a

View File

@ -47,7 +47,8 @@ bool IsBasicOp(const AnfNodePtr &node, bool is_before_kernel_select) {
prim::kPrimAbs, prim::kPrimRound, prim::kPrimNeg, prim::kPrimExp, prim::kPrimTensorAdd,
prim::kPrimRealDiv, prim::kPrimMul, prim::kPrimMinimum, prim::kPrimMaximum, prim::kPrimLog,
prim::kPrimPow, prim::kPrimSub, prim::kPrimRsqrt, prim::kPrimSqrt, prim::kPrimCast,
prim::kPrimAddN, prim::kPrimEqual, prim::kPrimReciprocal, prim::KPrimTransData};
prim::kPrimAddN, prim::kPrimEqual, prim::kPrimReciprocal, prim::KPrimTransData, prim::kPrimSelect,
prim::kPrimGreater};
#else
std::vector<PrimitivePtr> fusable_basic_ops;
#endif

View File

@ -53,7 +53,8 @@ bool IsBasicFuseOp(const AnfNodePtr &node, bool is_before_kernel_select) {
prim::kPrimAbs, prim::kPrimRound, prim::kPrimNeg, prim::kPrimExp, prim::kPrimTensorAdd,
prim::kPrimRealDiv, prim::kPrimMul, prim::kPrimMinimum, prim::kPrimMaximum, prim::kPrimLog,
prim::kPrimPow, prim::kPrimSub, prim::kPrimRsqrt, prim::kPrimSqrt, prim::kPrimCast,
prim::kPrimAddN, prim::kPrimEqual, prim::kPrimReciprocal, prim::KPrimTransData};
prim::kPrimAddN, prim::kPrimEqual, prim::kPrimReciprocal, prim::KPrimTransData, prim::kPrimSelect,
prim::kPrimGreater};
#else
std::vector<PrimitivePtr> basic_ops;
#endif

View File

@ -52,5 +52,7 @@ from .squeeze import _squeeze_akg
from .squeeze_grad import _squeeze_grad_akg
from .sub import _sub_akg
from .tile import _tile_akg
from .select import _select_akg
from .greater import _greater_akg
# Please insert op register in lexicographical order of the filename.

View File

@ -0,0 +1,34 @@
# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""Greater op"""
from mindspore.ops.op_info_register import op_info_register, AkgGpuRegOp, DataType as DT
# Op description for "Greater": two inputs (x, y) and one output, with
# fusion type "ELEMWISE".
op_info = (
    AkgGpuRegOp("Greater")
    .fusion_type("ELEMWISE")
    .input(0, "x")
    .input(1, "y")
    .output(0, "output")
    # Each dtype_format row lists three entries -- presumably (x, y, output)
    # in declaration order; the last entry is always a BOOL type.
    # Default-format variants.
    .dtype_format(DT.F16_Default, DT.F16_Default, DT.BOOL_Default)
    .dtype_format(DT.F32_Default, DT.F32_Default, DT.BOOL_Default)
    # 5HD-format variants.
    .dtype_format(DT.F16_5HD, DT.F16_5HD, DT.BOOL_5HD)
    .dtype_format(DT.F32_5HD, DT.F32_5HD, DT.BOOL_5HD)
    .get_op_info()
)


@op_info_register(op_info)
def _greater_akg():
    """Register the Greater op info; the function body itself does nothing."""

View File

@ -0,0 +1,37 @@
# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""Select op"""
from mindspore.ops.op_info_register import op_info_register, AkgGpuRegOp, DataType as DT
# Op description for "Select": three inputs (condition, x, y) and one output,
# with fusion type "ELEMWISE".
op_info = (
    AkgGpuRegOp("Select")
    .fusion_type("ELEMWISE")
    .input(0, "condition")
    .input(1, "x")
    .input(2, "y")
    .output(0, "output")
    # Each dtype_format row lists four entries -- presumably
    # (condition, x, y, output) in declaration order; the first entry is
    # always a BOOL type and the remaining three share one data type.
    # Default-format variants.
    .dtype_format(DT.BOOL_Default, DT.F16_Default, DT.F16_Default, DT.F16_Default)
    .dtype_format(DT.BOOL_Default, DT.F32_Default, DT.F32_Default, DT.F32_Default)
    .dtype_format(DT.BOOL_Default, DT.I32_Default, DT.I32_Default, DT.I32_Default)
    # 5HD-format variants.
    .dtype_format(DT.BOOL_5HD, DT.F16_5HD, DT.F16_5HD, DT.F16_5HD)
    .dtype_format(DT.BOOL_5HD, DT.F32_5HD, DT.F32_5HD, DT.F32_5HD)
    .dtype_format(DT.BOOL_5HD, DT.I32_5HD, DT.I32_5HD, DT.I32_5HD)
    .get_op_info()
)


@op_info_register(op_info)
def _select_akg():
    """Register the Select op info; the function body itself does nothing."""