forked from OSSInnovation/mindspore
Fused Select and Greater ops to improve BERT performance on GPU
This commit is contained in:
parent
6240189190
commit
a38d6139fa
2
akg
2
akg
|
@ -1 +1 @@
|
|||
Subproject commit d237aa7d8e9d3fb709bda9f30205b02129bc2b59
|
||||
Subproject commit 4d897c23fc41c5f7013efd0c517796233671518a
|
|
@ -47,7 +47,8 @@ bool IsBasicOp(const AnfNodePtr &node, bool is_before_kernel_select) {
|
|||
prim::kPrimAbs, prim::kPrimRound, prim::kPrimNeg, prim::kPrimExp, prim::kPrimTensorAdd,
|
||||
prim::kPrimRealDiv, prim::kPrimMul, prim::kPrimMinimum, prim::kPrimMaximum, prim::kPrimLog,
|
||||
prim::kPrimPow, prim::kPrimSub, prim::kPrimRsqrt, prim::kPrimSqrt, prim::kPrimCast,
|
||||
prim::kPrimAddN, prim::kPrimEqual, prim::kPrimReciprocal, prim::KPrimTransData};
|
||||
prim::kPrimAddN, prim::kPrimEqual, prim::kPrimReciprocal, prim::KPrimTransData, prim::kPrimSelect,
|
||||
prim::kPrimGreater};
|
||||
#else
|
||||
std::vector<PrimitivePtr> fusable_basic_ops;
|
||||
#endif
|
||||
|
|
|
@ -53,7 +53,8 @@ bool IsBasicFuseOp(const AnfNodePtr &node, bool is_before_kernel_select) {
|
|||
prim::kPrimAbs, prim::kPrimRound, prim::kPrimNeg, prim::kPrimExp, prim::kPrimTensorAdd,
|
||||
prim::kPrimRealDiv, prim::kPrimMul, prim::kPrimMinimum, prim::kPrimMaximum, prim::kPrimLog,
|
||||
prim::kPrimPow, prim::kPrimSub, prim::kPrimRsqrt, prim::kPrimSqrt, prim::kPrimCast,
|
||||
prim::kPrimAddN, prim::kPrimEqual, prim::kPrimReciprocal, prim::KPrimTransData};
|
||||
prim::kPrimAddN, prim::kPrimEqual, prim::kPrimReciprocal, prim::KPrimTransData, prim::kPrimSelect,
|
||||
prim::kPrimGreater};
|
||||
#else
|
||||
std::vector<PrimitivePtr> basic_ops;
|
||||
#endif
|
||||
|
|
|
@ -52,5 +52,7 @@ from .squeeze import _squeeze_akg
|
|||
from .squeeze_grad import _squeeze_grad_akg
|
||||
from .sub import _sub_akg
|
||||
from .tile import _tile_akg
|
||||
from .select import _select_akg
|
||||
from .greater import _greater_akg
|
||||
|
||||
# Please insert op register in lexicographical order of the filename.
|
||||
|
|
|
@ -0,0 +1,34 @@
|
|||
# Copyright 2020 Huawei Technologies Co., Ltd
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
# ============================================================================
|
||||
|
||||
"""Greater op"""
|
||||
from mindspore.ops.op_info_register import op_info_register, AkgGpuRegOp, DataType as DT
|
||||
|
||||
op_info = AkgGpuRegOp("Greater") \
|
||||
.fusion_type("ELEMWISE") \
|
||||
.input(0, "x") \
|
||||
.input(1, "y") \
|
||||
.output(0, "output") \
|
||||
.dtype_format(DT.F16_Default, DT.F16_Default, DT.BOOL_Default) \
|
||||
.dtype_format(DT.F32_Default, DT.F32_Default, DT.BOOL_Default) \
|
||||
.dtype_format(DT.F16_5HD, DT.F16_5HD, DT.BOOL_5HD) \
|
||||
.dtype_format(DT.F32_5HD, DT.F32_5HD, DT.BOOL_5HD) \
|
||||
.get_op_info()
|
||||
|
||||
|
||||
@op_info_register(op_info)
|
||||
def _greater_akg():
|
||||
"""Greater Akg register"""
|
||||
return
|
|
@ -0,0 +1,37 @@
|
|||
# Copyright 2020 Huawei Technologies Co., Ltd
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
# ============================================================================
|
||||
|
||||
"""Select op"""
|
||||
from mindspore.ops.op_info_register import op_info_register, AkgGpuRegOp, DataType as DT
|
||||
|
||||
op_info = AkgGpuRegOp("Select") \
|
||||
.fusion_type("ELEMWISE") \
|
||||
.input(0, "condition") \
|
||||
.input(1, "x") \
|
||||
.input(2, "y") \
|
||||
.output(0, "output") \
|
||||
.dtype_format(DT.BOOL_Default, DT.F16_Default, DT.F16_Default, DT.F16_Default) \
|
||||
.dtype_format(DT.BOOL_Default, DT.F32_Default, DT.F32_Default, DT.F32_Default) \
|
||||
.dtype_format(DT.BOOL_Default, DT.I32_Default, DT.I32_Default, DT.I32_Default) \
|
||||
.dtype_format(DT.BOOL_5HD, DT.F16_5HD, DT.F16_5HD, DT.F16_5HD) \
|
||||
.dtype_format(DT.BOOL_5HD, DT.F32_5HD, DT.F32_5HD, DT.F32_5HD) \
|
||||
.dtype_format(DT.BOOL_5HD, DT.I32_5HD, DT.I32_5HD, DT.I32_5HD) \
|
||||
.get_op_info()
|
||||
|
||||
|
||||
@op_info_register(op_info)
|
||||
def _select_akg():
|
||||
"""Select Akg register"""
|
||||
return
|
Loading…
Reference in New Issue