forked from mindspore-Ecosystem/mindspore
add akg load_im2col
Add the LoadIm2Col primitive and register its AKG kernel for the Ascend backend.
This commit is contained in:
parent
9720bab9c9
commit
9bf4b67b64
|
@ -112,6 +112,12 @@ void KernelQuery(const CNodePtr &kernel_node, std::vector<std::shared_ptr<kernel
|
|||
if (IsPrimitiveCNode(kernel_node, kPrimProdForceSeA)) {
|
||||
kernel_type = KernelType::AKG_KERNEL;
|
||||
}
|
||||
|
||||
const PrimitivePtr kPrimLoadIm2Col = std::make_shared<Primitive>("LoadIm2Col");
|
||||
if (IsPrimitiveCNode(kernel_node, kPrimLoadIm2Col)) {
|
||||
kernel_type = KernelType::AKG_KERNEL;
|
||||
} // use LoadIm2Col only for THOR optimizer
|
||||
|
||||
switch (kernel_type) {
|
||||
case KernelType::AKG_KERNEL:
|
||||
AkgMetadataInfo(kernel_node, kernel_info_list);
|
||||
|
|
|
@ -44,5 +44,6 @@ from .sqrt import _sqrt_akg
|
|||
from .square import _square_akg
|
||||
from .sub import _sub_akg
|
||||
from .prod_force_se_a import _prod_force_se_a_akg
|
||||
from .load_im2col import _load_im2col_akg
|
||||
|
||||
# Please insert op register in lexicographical order of the filename.
|
||||
|
|
|
@ -0,0 +1,33 @@
|
|||
# Copyright 2021 Huawei Technologies Co., Ltd
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
# ============================================================================
|
||||
|
||||
"""LoadIm2Col op"""
|
||||
from mindspore.ops.op_info_register import op_info_register, AkgAscendRegOp, DataType as DT
|
||||
|
||||
# AKG (Ascend) registration info for the "LoadIm2Col" kernel: one input "x",
# one output "output", two required listInt attributes, and two supported
# dtype/format pairs (float16 and float32, 5HD input -> default-format output).
op_info = (
    AkgAscendRegOp("LoadIm2Col")
    .fusion_type("OPAQUE")
    .input(0, "x")
    .output(0, "output")
    .attr("ksizes", "required", "listInt")
    .attr("strides", "required", "listInt")
    .dtype_format(DT.F16_5HD, DT.F16_Default)
    .dtype_format(DT.F32_5HD, DT.F32_Default)
    .get_op_info()
)
|
||||
|
||||
|
||||
@op_info_register(op_info)
def _load_im2col_akg():
    """Register the LoadIm2Col AKG op info; the function body itself does nothing."""
    return None
|
|
@ -96,7 +96,8 @@ from .other_ops import (Assign, InplaceAssign, IOU, BoundingBoxDecode, BoundingB
|
|||
from ._thor_ops import (CusBatchMatMul, CusCholeskyTrsm, CusFusedAbsMax1, CusImg2Col, CusMatMulCubeDenseLeft,
|
||||
CusMatMulCubeFraczRightMul, CusMatMulCube, CusMatrixCombine, CusTranspose02314,
|
||||
CusMatMulCubeDenseRight,
|
||||
CusMatMulCubeFraczLeftCast, Im2Col, UpdateThorGradient, Cholesky, CholeskyTrsm, DetTriangle,
|
||||
CusMatMulCubeFraczLeftCast, Im2Col, LoadIm2Col, UpdateThorGradient, Cholesky, CholeskyTrsm,
|
||||
DetTriangle,
|
||||
ProdForceSeA)
|
||||
from .sparse_ops import (SparseToDense, SparseTensorDenseMatmul)
|
||||
from ._embedding_cache_ops import (CacheSwapTable, UpdateCache, MapCacheIdx, SubAndFilter,
|
||||
|
|
|
@ -31,6 +31,7 @@ __all__ = ["CusBatchMatMul",
|
|||
"CusTranspose02314",
|
||||
"CusMatMulCubeDenseRight",
|
||||
"CusMatMulCubeFraczLeftCast",
|
||||
"LoadIm2Col"
|
||||
]
|
||||
|
||||
|
||||
|
@ -362,6 +363,7 @@ class CusTranspose02314(PrimitiveWithInfer):
|
|||
|
||||
def get_bprop(self):
|
||||
"""Get backprop for CusTranspose02314."""
|
||||
|
||||
def bprop(x, out, dout):
|
||||
return (C.zeros_like(x),)
|
||||
|
||||
|
@ -529,6 +531,55 @@ class Im2Col(PrimitiveWithInfer):
|
|||
return x_dtype
|
||||
|
||||
|
||||
class LoadIm2Col(PrimitiveWithInfer):
    """
    Extracts image patches from an image.

    The rank of input_x1 must be `4`, data_format is "NCHW".
    Only supports when C is divisible by 16.

    Args:
        ksizes (Union[tuple[int], list[int]]): Kernel size, a sequence of two ints.
            Shape inference only uses the product of the two values.
        strides (Union[tuple[int], list[int]]): Stride, a sequence of two ints
            :math:`(stride_h, stride_w)`.
        pad_mode (str): Padding mode. Only "same" is accepted. Default: "same".
        dilates (tuple[int]): Dilation. Stored as `self.dilation`; it is not used by
            shape inference. Default: (1, 1, 1, 1).

    Inputs:
        - **input_x1** (Tensor) - The feature map.
          The shape of the tensor is :math:`(N, C, H, W)`.

    Outputs:
        Tensor, with the same data type as the input. With
        :math:`M = ceil(H / stride_h) * ceil(W / stride_w)` and
        :math:`K = C * k_h * k_w`, the inferred shape is
        :math:`(N, M // 16, K // 16, 16, 16)` when M is divisible by 16, and
        :math:`((N * M) // 16, K // 16, 16, 16)` otherwise.

    Examples:
        >>> input_x = Tensor(np.random.rand(32, 16, 224, 224).astype(np.float16))
        >>> img2col = ops.LoadIm2Col(ksizes=(7, 7), strides=(2, 2))
        >>> output = img2col(input_x)
    """

    @prim_attr_register
    def __init__(self,
                 ksizes,
                 strides,
                 pad_mode="same",
                 dilates=(1, 1, 1, 1)):
        """Initialize LoadIm2Col"""

        self.init_prim_io_names(inputs=['x1'], outputs=['y'])
        self.ksizes = ksizes
        self.strides = strides
        # Only the "same" padding mode is accepted; anything else is rejected by the validator.
        self.pad_mode = validator.check_string(pad_mode, ['same'], 'pad_mode', self.name)
        self.dilation = dilates

    def infer_shape(self, data1_shape):
        """Infer the output shape from the 4-D input shape (N, C, H, W)."""
        bs, c, h, w = data1_shape
        stride_h, stride_w = self.strides
        # Only the product k_h * k_w is used below, so the unpack order does not affect the result.
        k_h, k_w = self.ksizes
        # Output spatial size depends only on the stride (ceil division), not on the kernel size.
        h_out = math.ceil(h / stride_h)
        w_out = math.ceil(w / stride_w)
        m = h_out * w_out
        k = c * k_h * k_w
        # NOTE(review): the trailing 16 x 16 dims look like a fractal/tiled layout - confirm with the kernel.
        if m % 16 != 0:
            # M is not a multiple of 16: batch and M are folded into a single leading dimension.
            shape = [(bs * m) // 16, k // 16, 16, 16]
        else:
            shape = [bs, m // 16, k // 16, 16, 16]
        return shape

    def infer_dtype(self, data1_dtype):
        """The output keeps the input's data type."""
        return data1_dtype
|
||||
|
||||
|
||||
class UpdateThorGradient(PrimitiveWithInfer):
|
||||
"""
|
||||
Updates Thor Gradient with Approximate Fisher info matrix(for GPU backend).
|
||||
|
|
Loading…
Reference in New Issue