forked from OSSInnovation/mindspore

Add fp32 & int8 ops of MatMul (BatchMatMul)

This commit is contained in:
parent 201bcdd9af
commit ccd6b9a415
@@ -33,29 +33,30 @@ int MatMul::InferShape(std::vector<tensor::Tensor *> inputs_, std::vector<tensor
   auto output = outputs_.front();
   MS_ASSERT(output != nullptr);

-  std::vector<int> x_shape = input0->shape();
-  std::vector<int> w_shape = input1->shape();
-  if (x_shape.size() < 2 || w_shape.size() < 2) {
+  std::vector<int> a_shape = input0->shape();
+  std::vector<int> b_shape = input1->shape();
+  if (a_shape.size() < 3 || b_shape.size() < 3) {
     MS_LOG(ERROR) << "inputs shape is invalid";
     return RET_INPUT_TENSOR_ERROR;
   }
+
+  for (int i = 0; i < a_shape.size() - 2; ++i) {
+    if (a_shape[i] != b_shape[i]) {
+      MS_LOG(ERROR) << "Op MatMul's dimensions must be equal";
+      return RET_INPUT_TENSOR_ERROR;
+    }
+  }

   auto matmul_prim = this->primitive->value_as_MatMul();
   if (matmul_prim->transposeA()) {
-    int tmp = x_shape.back();
-    x_shape[x_shape.size() - 1] = x_shape[x_shape.size() - 2];
-    x_shape[x_shape.size() - 2] = tmp;
+    std::swap(a_shape[a_shape.size() - 1], a_shape[a_shape.size() - 2]);
   }
   if (matmul_prim->transposeB()) {
-    int tmp = w_shape.back();
-    w_shape[w_shape.size() - 1] = w_shape[w_shape.size() - 2];
-    w_shape[w_shape.size() - 2] = tmp;
+    std::swap(b_shape[b_shape.size() - 1], b_shape[b_shape.size() - 2]);
   }
-  auto y_shape_size = std::max(x_shape.size(), w_shape.size());
-  std::vector<int> y_shape(y_shape_size);
-  y_shape = x_shape;
-  y_shape[y_shape_size - 1] = w_shape[w_shape.size() - 1];
-  output->set_shape(y_shape);
+  std::vector<int> c_shape(a_shape);
+  c_shape[c_shape.size() - 1] = b_shape[b_shape.size() - 1];
+  output->set_shape(c_shape);
   output->set_data_type(input0->data_type());
   output->SetFormat(input0->GetFormat());
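Note: the batched shape rule this hunk implements can be exercised on its own. A minimal stand-alone C++ sketch (the names and the equal-rank assert are illustrative assumptions, not part of the patch):

#include <algorithm>
#include <cassert>
#include <vector>

// Batch dims are copied from A; the last two dims are rows(op(A)) x cols(op(B)),
// where op() optionally swaps the trailing two dims (transpose).
std::vector<int> InferBatchMatMulShape(std::vector<int> a, std::vector<int> b,
                                       bool transpose_a, bool transpose_b) {
  assert(a.size() >= 3 && b.size() >= 3 && a.size() == b.size());
  for (size_t i = 0; i + 2 < a.size(); ++i) {
    assert(a[i] == b[i]);  // batch dims must match, as the hunk checks
  }
  if (transpose_a) std::swap(a[a.size() - 1], a[a.size() - 2]);
  if (transpose_b) std::swap(b[b.size() - 1], b[b.size() - 2]);
  std::vector<int> c(a);              // batch dims and row count come from A
  c[c.size() - 1] = b[b.size() - 1];  // column count comes from B
  return c;                           // e.g. {3,2,8} x {3,8,3} -> {3,2,3}
}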
@@ -139,6 +139,8 @@ Primitive *Primitive::CreatePrimitive(schema::Primitive *primitive) {
       return new lite::SpaceToBatch(const_cast<schema::Primitive *>(primitive));
     case schema::PrimitiveType_QuantDTypeCast:
       return new lite::QuantDTypeCast(const_cast<schema::Primitive *>(primitive));
+    case schema::PrimitiveType_MatMul:
+      return new lite::MatMul(const_cast<schema::Primitive *>(primitive));
     default:
       break;
   }
@@ -0,0 +1,72 @@
+/**
+ * Copyright 2020 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "src/runtime/kernel/arm/base/matmul_base.h"
+#include "src/runtime/kernel/arm/fp32/matmul.h"
+#include "src/runtime/kernel/arm/int8/matmul_int8.h"
+#include "src/kernel_factory.h"
+#include "include/errorcode.h"
+#include "include/context.h"
+
+using mindspore::lite::KernelRegistrar;
+using mindspore::lite::RET_ERROR;
+using mindspore::lite::RET_OK;
+using mindspore::schema::PrimitiveType_MatMul;
+
+namespace mindspore::kernel {
+kernel::LiteKernel *CpuMatmulKernelCreator(const std::vector<lite::tensor::Tensor *> &inputs,
+                                           const std::vector<lite::tensor::Tensor *> &outputs,
+                                           OpParameter *opParameter, const lite::Context *ctx,
+                                           const kernel::KernelKey &desc) {
+  MS_ASSERT(opParameter != nullptr);
+  MS_ASSERT(desc.type == schema::PrimitiveType_MatMul);
+  auto input_tensor = inputs.at(kInputIndex);
+  auto data_type = input_tensor->data_type();
+  kernel::LiteKernel *kernel = nullptr;
+  switch (data_type) {
+    case kNumberTypeInt8:
+    case kNumberTypeUInt8: {
+      kernel = new (std::nothrow) MatmulInt8CPUKernel(opParameter, inputs, outputs, ctx);
+      if (!kernel) {
+        MS_LOG(ERROR) << "kernel is nullptr.";
+        return nullptr;
+      }
+      break;
+    }
+    case kNumberTypeFloat32: {
+      kernel = new (std::nothrow) MatmulCPUKernel(opParameter, inputs, outputs, ctx);
+      if (!kernel) {
+        MS_LOG(ERROR) << "kernel is nullptr.";
+        return nullptr;
+      }
+      break;
+    }
+    default:
+      break;
+  }
+  // guard the default case: no kernel was created for this data type
+  if (kernel == nullptr) {
+    MS_LOG(ERROR) << "unsupported data type for MatMul kernel: " << data_type;
+    return nullptr;
+  }
+
+  auto ret = kernel->Init();
+  if (ret != RET_OK) {
+    delete kernel;
+    MS_LOG(ERROR) << "Init kernel failed, name: " << opParameter->name_ << ", type: "
+                  << schema::EnumNamePrimitiveType(static_cast<schema::PrimitiveType>(opParameter->type_));
+    return nullptr;
+  }
+  return kernel;
+}
+
+REG_KERNEL(kCPU, kNumberTypeFloat32, PrimitiveType_MatMul, CpuMatmulKernelCreator)
+} // namespace mindspore::kernel
@@ -0,0 +1,49 @@
+/**
+ * Copyright 2020 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_BASE_MATMUL_BASE_H_
+#define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_BASE_MATMUL_BASE_H_
+
+#include <vector>
+#include "src/lite_kernel.h"
+#include "include/context.h"
+#include "src/runtime/kernel/arm/opclib/matmul.h"
+
+using mindspore::lite::Context;
+
+namespace mindspore::kernel {
+class MatmulBaseCPUKernel : public LiteKernel {
+ public:
+  MatmulBaseCPUKernel(OpParameter *parameter, const std::vector<lite::tensor::Tensor *> &inputs,
+                      const std::vector<lite::tensor::Tensor *> &outputs, const Context *ctx)
+      : LiteKernel(parameter, inputs, outputs), ctx_(ctx), thread_count_(ctx->threadNum) {
+    params_ = reinterpret_cast<MatMulParameter *>(opParameter);
+  }
+  ~MatmulBaseCPUKernel() = default;
+
+  int Init() override { return 0; }
+  int ReSize() override { return 0; }
+  int Run() override { return 0; }
+
+ protected:
+  MatMulParameter *params_;
+  int thread_count_;
+  int thread_stride_;
+  const Context *ctx_;
+};
+} // namespace mindspore::kernel
+
+#endif  // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_BASE_MATMUL_BASE_H_
@@ -15,44 +15,102 @@
  */

 #include "src/runtime/kernel/arm/fp32/matmul.h"
-#include <vector>
-#include "schema/model_generated.h"
-#include "src/kernel_registry.h"
+#include "src/runtime/kernel/arm/opclib/fp32/matmul.h"
+#include "src/runtime/runtime_api.h"
 #include "include/errorcode.h"

 using mindspore::kernel::KERNEL_ARCH::kCPU;
-using mindspore::lite::KernelRegistrar;
 using mindspore::lite::RET_ERROR;
+using mindspore::lite::RET_MEMORY_FAILED;
 using mindspore::lite::RET_OK;
-using mindspore::schema::PrimitiveType_MatMul;

 namespace mindspore::kernel {
+MatmulCPUKernel::~MatmulCPUKernel() {
+  ctx_->allocator->Free(a_c8_ptr_);
+  ctx_->allocator->Free(b_r8_ptr_);
+  ctx_->allocator->Free(c_r8x8_ptr_);
+}
+
 int MatmulCPUKernel::ReSize() { return RET_OK; }

-int MatmulCPUKernel::Run() { return RET_OK; }
-
-int MatmulCPUKernel::Init() { return RET_OK; }
-
-kernel::LiteKernel *CpuMatmulFp32KernelCreator(const std::vector<lite::tensor::Tensor *> &inputs,
-                                               const std::vector<lite::tensor::Tensor *> &outputs,
-                                               OpParameter *opParameter, const lite::Context *ctx,
-                                               const kernel::KernelKey &desc) {
-  MS_ASSERT(desc.type == schema::PrimitiveType_MatMul);
-  auto *kernel = new (std::nothrow) MatmulCPUKernel(opParameter, inputs, outputs);
-  if (kernel == nullptr) {
-    MS_LOG(ERROR) << "new MatmulCPUKernel fail!";
-    return nullptr;
-  }
-  auto ret = kernel->Init();
-  if (ret != RET_OK) {
-    delete kernel;
-    MS_LOG(ERROR) << "Init kernel failed, name: " << opParameter->name_ << ", type: "
-                  << schema::EnumNamePrimitiveType(static_cast<schema::PrimitiveType>(opParameter->type_));
-    return nullptr;
-  }
-  return kernel;
-}
-
-REG_KERNEL(kCPU, kNumberTypeFloat32, PrimitiveType_MatMul, CpuMatmulFp32KernelCreator)
+int MatmulCPUKernel::Init() {
+  int batch = 1;
+  auto x_shape = inputs_[0]->shape();
+  auto o_shape = outputs_[0]->shape();
+  for (int i = 0; i < x_shape.size() - 2; ++i) {
+    batch *= x_shape[i];
+  }
+  params_->batch = batch;
+  params_->row_ = o_shape[o_shape.size() - 2];
+  params_->col_ = o_shape[o_shape.size() - 1];
+  params_->deep_ = params_->a_transpose_ ? x_shape[x_shape.size() - 2] : x_shape[x_shape.size() - 1];
+  params_->row_8_ = UP_ROUND(params_->row_, 8);
+  params_->col_8_ = UP_ROUND(params_->col_, 8);
+  thread_count_ = MSMIN(thread_count_, UP_DIV(params_->col_8_, 8));
+  thread_stride_ = UP_DIV(UP_DIV(params_->col_8_, 8), thread_count_);
+
+  a_c8_ptr_ = reinterpret_cast<float *>(ctx_->allocator->Malloc(params_->row_8_ * params_->deep_ * sizeof(float)));
+  if (!a_c8_ptr_) {
+    return RET_MEMORY_FAILED;
+  }
+  memset(a_c8_ptr_, 0, params_->row_8_ * params_->deep_ * sizeof(float));
+  b_r8_ptr_ = reinterpret_cast<float *>(ctx_->allocator->Malloc(params_->col_8_ * params_->deep_ * sizeof(float)));
+  if (!b_r8_ptr_) {
+    return RET_MEMORY_FAILED;
+  }
+  memset(b_r8_ptr_, 0, params_->col_8_ * params_->deep_ * sizeof(float));
+  c_r8x8_ptr_ = reinterpret_cast<float *>(ctx_->allocator->Malloc(params_->row_8_ * params_->col_8_ * sizeof(float)));
+  if (!c_r8x8_ptr_) {
+    return RET_MEMORY_FAILED;
+  }
+  memset(c_r8x8_ptr_, 0, params_->row_8_ * params_->col_8_ * sizeof(float));
+  return RET_OK;
+}
+
+int MatmulCPUKernel::RunImpl(int task_id) {
+  int cur_oc = MSMIN(thread_stride_, UP_DIV(params_->col_8_, 8) - task_id * thread_stride_);
+  if (cur_oc <= 0) {
+    return RET_OK;
+  }
+  auto cur_b = b_r8_ptr_ + task_id * thread_stride_ * C8NUM * params_->deep_;
+  auto cur_c = c_r8x8_ptr_ + task_id * thread_stride_ * C8NUM * params_->row_8_;
+  MatMul(a_c8_ptr_, cur_b, cur_c, NULL, ActType_No, params_->deep_, params_->row_8_, cur_oc * 8);
+  return RET_OK;
+}
+
+int MatmulFloatRun(int task_id, LiteParallelGroupEnv *penv, void *cdata) {
+  auto op = reinterpret_cast<MatmulCPUKernel *>(cdata);
+  auto error_code = op->RunImpl(task_id);
+  if (error_code != RET_OK) {
+    MS_LOG(ERROR) << "MatmulFloatRun error task_id[" << task_id << "] error_code[" << error_code << "]";
+    return RET_ERROR;
+  }
+  return RET_OK;
+}
+
+int MatmulCPUKernel::Run() {
+  auto a_ptr = reinterpret_cast<float *>(inputs_[0]->Data());
+  auto b_ptr = reinterpret_cast<float *>(inputs_[1]->Data());
+  auto c_ptr = reinterpret_cast<float *>(outputs_[0]->Data());
+  auto a_stride = params_->row_ * params_->deep_;
+  auto b_stride = params_->deep_ * params_->col_;
+  auto c_stride = params_->row_ * params_->col_;
+  for (int i = 0; i < params_->batch; ++i) {
+    auto cur_a_ptr = a_ptr + i * a_stride;
+    auto cur_b_ptr = b_ptr + i * b_stride;
+    auto cur_c_ptr = c_ptr + i * c_stride;
+    if (params_->a_transpose_) {
+      RowMajor2Row8Major(cur_a_ptr, a_c8_ptr_, params_->deep_, params_->row_);
+    } else {
+      RowMajor2Col8Major(cur_a_ptr, a_c8_ptr_, params_->row_, params_->deep_);
+    }
+    if (params_->b_transpose_) {
+      RowMajor2Col8Major(cur_b_ptr, b_r8_ptr_, params_->col_, params_->deep_);
+    } else {
+      RowMajor2Row8Major(cur_b_ptr, b_r8_ptr_, params_->deep_, params_->col_);
+    }
+    LiteBackendParallelLaunch(MatmulFloatRun, this, thread_count_);
+    Row8x8Major2RowMajor(c_r8x8_ptr_, cur_c_ptr, params_->row_, params_->col_);
+  }
+  return RET_OK;
+}
 } // namespace mindspore::kernel
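Note: Init() above sizes the packed buffers to 8-aligned dimensions and splits the 8-wide column tiles across threads. A worked sketch of that arithmetic with the shapes used in the tests below (UP_ROUND/UP_DIV rewritten locally for illustration):

#include <algorithm>
#include <cstdio>

// Local stand-ins for the opclib macros.
int UpRound(int x, int n) { return ((x + n - 1) / n) * n; }  // round up to a multiple of n
int UpDiv(int x, int n) { return (x + n - 1) / n; }          // ceiling division

int main() {
  int row = 2, col = 3, threads = 2;
  int row8 = UpRound(row, 8);             // 8: rows padded for the 8x8 tiles
  int col8 = UpRound(col, 8);             // 8
  int oc_tiles = UpDiv(col8, 8);          // 1 tile of 8 output columns
  threads = std::min(threads, oc_tiles);  // never more threads than tiles
  int stride = UpDiv(oc_tiles, threads);  // tiles handled per thread
  // Task t covers tiles [t*stride, t*stride + cur_oc) where
  // cur_oc = min(stride, oc_tiles - t*stride), exactly as RunImpl() computes.
  printf("row8=%d col8=%d tiles=%d threads=%d stride=%d\n", row8, col8, oc_tiles, threads, stride);
  return 0;
}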
@@ -19,27 +19,26 @@

 #include <vector>
 #include "src/lite_kernel.h"
-#include "src/runtime/kernel/arm/opclib/matmul.h"
+#include "src/runtime/kernel/arm/base/matmul_base.h"

 namespace mindspore::kernel {
-class MatmulCPUKernel : public LiteKernel {
+class MatmulCPUKernel : public MatmulBaseCPUKernel {
  public:
   explicit MatmulCPUKernel(OpParameter *parameter, const std::vector<lite::tensor::Tensor *> &inputs,
-                           const std::vector<lite::tensor::Tensor *> &outputs)
-      : LiteKernel(parameter, inputs, outputs) {
-    matmul_param_ = reinterpret_cast<MatMulParameter *>(parameter);
-  }
-  ~MatmulCPUKernel() override = default;
-
+                           const std::vector<lite::tensor::Tensor *> &outputs, const Context *ctx)
+      : MatmulBaseCPUKernel(parameter, inputs, outputs, ctx) {}
+  ~MatmulCPUKernel() override;
+  int Init() override;
+  int ReSize() override;
+  int Run() override;
+  int RunImpl(int task_id);

  private:
-  MatMulParameter *matmul_param_;
+  float *a_c8_ptr_;
+  float *b_r8_ptr_;
+  float *c_r8x8_ptr_;
 };
 } // namespace mindspore::kernel

 #endif  // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP32_MATMUL_H_
@@ -42,7 +42,7 @@ class FullconnectionInt8CPUKernel : public FullconnectionBaseCPUKernel {
   int RunImpl(int task_id);

  private:
-  FcQuantArg quant_params_;
+  MatmulQuantArg quant_params_;
   int8_t *a_c8_ptr_;
   int8_t *b_r8_ptr_;
   int *c_r8x8_ptr_;
@@ -0,0 +1,142 @@
+/**
+ * Copyright 2020 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "src/runtime/kernel/arm/int8/matmul_int8.h"
+#include "src/runtime/kernel/arm/opclib/int8/matmul.h"
+#include "src/runtime/kernel/arm/opclib/common_func.h"
+#include "src/runtime/runtime_api.h"
+#include "include/errorcode.h"
+
+using mindspore::lite::RET_MEMORY_FAILED;
+using mindspore::lite::RET_OK;
+
+namespace mindspore::kernel {
+MatmulInt8CPUKernel::~MatmulInt8CPUKernel() {
+  ctx_->allocator->Free(a_c8_ptr_);
+  ctx_->allocator->Free(b_r8_ptr_);
+  ctx_->allocator->Free(c_r8x8_ptr_);
+}
+
+int MatmulInt8CPUKernel::Init() {
+  int batch = 1;
+  auto x_shape = inputs_[0]->shape();
+  auto o_shape = outputs_[0]->shape();
+  for (int i = 0; i < x_shape.size() - 2; ++i) {
+    batch *= x_shape[i];
+  }
+  params_->batch = batch;
+  params_->row_ = o_shape[o_shape.size() - 2];
+  params_->col_ = o_shape[o_shape.size() - 1];
+  params_->deep_ = params_->a_transpose_ ? x_shape[x_shape.size() - 2] : x_shape[x_shape.size() - 1];
+  params_->row_8_ = UP_ROUND(params_->row_, 8);
+  params_->col_8_ = UP_ROUND(params_->col_, 8);
+  thread_count_ = MSMIN(thread_count_, UP_DIV(params_->col_8_, 8));
+  thread_stride_ = UP_DIV(UP_DIV(params_->col_8_, 8), thread_count_);
+
+  a_c8_ptr_ = reinterpret_cast<int8_t *>(ctx_->allocator->Malloc(params_->row_8_ * params_->deep_ * sizeof(int8_t)));
+  if (!a_c8_ptr_) {
+    return RET_MEMORY_FAILED;
+  }
+  memset(a_c8_ptr_, 0, params_->row_8_ * params_->deep_ * sizeof(int8_t));
+  b_r8_ptr_ = reinterpret_cast<int8_t *>(ctx_->allocator->Malloc(params_->col_8_ * params_->deep_ * sizeof(int8_t)));
+  if (!b_r8_ptr_) {
+    return RET_MEMORY_FAILED;
+  }
+  memset(b_r8_ptr_, 0, params_->col_8_ * params_->deep_ * sizeof(int8_t));
+  c_r8x8_ptr_ = reinterpret_cast<int *>(ctx_->allocator->Malloc(params_->row_8_ * params_->col_8_ * sizeof(int)));
+  if (!c_r8x8_ptr_) {
+    return RET_MEMORY_FAILED;
+  }
+  memset(c_r8x8_ptr_, 0, params_->row_8_ * params_->col_8_ * sizeof(int));
+
+  auto input_tensor = inputs_[0];
+  auto params = input_tensor->GetQuantParams();
+  MS_ASSERT(params.size() == 1);
+  quant_params_.input.zp_ = params.front().zeroPoint;
+  quant_params_.input.scale_ = params.front().scale;
+  auto weight_tensor = inputs_[1];
+  params = weight_tensor->GetQuantParams();
+  MS_ASSERT(params.size() == 1);
+  quant_params_.weight.zp_ = params.front().zeroPoint;
+  quant_params_.weight.scale_ = params.front().scale;
+  auto output_tensor = outputs_[0];
+  params = output_tensor->GetQuantParams();
+  MS_ASSERT(params.size() == 1);
+  quant_params_.output.zp_ = params.front().zeroPoint;
+  quant_params_.output.scale_ = params.front().scale;
+
+  double real_multiplier = quant_params_.input.scale_ * quant_params_.weight.scale_ / quant_params_.output.scale_;
+  QuantizeRoundParameter(real_multiplier, &quant_params_.quant_multiplier, &quant_params_.left_shift,
+                         &quant_params_.right_shift);
+  return RET_OK;
+}
+
+int MatmulInt8CPUKernel::ReSize() { return RET_OK; }
+
+int MatmulInt8CPUKernel::RunImpl(int task_id) {
+  int cur_oc = MSMIN(thread_stride_, UP_DIV(params_->col_8_, 8) - task_id * thread_stride_);
+  if (cur_oc <= 0) {
+    return RET_OK;
+  }
+  auto cur_b = b_r8_ptr_ + task_id * thread_stride_ * C8NUM * params_->deep_;
+  auto cur_c = c_r8x8_ptr_ + task_id * thread_stride_ * C8NUM * params_->row_8_;
+  MatMulInt8(a_c8_ptr_, cur_b, cur_c, params_->row_8_, cur_oc * 8, params_->deep_, quant_params_.input.zp_,
+             quant_params_.weight.zp_);
+  return RET_OK;
+}
+
+int MatmulInt8Run(int task_id, LiteParallelGroupEnv *penv, void *cdata) {
+  auto op = reinterpret_cast<MatmulInt8CPUKernel *>(cdata);
+  auto ret = op->RunImpl(task_id);
+  if (ret != RET_OK) {
+    MS_LOG(ERROR) << "MatmulInt8Run error task_id[" << task_id << "] error_code[" << ret << "]";
+    return ret;
+  }
+  return RET_OK;
+}
+
+int MatmulInt8CPUKernel::Run() {
+  auto a_ptr = reinterpret_cast<int8_t *>(inputs_[0]->Data());
+  auto b_ptr = reinterpret_cast<int8_t *>(inputs_[1]->Data());
+  auto c_ptr = reinterpret_cast<int8_t *>(outputs_[0]->Data());
+  auto a_stride = params_->row_ * params_->deep_;
+  auto b_stride = params_->deep_ * params_->col_;
+  auto c_stride = params_->row_ * params_->col_;
+
+  for (int i = 0; i < params_->batch; ++i) {
+    auto cur_a_ptr = a_ptr + i * a_stride;
+    auto cur_b_ptr = b_ptr + i * b_stride;
+    auto cur_c_ptr = c_ptr + i * c_stride;
+    if (params_->a_transpose_) {
+      RowMajor2Row8MajorInt8(cur_a_ptr, a_c8_ptr_, params_->deep_, params_->row_);
+    } else {
+      RowMajor2Col8MajorInt8(cur_a_ptr, a_c8_ptr_, params_->row_, params_->deep_);
+    }
+    if (params_->b_transpose_) {
+      RowMajor2Col8MajorInt8(cur_b_ptr, b_r8_ptr_, params_->col_, params_->deep_);
+    } else {
+      RowMajor2Row8MajorInt8(cur_b_ptr, b_r8_ptr_, params_->deep_, params_->col_);
+    }
+    LiteBackendParallelLaunch(MatmulInt8Run, this, thread_count_);
+    auto &q = quant_params_;
+    SimplePostFuncInt8(c_r8x8_ptr_, cur_c_ptr, params_->col_, params_->row_, params_->row_8_, q.quant_multiplier,
+                       q.left_shift, q.right_shift, q.output.zp_);
+  }
+
+  return RET_OK;
+}
+
+} // namespace mindspore::kernel
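Note: QuantizeRoundParameter() (from opclib) folds the float rescale factor input_scale * weight_scale / output_scale into an integer multiplier plus a left/right shift pair; its body is not part of this commit. One common frexp-based derivation looks roughly like this (a sketch, not necessarily opclib's exact rounding):

#include <cmath>
#include <cstdint>

// Express real_multiplier as (quantized_multiplier * 2^-31) * 2^shift,
// then split the shift into a left part (> 0) and a right part (< 0).
void QuantizeMultiplierSketch(double real_multiplier, int32_t *quantized_multiplier,
                              int32_t *left_shift, int32_t *right_shift) {
  int shift = 0;
  double q = std::frexp(real_multiplier, &shift);  // q in [0.5, 1), real = q * 2^shift
  auto q_fixed = static_cast<int64_t>(std::round(q * (1ll << 31)));
  if (q_fixed == (1ll << 31)) {  // rounding spilled into the next power of two
    q_fixed /= 2;
    ++shift;
  }
  *quantized_multiplier = static_cast<int32_t>(q_fixed);
  *left_shift = shift > 0 ? shift : 0;
  *right_shift = shift > 0 ? 0 : -shift;
}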
@@ -0,0 +1,47 @@
+/**
+ * Copyright 2020 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_INT8_MATMUL_INT8_H_
+#define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_INT8_MATMUL_INT8_H_
+
+#include <vector>
+#include "include/context.h"
+#include "src/runtime/kernel/arm/opclib/quantization/quantize.h"
+#include "src/runtime/kernel/arm/base/matmul_base.h"
+
+using mindspore::lite::Context;
+
+namespace mindspore::kernel {
+class MatmulInt8CPUKernel : public MatmulBaseCPUKernel {
+ public:
+  MatmulInt8CPUKernel(OpParameter *parameter, const std::vector<lite::tensor::Tensor *> &inputs,
+                      const std::vector<lite::tensor::Tensor *> &outputs, const Context *ctx)
+      : MatmulBaseCPUKernel(parameter, inputs, outputs, ctx) {}
+  ~MatmulInt8CPUKernel() override;
+  int Init() override;
+  int ReSize() override;
+  int Run() override;
+  int RunImpl(int task_id);
+
+ private:
+  MatmulQuantArg quant_params_;
+  int8_t *a_c8_ptr_;
+  int8_t *b_r8_ptr_;
+  int *c_r8x8_ptr_;
+};
+} // namespace mindspore::kernel
+
+#endif  // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_INT8_MATMUL_INT8_H_
@@ -236,3 +236,20 @@ void PostFuncInt8(const int *in, const int *bias, int8_t *out, int oc, int plane
   }
   return;
 }
+
+void SimplePostFuncInt8(const int *in, int8_t *out, int oc, int plane, int plane8, int32_t multiplier,
+                        int32_t left_shift, int32_t right_shift, int32_t zp) {
+  /* (int32_t)row8x8-major * multiplier => (int8_t)row-major */
+  for (int r = 0; r < plane; r++) {
+    for (int c = 0; c < oc; c++) {
+      int c8div = c / 8, c8mod = c % 8;
+      int src_index = c8div * plane8 * 8 + r * 8 + c8mod;
+      int dst_index = r * oc + c;
+      int32_t value = in[src_index];
+      value = MultiplyByQuantizedMultiplier(value, multiplier, left_shift, right_shift) + zp;
+      value = MSMIN(CHAR_MAX, value);
+      value = MSMAX(CHAR_MIN, value);
+      out[dst_index] = (int8_t)value;
+    }
+  }
+}
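Note: the src_index arithmetic above assumes the int32 accumulator is stored in 8-column panels: all plane8 rows of columns 0-7 first, then columns 8-15, and so on. A tiny self-check of that layout (illustrative only):

#include <cassert>

// For an accumulator stored as 8-column panels of height plane8,
// element (r, c) lives at panel c/8, row r, lane c%8.
int Row8x8Index(int r, int c, int plane8) {
  return (c / 8) * plane8 * 8 + r * 8 + (c % 8);
}

int main() {
  int plane8 = 8;
  assert(Row8x8Index(0, 0, plane8) == 0);           // row 0, lanes 0..7 come first
  assert(Row8x8Index(0, 7, plane8) == 7);
  assert(Row8x8Index(1, 0, plane8) == 8);           // next row of the same panel
  assert(Row8x8Index(0, 8, plane8) == plane8 * 8);  // column 8 starts a new panel
  return 0;
}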
@@ -33,6 +33,8 @@ void ReluFp32(float *data, int ele_num);
 void Relu6Fp32(float *data, int ele_num);
 void PostFuncInt8(const int *in, const int *bias, int8_t *out, int oc, int plane, int plane8, int32_t multiplier,
                   int32_t left_shift, int32_t right_shift, int32_t zp, int8_t mini, int8_t maxi);
+void SimplePostFuncInt8(const int *in, int8_t *out, int oc, int plane, int plane8, int32_t multiplier,
+                        int32_t left_shift, int32_t right_shift, int32_t zp);
 void IndirectGemmFp32_8x8(float *output, const float *input, const float *weight, const float *bias, size_t step,
                           size_t ic4, size_t output_channel, size_t offset, size_t mode, size_t writeC4, size_t relu,
                           size_t relu6);
@@ -65,9 +65,7 @@ void MatMul8x8(const float *a, const float *b, float *c, const float *bias, ActT
         size_t bi = c8div * deep * 8 + d * 8 + c8mod;
         value = value + a[ai] * b[bi];
       }
-      if (bias != nullptr) {
-        value += bias[col];
-      }
+      if (bias != nullptr) value += bias[col];
+      if (act_type == ActType_Relu6) value = MSMIN(6.0f, value);
+      if (act_type != ActType_No) value = MSMAX(0.0f, value);
       c[ci] = value;
@@ -18,6 +18,17 @@
 #include <limits.h>
 #include "src/runtime/kernel/arm/opclib/quantization/fixed_point.h"

+void RowMajor2Row8MajorInt8(int8_t *src_ptr, int8_t *dst_ptr, int row, int col) {
+  for (int r = 0; r < row; r++) {
+    int8_t *src = src_ptr + r * col;
+    for (int c = 0; c < col; c++) {
+      int cd8 = c / 8;
+      int cm8 = c % 8;
+      dst_ptr[cd8 * 8 * row + r * 8 + cm8] = src[c];
+    }
+  }
+}
+
 void RowMajor2Col8MajorInt8(int8_t *src_ptr, int8_t *dst_ptr, int row, int col) {
   for (int r = 0; r < row; r++) {
     int rd8 = r / 8;
@@ -26,7 +37,6 @@ void RowMajor2Col8MajorInt8(int8_t *src_ptr, int8_t *dst_ptr, int row, int col)
       dst_ptr[rd8 * col * 8 + c * 8 + rm8] = src_ptr[r * col + c];
     }
   }
-  return;
 }

 void MatMulInt8(const int8_t *a, const int8_t *b, int32_t *c, const int row8, const int col8, const int deep,
@@ -46,5 +56,4 @@ void MatMulInt8(const int8_t *a, const int8_t *b, int32_t *c, const int row8, co
       c[ci] = value;
     }
   }
-  return;
 }
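Note: the two packers exist so the 8-lane inner loop of MatMulInt8 reads its operands contiguously: RowMajor2Row8MajorInt8 splits a deep x col matrix into col/8 panels of deep rows by 8 lanes, and RowMajor2Col8MajorInt8 is the transposed counterpart. A small usage sketch showing the zero-padded panel it produces (values illustrative):

#include <cstdint>
#include <cstdio>
#include <cstring>

// Same layout as RowMajor2Row8MajorInt8 above: (r, c) -> panel c/8, slot r*8 + c%8.
void PackRow8(const int8_t *src, int8_t *dst, int row, int col) {
  for (int r = 0; r < row; ++r) {
    for (int c = 0; c < col; ++c) {
      dst[(c / 8) * 8 * row + r * 8 + (c % 8)] = src[r * col + c];
    }
  }
}

int main() {
  const int deep = 2, col = 3;  // tiny deep x col matrix
  int8_t b[deep * col] = {1, 2, 3, 4, 5, 6};
  int8_t packed[deep * 8];      // one 8-wide panel, zero-padded
  memset(packed, 0, sizeof(packed));
  PackRow8(b, packed, deep, col);
  // Row d of the panel holds {b[d][0..2], 0, ..., 0}, so the kernel's index
  // b[c8div * deep * 8 + d * 8 + c8mod] walks it sequentially over d.
  for (int i = 0; i < deep * 8; ++i) printf("%d ", packed[i]);
  printf("\n");                 // 1 2 3 0 0 0 0 0 4 5 6 0 0 0 0 0
  return 0;
}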
@@ -22,7 +22,7 @@
 void MatMulInt8(const int8_t *a, const int8_t *b, int32_t *c, const int row8, const int col8, const int deep,
                 const int32_t a_zp, const int32_t b_zp);
+void RowMajor2Row8MajorInt8(int8_t *src_ptr, int8_t *dst_ptr, int row, int col);
 void RowMajor2Col8MajorInt8(int8_t *src_ptr, int8_t *dst_ptr, int row, int col);

 #endif  // MINDSPORE_LITE_SRC_BACKEND_ARM_OPCLIB_INT8_MATMUL_H_
@@ -29,6 +29,7 @@ struct MatMulParameter {
   int col_8_;
   int deep_;
   bool has_bias_;
+  int batch;
   bool a_transpose_; /* false : row-major */
   bool b_transpose_; /* true : col-major */
   ActType act_type_;
@@ -22,6 +22,7 @@
 #include <stdlib.h>
+#include <limits.h>
 #include <limits>
 #include "src/runtime/kernel/arm/opclib/op_base.h"

 struct QuantArg {
   double scale_;
@@ -49,7 +50,7 @@ struct ConcatQuantArg {
   QuantArg out_quant_args_;
 };

-struct FcQuantArg {
+struct MatmulQuantArg {
   QuantArg input;
   QuantArg weight;
   QuantArg output;
@@ -130,4 +131,22 @@ inline void CalculateActivationRangeQuantized(bool is_relu, bool is_relu6, int32
   *mini = min;
   *maxi = max;
 }
+
+// quantize from float to int8
+inline void Quantize(float *input_data, int length, float scale, int zero_point, int8_t *output_data) {
+  for (int i = 0; i < length; ++i) {
+    // clamp in int before narrowing, so out-of-range values saturate instead of wrapping
+    int r = (int)round(input_data[i] / scale + zero_point);
+    r = r > CHAR_MAX ? CHAR_MAX : r;
+    r = r < CHAR_MIN ? CHAR_MIN : r;
+    output_data[i] = (int8_t)r;
+  }
+}
+
+// dequantize from int8 to float
+inline void Dequantize(int8_t *input_data, int length, float scale, int zero_point, float *output_data) {
+  for (int i = 0; i < length; ++i) {
+    output_data[i] = scale * (input_data[i] - zero_point);
+  }
+}
+
 #endif  // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_OPCLIB_QUANTIZATION_QUANTIZE_H_
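Note: these two inline helpers are what the new tests below use to move float fixtures into and out of int8. A round-trip usage sketch, assuming Quantize/Dequantize from quantize.h above are in scope (the scale and zero point values are illustrative):

#include <cstdint>
#include <cstdio>

int main() {
  // assumes Quantize/Dequantize declared above (quantize.h)
  float in[4] = {-20.0f, -5.0f, 5.0f, 20.0f};
  int8_t q[4];
  float out[4];
  float scale = 40.0f / 255;  // maps the range [-20, 20] onto int8
  int zp = 0;
  Quantize(in, 4, scale, zp, q);
  Dequantize(q, 4, scale, zp, out);
  for (int i = 0; i < 4; ++i) {
    printf("%.2f -> %d -> %.2f\n", in[i], q[i], out[i]);  // error on the order of scale
  }
  return 0;
}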
@@ -0,0 +1,169 @@
+/**
+ * Copyright 2020 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include <iostream>
+#include "mindspore/core/utils/log_adapter.h"
+#include "common/common_test.h"
+#include "mindspore/lite/src/runtime/kernel/arm/fp32/matmul.h"
+#include "src/kernel_registry.h"
+#include "src/lite_kernel.h"
+
+namespace mindspore {
+class TestMatMulFp32 : public mindspore::Common {
+ public:
+  TestMatMulFp32() {}
+};
+
+int MMTestInit(std::vector<lite::tensor::Tensor *> *inputs_, std::vector<lite::tensor::Tensor *> *outputs_,
+               float *a_ptr, float *b_ptr, std::vector<int> a_shape, std::vector<int> b_shape,
+               std::vector<int> c_shape) {
+  auto in_t =
+    new lite::tensor::Tensor(kNumberTypeFloat, a_shape, schema::Format_NHWC, static_cast<schema::NodeType>(1));
+  in_t->MallocData();
+  memcpy(in_t->Data(), a_ptr, sizeof(float) * in_t->ElementsNum());
+  inputs_->push_back(in_t);
+
+  auto weight_t =
+    new lite::tensor::Tensor(kNumberTypeFloat, b_shape, schema::Format_NHWC, static_cast<schema::NodeType>(1));
+  weight_t->MallocData();
+  memcpy(weight_t->Data(), b_ptr, sizeof(float) * weight_t->ElementsNum());
+  inputs_->push_back(weight_t);
+
+  auto out_t =
+    new lite::tensor::Tensor(kNumberTypeFloat, c_shape, schema::Format_NHWC, static_cast<schema::NodeType>(1));
+  out_t->MallocData();
+  outputs_->push_back(out_t);
+
+  return out_t->ElementsNum();
+}
+
+TEST_F(TestMatMulFp32, simple) {
+  std::vector<lite::tensor::Tensor *> inputs_;
+  std::vector<lite::tensor::Tensor *> outputs_;
+  auto matmul_param = new MatMulParameter();
+  matmul_param->a_transpose_ = false;
+  matmul_param->b_transpose_ = false;
+  matmul_param->has_bias_ = false;
+  float a[] = {-3.2366564, -4.7733846, -7.8329225, 16.146885, 5.060793, -6.1471, -1.7680453, -6.5721383,
+               17.87506, -5.1192183, 10.742863, 1.4536934, 19.693445, 19.45783, 5.063163, 0.5234792};
+  float b[] = {-0.0024438887, 0.0006738146, -0.008169129, 0.0021510671, -0.012470592, -0.0053063435,
+               0.006050155, 0.008656233, 0.012911413, -0.0028635843, -0.00034080597, -0.0010622552,
+               -0.012254699, -0.01312836, 0.0025241964, -0.004706142, 0.002451482, -0.009558459,
+               0.004481974, 0.0033251503, -0.011705584, -0.001720293, -0.0039410214, -0.0073637343};
+  std::vector<int> a_shape = {1, 2, 8};
+  std::vector<int> b_shape = {1, 8, 3};
+  std::vector<int> c_shape = {1, 2, 3};
+  int total_size = MMTestInit(&inputs_, &outputs_, a, b, a_shape, b_shape, c_shape);
+  auto ctx = new lite::Context;
+  ctx->threadNum = 2;
+  auto mm = new kernel::MatmulCPUKernel(reinterpret_cast<OpParameter *>(matmul_param), inputs_, outputs_, ctx);
+  mm->Init();
+  mm->Run();
+  float correct[] = {-0.1256939023733139, -0.07744802534580231, 0.07410638779401779,
+                     -0.3049793541431427, -0.027687929570674896, -0.18109679222106934};
+  CompareOutputData(reinterpret_cast<float *>(outputs_[0]->Data()), correct, total_size, 0.0001);
+  delete matmul_param;
+  delete mm;
+  for (auto t : inputs_) delete t;
+  for (auto t : outputs_) delete t;
+}
+
+TEST_F(TestMatMulFp32, simple_transb) {
+  std::vector<lite::tensor::Tensor *> inputs_;
+  std::vector<lite::tensor::Tensor *> outputs_;
+  auto matmul_param = new MatMulParameter();
+  matmul_param->a_transpose_ = false;
+  matmul_param->b_transpose_ = true;
+  matmul_param->has_bias_ = false;
+  float a[] = {-3.2366564, -4.7733846, -7.8329225, 16.146885, 5.060793, -6.1471, -1.7680453, -6.5721383,
+               17.87506, -5.1192183, 10.742863, 1.4536934, 19.693445, 19.45783, 5.063163, 0.5234792};
+  float b[] = {-0.0024438887, 0.0006738146, -0.008169129, 0.0021510671, -0.012470592, -0.0053063435,
+               0.006050155, 0.008656233, 0.012911413, -0.0028635843, -0.00034080597, -0.0010622552,
+               -0.012254699, -0.01312836, 0.0025241964, -0.004706142, 0.002451482, -0.009558459,
+               0.004481974, 0.0033251503, -0.011705584, -0.001720293, -0.0039410214, -0.0073637343};
+  std::vector<int> a_shape = {1, 2, 8};
+  std::vector<int> b_shape = {1, 3, 8};
+  std::vector<int> c_shape = {1, 2, 3};
+  int total_size = MMTestInit(&inputs_, &outputs_, a, b, a_shape, b_shape, c_shape);
+  auto ctx = new lite::Context;
+  ctx->threadNum = 2;
+  auto mm = new kernel::MatmulCPUKernel(reinterpret_cast<OpParameter *>(matmul_param), inputs_, outputs_, ctx);
+  mm->Init();
+  mm->Run();
+  float correct[] = {0.00533547, 0.002545945, 0.062974121, -0.445441471, -0.246223617, -0.142070031};
+  CompareOutputData(reinterpret_cast<float *>(outputs_[0]->Data()), correct, total_size, 0.0001);
+  delete matmul_param;
+  delete mm;
+  for (auto t : inputs_) delete t;
+  for (auto t : outputs_) delete t;
+}
+
+TEST_F(TestMatMulFp32, batch) {
+  std::vector<lite::tensor::Tensor *> inputs_;
+  std::vector<lite::tensor::Tensor *> outputs_;
+  auto matmul_param = new MatMulParameter();
+  matmul_param->a_transpose_ = false;
+  matmul_param->b_transpose_ = true;
+  matmul_param->has_bias_ = false;
+  float a[] = {-4.946672525326248, 11.154420027909701, -7.831129637356922, 17.309845099949953, -10.46177877610444,
+               2.5412751480833897, 2.700113860276929, -12.616715572097341, -15.513316568881574, -9.513294738065516,
+               17.931148376418896, -10.83801964632579, -14.023733862948017, -14.50805001403956, 0.7952221556310306,
+               6.619720423569035, -19.277904230909357, -13.450479287024839, 19.914652156692625, 16.542571697048878,
+               -2.9715041389268926, 4.949555349889412, -1.9408110276290103, -15.062828261031868, 0.20012569643335,
+               8.260383531209776, 3.1092344458607357, 16.742272486091487, 17.31277252415167, -16.60303202099434,
+               -8.980314693173042, -11.735087989358268, -14.918976184088514, -11.347592686892733, 11.808756029220604,
+               -18.76179414554809, 7.579758962360987, 3.13240880962163, 6.528181981442103, -16.802624652419794,
+               -14.323146919914901, -16.197579076296144, 9.738053920125779, -12.245780062949866, 8.817905278096319,
+               0.5261391331275007, -18.26152522535471, -2.400461208771226};
+  float b[] = {
+    -0.895183867395529, -0.8146900207660068, -0.27931593219652817, 0.783554361201179, -0.05080215007779798,
+    -0.9879631271568501, 0.07710949009001333, -0.9562579726211344, 0.29505553318356825, -0.26651960351085124,
+    -0.12755456259718279, -0.8221417897250098, -0.5094334041431876, -0.9117373380256013, 0.991501784215064,
+    0.20131976450979394, 0.07889260559412059, -0.8138407752750305, -0.047622075866657454, -0.2778043115153188,
+    -0.6269973420163957, -0.44345812666611617, -0.8571568605933642, 0.020192166011526735, 0.4860054298402434,
+    0.41525925469513614, -0.40270506445219967, -0.8716538067535347, 0.5276448387223114, 0.6064500154192936,
+    -0.9553204135772526, 0.3253219646257437, -0.7237956595774822, 0.3271284879679077, -0.534543967339336,
+    -0.4076498484281894, 0.01574797075171963, -0.37322004720586244, 0.16425071396119928, -0.5328652244800547,
+    0.7389336170615435, -0.6552069958923377, -0.042305872596973604, -0.6714941466767734, -0.9281411415119043,
+    -0.7748558258281224, -0.6209799945964443, 0.02526428593887675, -0.44984776800225856, 0.6281401952319337,
+    0.9907258228680276, 0.6288646615999687, -0.82076880150175, 0.3065944740797497, -0.29201038744043584,
+    -0.025685501802048982, -0.07273175145419652, 0.9370449239208709, -0.8233807408078093, -0.4195634619023012,
+    0.9799555630257346, -0.23461882935715228, -0.8884793313829993, -0.4760267734754635, -0.2874539543614072,
+    -0.8795685985480997, -0.08099698251915255, -0.1626521023321741, -0.9337167240793414, 0.40924842916829207,
+    -0.7375713045221615, -0.0065659291539015285};
+  std::vector<int> a_shape = {3, 2, 8};
+  std::vector<int> b_shape = {3, 3, 8};
+  std::vector<int> c_shape = {3, 2, 3};
+  int total_size = MMTestInit(&inputs_, &outputs_, a, b, a_shape, b_shape, c_shape);
+  auto ctx = new lite::Context;
+  ctx->threadNum = 1;
+  auto mm = new kernel::MatmulCPUKernel(reinterpret_cast<OpParameter *>(matmul_param), inputs_, outputs_, ctx);
+  mm->Init();
+  mm->Run();
+  float correct[] = {21.38518524169922, -14.514888763427734, -11.040614128112793, 16.91403579711914,
+                     27.07421112060547, 23.35394287109375, -39.006141662597656, -2.021998405456543,
+                     -17.63555145263672, -8.490625381469727, 5.317771911621094, -14.561882019042969,
+                     -7.251564025878906, -2.508212089538574, 5.86458683013916, -3.466249465942383,
+                     8.869029998779297, 25.034008026123047};
+
+  float *output = reinterpret_cast<float *>(outputs_[0]->Data());
+  for (int i = 0; i < 18; ++i) printf("%f ", output[i]);
+  CompareOutputData(reinterpret_cast<float *>(outputs_[0]->Data()), correct, total_size, 0.0001);
+  delete matmul_param;
+  delete mm;
+  for (auto t : inputs_) delete t;
+  for (auto t : outputs_) delete t;
+}
+} // namespace mindspore
@@ -13,13 +13,11 @@
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
-#include <iostream>
-#include <memory>
 #include "utils/log_adapter.h"
 #include "common/common_test.h"
 #include "mindspore/lite/src/runtime/kernel/arm/int8/fullconnection_int8.h"
 #include "mindspore/lite/src/runtime/kernel/arm/opclib/int8/matmul.h"
 #include "mindspore/lite/src/runtime/kernel/arm/opclib/common_func.h"
+#include "mindspore/lite/src/runtime/kernel/arm/opclib/quantization/quantize.h"
 #include "mindspore/lite/src/kernel_registry.h"
 #include "mindspore/lite/src/lite_kernel.h"
@@ -30,21 +28,6 @@ class TestFcInt8 : public mindspore::Common {
   TestFcInt8() {}
 };

-void Quantize(float *input_data, int length, float scale, int zero_point, int8_t *output_data) {
-  for (int i = 0; i < length; ++i) {
-    int8_t q = static_cast<int8_t>(std::max<float>(
-      std::numeric_limits<int8_t>::min(),
-      std::min<float>(std::numeric_limits<int8_t>::max(), std::round(zero_point + (input_data[i] / scale)))));
-    output_data[i] = q;
-  }
-}
-
-void Dequantize(int8_t *input_data, int length, float scale, int zero_point, float *output_data) {
-  for (int i = 0; i < length; ++i) {
-    output_data[i] = scale * (input_data[i] - zero_point);
-  }
-}
-
 int FcInt8TestInit(std::vector<lite::tensor::Tensor *> *inputs_, std::vector<lite::tensor::Tensor *> *outputs_,
                    MatMulParameter *matmal_param, float **correct, double *scale, int *zeropoint) {
   float input_max = 20;
@@ -0,0 +1,126 @@
+/**
+ * Copyright 2020 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "utils/log_adapter.h"
+#include "common/common_test.h"
+#include "mindspore/lite/src/runtime/kernel/arm/int8/matmul_int8.h"
+#include "mindspore/lite/src/runtime/kernel/arm/opclib/quantization/quantize.h"
+#include "mindspore/lite/src/runtime/kernel/arm/opclib/common_func.h"
+#include "mindspore/lite/src/kernel_registry.h"
+#include "mindspore/lite/src/lite_kernel.h"
+
+namespace mindspore {
+class TestMatmulInt8 : public mindspore::Common {
+ public:
+  TestMatmulInt8() {}
+};
+
+int MMInt8TestInit(std::vector<lite::tensor::Tensor *> *inputs_, std::vector<lite::tensor::Tensor *> *outputs_,
+                   MatMulParameter *matmal_param, float **correct, double *scale, int *zeropoint) {
+  float input_max = 20;
+  float input_min = -20;
+  float weight_max = 1;
+  float weight_min = -1;
+  float output_max = 30;
+  float output_min = -30;
+
+  double input_scale =
+    (input_max - input_min) / (std::numeric_limits<int8_t>::max() - std::numeric_limits<int8_t>::min());
+  int input_zp = std::numeric_limits<int8_t>::max() - input_max / input_scale;
+  double weight_scale =
+    (weight_max - weight_min) / (std::numeric_limits<int8_t>::max() - std::numeric_limits<int8_t>::min());
+  int weight_zp = std::numeric_limits<int8_t>::max() - weight_max / weight_scale;
+  double output_scale =
+    (output_max - output_min) / (std::numeric_limits<int8_t>::max() - std::numeric_limits<int8_t>::min());
+  int output_zp = std::numeric_limits<int8_t>::max() - output_max / output_scale;
+  *scale = output_scale;
+  *zeropoint = output_zp;
+
+  auto in_t =
+    new lite::tensor::Tensor(kNumberTypeInt8, {1, 2, 8}, schema::Format_NHWC, static_cast<schema::NodeType>(1));
+  in_t->MallocData();
+  float in[] = {6.583835634764597, 11.337275140963907, -4.125256949459629, 10.994337291530833,
+                19.086065139532636, 3.620842999158455, 13.167624585590346, -18.326739299407755,
+                14.877693740734841, -17.092677920571653, 19.24147072807235, -15.14805323833401,
+                -18.075654829688737, -0.9164404591894204, -3.836646280336332, -10.870298671273918};
+  Quantize(in, in_t->ElementsNum(), input_scale, input_zp, reinterpret_cast<int8_t *>(in_t->Data()));
+  auto in_quant_arg = new mindspore::lite::tensor::QuantArg();
+  in_quant_arg->zeroPoint = input_zp;
+  in_quant_arg->scale = input_scale;
+  in_t->AddQuantParam(*in_quant_arg);
+  inputs_->push_back(in_t);
+
+  auto weight_t =
+    new lite::tensor::Tensor(kNumberTypeInt8, {1, 3, 8}, schema::Format_NHWC, static_cast<schema::NodeType>(1));
+  weight_t->MallocData();
+  float weight[] = {0.3651070698591563, -0.5856943921727129, -0.7472032663840145, 0.9489992871641959,
+                    -0.8179490270358738, -0.873058811259344, 0.39876672713807215, -0.1816769383004213,
+                    -0.13584645926733696, -0.7614673836659709, -0.2535825872616164, -0.05265760030895916,
+                    0.28558728305658754, 0.15404213943520118, -0.1634824450738006, -0.5068199082730189,
+                    -0.026961256849111326, -0.1508441942453307, 0.9375335677537737, 0.3304690744194263,
+                    -0.5091563780251127, 0.029887336278646925, -0.39540496207319276, 0.46094065001445084};
+  Quantize(weight, weight_t->ElementsNum(), weight_scale, weight_zp, reinterpret_cast<int8_t *>(weight_t->Data()));
+  auto weight_quant_arg = new mindspore::lite::tensor::QuantArg();
+  weight_quant_arg->zeroPoint = weight_zp;
+  weight_quant_arg->scale = weight_scale;
+  weight_t->AddQuantParam(*weight_quant_arg);
+  inputs_->push_back(weight_t);
+
+  auto out_t =
+    new lite::tensor::Tensor(kNumberTypeInt8, {1, 2, 3}, schema::Format_NHWC, static_cast<schema::NodeType>(1));
+  out_t->MallocData();
+  auto output_quant_arg = new mindspore::lite::tensor::QuantArg();
+  output_quant_arg->zeroPoint = output_zp;
+  output_quant_arg->scale = output_scale;
+  out_t->AddQuantParam(*output_quant_arg);
+  outputs_->push_back(out_t);
+
+  *correct = reinterpret_cast<float *>(malloc(out_t->ElementsNum() * sizeof(float)));
+  float nchw_co[] = {-0.912632942, 4.08398056, -25.385608673, 2.720281124, 7.745952606, 20.893184662};
+  memcpy(*correct, nchw_co, out_t->ElementsNum() * sizeof(float));
+
+  matmal_param->b_transpose_ = true;
+  matmal_param->a_transpose_ = false;
+  matmal_param->has_bias_ = false;
+  return out_t->ElementsNum();
+}
+
+TEST_F(TestMatmulInt8, mmint8) {
+  std::vector<lite::tensor::Tensor *> inputs_;
+  std::vector<lite::tensor::Tensor *> outputs_;
+  auto matmul_param = new MatMulParameter();
+  float *correct;
+  double output_scale;
+  int output_zp;
+  int total_size = MMInt8TestInit(&inputs_, &outputs_, matmul_param, &correct, &output_scale, &output_zp);
+  auto ctx = new lite::Context;
+  ctx->threadNum = 2;
+  kernel::MatmulInt8CPUKernel *mm =
+    new kernel::MatmulInt8CPUKernel(reinterpret_cast<OpParameter *>(matmul_param), inputs_, outputs_, ctx);
+
+  mm->Init();
+  mm->Run();
+  float fout[6] = {0};
+  Dequantize(reinterpret_cast<int8_t *>(outputs_[0]->Data()), outputs_[0]->ElementsNum(), output_scale, output_zp,
+             fout);
+  CompareOutputData(fout, correct, 6, 0.3);
+  delete matmul_param;
+  delete mm;
+  for (auto t : inputs_) delete t;
+  for (auto t : outputs_) delete t;
+  free(correct);
+}
+
+} // namespace mindspore