fix AdamWeightDecay core dump on no avx512
parent 35173b9251
commit e9bb4e3347
@@ -181,6 +181,7 @@ constexpr auto kZeta = "Zeta";
 constexpr auto kSquaredDifference = "SquaredDifference";
 constexpr auto kZerosLike = "ZerosLike";
 constexpr auto kEqual = "Equal";
+constexpr auto kGreaterEqual = "GreaterEqual";
 constexpr auto kOnesLike = "OnesLike";
 constexpr auto kSign = "Sign";
 constexpr auto kFmax = "Fmax";
@@ -267,7 +268,8 @@ const std::set<std::string> kCpuKernelOps{kIdentity,
                                           kStatelessDropOutGenMask,
                                           kTopK,
                                           kSign,
-                                          kRealDiv};
+                                          kRealDiv,
+                                          kGreaterEqual};
 const std::set<std::string> kCacheKernelOps{kUpdateCache, kCacheSwapTable, kSubAndFilter, kPadAndShift, kDropout3D,
                                             kDropout2D, kNonMaxSuppressionV3, kGetNext, kInitData, kPrint};
 const std::set<std::string> kCpuKernelBaseOps{kDropoutGenMaskOpName,
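These two hunks are the registration half of the change: the new constant names the op, and adding kGreaterEqual to kCpuKernelOps is what makes kernel selection route GreaterEqual nodes to this kernel set. A minimal sketch of the membership test, with an illustrative caller (TakesCpuKernelPath is not MindSpore code):

#include <set>
#include <string>

// Abridged from the hunk above; membership in this set is what routes a
// node, looked up by op name, to the CPU/AiCPU kernel path.
const std::set<std::string> kCpuKernelOps{"TopK", "Sign", "RealDiv", "GreaterEqual"};

// Hypothetical caller, for illustration only.
bool TakesCpuKernelPath(const std::string &op_name) { return kCpuKernelOps.count(op_name) > 0; }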
@@ -13,11 +13,13 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
 #include <math.h>
 #include "nnacl/errorcode.h"
 #include "nnacl/fp32/exp_fp32.h"
 #include "nnacl/fp32/adam_fp32.h"
 #include "nnacl/adam_fp32_simd.h"
 #include "nnacl/intrinsics/ms_simd_instructions.h"
+#ifdef ENABLE_AVX512
 #include "nnacl/avx512/adam_fp32_avx512.h"
+#endif
 
 int AdamFp32(float *var, float *m, float *v, float lr, float beta1, float beta2, float epsilon, const float *gradient,
              size_t start, size_t end, bool use_nesterov) {
@@ -157,7 +159,7 @@ int AdamDeltaFp32(float *delta, float *m, float *v, float lr, float beta1, float
 int AdamWeightDecayFp32(float *var, float *m, float *v, float lr, float beta1, float beta2, float epsilon, float decay,
                         const float *gradient, size_t start, size_t end) {
   size_t c1 = start;
-  SIMD_RUN_NO_SCALAR(AdamWeightDecayFp32, c1, var, m, v, lr, beta1, beta2, epsilon, decay, gradient, end);
+  SIMD_RUN_AVX512(AdamWeightDecayFp32, c1, var, m, v, lr, beta1, beta2, epsilon, decay, gradient, end);
 
   // remaining
   const float beta1_minus = 1 - beta1;
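Together with the MS_SIMD_AVX512 guard added in the template below, this hunk restricts the vector path of AdamWeightDecayFp32 to AVX-512. SIMD_RUN_NO_SCALAR tries every compiled SIMD variant, so on a machine without AVX-512 it could still pick a variant generated from a body that is only valid 16-wide, which appears to be the source of the reported core dump; SIMD_RUN_AVX512 attempts only the AVX-512 variant, and everywhere else the scalar remainder loop after the macro processes the whole range. A minimal sketch of this dispatch-plus-tail pattern (HasAvx512, Avx512Body, and AdamWeightDecayStep are illustrative stand-ins, not the nnacl API):

#include <cmath>
#include <cstddef>

static bool HasAvx512() {
#if defined(__AVX512F__)
  return true;   // built with AVX-512 enabled; assume the host supports it
#else
  return false;  // conservative: never take the vector path
#endif
}

// Stand-in for the generated AVX-512 body: consumes [index, end) in whole
// 16-float blocks and returns the first index it did not process.
static size_t Avx512Body(size_t index, size_t end) {
  const size_t kBlock = 16;  // floats per 512-bit register
  while (index + kBlock <= end) {
    // ... a 16-wide fused update of var/m/v would go here ...
    index += kBlock;
  }
  return index;
}

void AdamWeightDecayStep(float *var, float *m, float *v, const float *g, float lr, float beta1, float beta2,
                         float epsilon, float decay, size_t start, size_t end) {
  size_t i = start;
  if (HasAvx512()) {
    i = Avx512Body(i, end);  // vector body advances i in whole blocks
  }
  for (; i < end; ++i) {  // scalar remainder: the only path without AVX-512
    m[i] += (g[i] - m[i]) * (1 - beta1);
    v[i] += (g[i] * g[i] - v[i]) * (1 - beta2);
    var[i] -= lr * (m[i] / (std::sqrt(v[i]) + epsilon) + decay * var[i]);
  }
}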
@@ -23,6 +23,7 @@
 extern "C" {
 #endif
 @SIMD_INSTRUCTION_BEGIN@
+#ifdef MS_SIMD_AVX512
 static inline size_t AdamWeightDecayFp32@SIMD_INSTRUCTION@(size_t index, float *var, float *m, float *v, float lr, float beta1, float beta2, float epsilon, float decay,
                                                            const float *gradient, size_t end) {
   SIMD_F32 beta1_r = SIMD_MOV_F32(beta1);
@@ -56,7 +57,6 @@ extern "C" {
     return index;
   }
 
-#ifdef MS_SIMD_AVX512
 static inline size_t FusedCastAdamFp32Fp16@SIMD_INSTRUCTION@(size_t index, float *var, const int16_t *gradient16, float *m, float *v, float lr, float beta1, float beta2, float epsilon, float decay,
                                                              float global_norm_reciprocal, size_t end) {
   SIMD_F32 beta1_r = SIMD_MOV_F32(beta1);
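adam_fp32_simd.h.in is a template that the build expands once per instruction set, rebinding @SIMD_INSTRUCTION@ and the SIMD_* macros each pass. The guard that previously opened just before FusedCastAdamFp32Fp16 now opens before AdamWeightDecayFp32 as well, so the SSE/AVX expansions never emit these bodies, which rely on AVX-512-only operations. A compressed sketch of the mechanism (function name is illustrative):

#include <cstddef>

// Each ISA pass defines exactly one MS_SIMD_* macro before pasting the
// template body, so a guarded function is emitted in that pass only.
#ifdef MS_SIMD_AVX512
// Emitted solely in the AVX-512 pass; SSE/AVX translation units never see
// a body that assumes 16 floats per register.
static inline size_t OnlyValidSixteenWide(size_t index, size_t end) {
  for (; index + 16 <= end; index += 16) {
    // ... 512-bit loads/stores would go here ...
  }
  return index;
}
#endif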
@@ -21,17 +21,17 @@ greater_equal_op_info = AiCPURegOp("GreaterEqual") \
     .input(0, "x1", "required") \
     .input(1, "x2", "required") \
     .output(0, "y", "required") \
-    .dtype_format(DataType.I8_None, DataType.I8_None, DataType.BOOL_None) \
-    .dtype_format(DataType.I16_None, DataType.I16_None, DataType.BOOL_None) \
-    .dtype_format(DataType.I32_None, DataType.I32_None, DataType.BOOL_None) \
-    .dtype_format(DataType.I64_None, DataType.I64_None, DataType.BOOL_None) \
-    .dtype_format(DataType.U8_None, DataType.U8_None, DataType.BOOL_None) \
-    .dtype_format(DataType.U16_None, DataType.U16_None, DataType.BOOL_None) \
-    .dtype_format(DataType.U32_None, DataType.U32_None, DataType.BOOL_None) \
-    .dtype_format(DataType.U64_None, DataType.U64_None, DataType.BOOL_None) \
-    .dtype_format(DataType.F16_None, DataType.F16_None, DataType.BOOL_None) \
-    .dtype_format(DataType.F32_None, DataType.F32_None, DataType.BOOL_None) \
-    .dtype_format(DataType.F64_None, DataType.F64_None, DataType.BOOL_None) \
+    .dtype_format(DataType.I8_Default, DataType.I8_Default, DataType.BOOL_Default) \
+    .dtype_format(DataType.I16_Default, DataType.I16_Default, DataType.BOOL_Default) \
+    .dtype_format(DataType.I32_Default, DataType.I32_Default, DataType.BOOL_Default) \
+    .dtype_format(DataType.I64_Default, DataType.I64_Default, DataType.BOOL_Default) \
+    .dtype_format(DataType.U8_Default, DataType.U8_Default, DataType.BOOL_Default) \
+    .dtype_format(DataType.U16_Default, DataType.U16_Default, DataType.BOOL_Default) \
+    .dtype_format(DataType.U32_Default, DataType.U32_Default, DataType.BOOL_Default) \
+    .dtype_format(DataType.U64_Default, DataType.U64_Default, DataType.BOOL_Default) \
+    .dtype_format(DataType.F16_Default, DataType.F16_Default, DataType.BOOL_Default) \
+    .dtype_format(DataType.F32_Default, DataType.F32_Default, DataType.BOOL_Default) \
+    .dtype_format(DataType.F64_Default, DataType.F64_Default, DataType.BOOL_Default) \
     .get_op_info()
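The GreaterEqual AiCPU registration swaps every DataType.*_None pairing for its *_Default counterpart, presumably so the kernel's dtype/format table matches nodes that carry the default data format now that the op is routed through this path.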
@@ -202,6 +202,7 @@ file(GLOB_RECURSE MINDSPORE_SRC_LIST RELATIVE ${CMAKE_CURRENT_SOURCE_DIR}
     "../../../mindspore/ccsrc/plugin/device/cpu/kernel/sparse_apply_proximal_adagrad_cpu_kernel.cc"
     "../../../mindspore/ccsrc/plugin/device/cpu/kernel/unique_cpu_kernel.cc"
     "../../../mindspore/ccsrc/plugin/device/cpu/kernel/unique_with_pad_cpu_kernel.cc"
+    "../../../mindspore/ccsrc/plugin/device/cpu/kernel/adam_delta_cpu_kernel.cc"
     "../../../mindspore/ccsrc/plugin/device/cpu/kernel/fused_ada_factor_cpu_kernel.cc"
     "../../../mindspore/ccsrc/kernel/akg/*.cc"
     "../../../mindspore/ccsrc/plugin/device/ascend/kernel/akg/*.cc"
@@ -245,6 +246,7 @@ file(GLOB_RECURSE MINDSPORE_SRC_LIST RELATIVE ${CMAKE_CURRENT_SOURCE_DIR}
     "../../../mindspore/ccsrc/distributed/embedding_cache/*.cc"
     "../../../mindspore/ccsrc/plugin/device/ascend/hal/profiler/*.cc"
     "../../../mindspore/ccsrc/profiler/device/profiling.cc"
+    "../../../mindspore/ccsrc/plugin/device/cpu/kernel/nnacl/fp32/adam_fp32.c"
     "../../../mindspore/ccsrc/kernel/kernel.cc"
     "../../../mindspore/ccsrc/plugin/device/ascend/kernel/akg/akg_kernel_metadata.cc"
     "../../../mindspore/ccsrc/plugin/device/ascend/kernel/ascend_kernel_mod.cc"
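Both source-list additions wire the change into the unit-test build: adam_delta_cpu_kernel.cc is the kernel exercised by the new test file below, and nnacl/fp32/adam_fp32.c carries the reworked AdamWeightDecayFp32 dispatch, so the UT binary links the patched code.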
@@ -0,0 +1,96 @@
+/**
+ * Copyright 2023 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vector>
+#include "common/common_test.h"
+#define private public
+#define protected public
+#include "plugin/device/cpu/kernel/adam_delta_cpu_kernel.h"
+#undef private
+#undef protected
+
+namespace mindspore {
+namespace kernel {
+class AdamDeltaCpuKernelTest : public UT::Common {
+ public:
+  AdamDeltaCpuKernelTest() : adam_delta_(std::make_shared<AdamDeltaCpuKernelMod>()) {}
+
+  void SetUp() override {
+    delta_.clear();
+    m_.clear();
+    v_.clear();
+    grad_.clear();
+    inputs_.clear();
+    workspace_.clear();
+    outputs_.clear();
+  }
+
+  AddressPtr CreateKernelAddress(void *addr, size_t elem_num) {
+    auto kernel_addr = std::make_shared<Address>();
+    kernel_addr->addr = addr;
+    kernel_addr->size = elem_num * sizeof(float);
+    return kernel_addr;
+  }
+
+  void CreateAddress() {
+    inputs_.push_back(CreateKernelAddress(m_.data(), elem_num_));
+    inputs_.push_back(CreateKernelAddress(v_.data(), elem_num_));
+    inputs_.push_back(CreateKernelAddress(&beta1_power_, 1));
+    inputs_.push_back(CreateKernelAddress(&beta2_power_, 1));
+    inputs_.push_back(CreateKernelAddress(&lr_, 1));
+    inputs_.push_back(CreateKernelAddress(&beta1_, 1));
+    inputs_.push_back(CreateKernelAddress(&beta2_, 1));
+    inputs_.push_back(CreateKernelAddress(&epsilon_, 1));
+    inputs_.push_back(CreateKernelAddress(grad_.data(), elem_num_));
+    outputs_.push_back(CreateKernelAddress(delta_.data(), elem_num_));
+  }
+
+  std::vector<float> delta_;
+  std::vector<float> m_;
+  std::vector<float> v_;
+  std::vector<float> grad_;
+  std::vector<AddressPtr> inputs_;
+  std::vector<AddressPtr> workspace_;
+  std::vector<AddressPtr> outputs_;
+  std::shared_ptr<AdamDeltaCpuKernelMod> adam_delta_;
+  float beta1_power_ = 0.9;
+  float beta2_power_ = 0.999;
+  float lr_ = 0.001;
+  float beta1_ = 0.9;
+  float beta2_ = 0.999;
+  float epsilon_ = 1e-8;
+  size_t elem_num_ = 27;
+};
+
+/// Feature: Develop AdamDelta op on CPU.
+/// Description: Test AdamDeltaCpuKernel.
+/// Expectation: The AdamDeltaCpuKernel is successfully executed and a correct result is returned.
+TEST_F(AdamDeltaCpuKernelTest, compute_test) {
+  for (size_t i = 0; i < elem_num_; ++i) {
+    delta_.push_back(1.0);
+    m_.push_back(1.0);
+    v_.push_back(1.0);
+    grad_.push_back(1.0);
+  }
+  adam_delta_->elem_num_ = elem_num_;
+  CreateAddress();
+  adam_delta_->Launch(inputs_, workspace_, outputs_);
+  for (size_t i = 0; i < elem_num_; ++i) {
+    EXPECT_TRUE(std::fabs(delta_[i] + 0.000316) < 1e-6);
+  }
+}
+} // namespace kernel
+} // namespace mindspore
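A quick way to sanity-check the expected value in compute_test: with m = v = g = 1, moment updates of the form m += (1 - beta1) * (g - m) leave both moments at 1, and assuming AdamDelta applies the usual bias correction, delta = -lr * sqrt(1 - beta2_power) / (1 - beta1_power) * m / (sqrt(v) + epsilon) = -0.001 * sqrt(0.001) / 0.1 ≈ -0.000316228, so |delta + 0.000316| ≈ 2.3e-7 < 1e-6. A standalone check (the closed form is an inference from the test constants, not taken from the kernel source):

#include <cmath>
#include <cstdio>

int main() {
  const float lr = 0.001f, beta1_power = 0.9f, beta2_power = 0.999f, epsilon = 1e-8f;
  const float m = 1.0f, v = 1.0f;  // moments stay at 1 for all-ones inputs
  const float delta = -lr * std::sqrt(1 - beta2_power) / (1 - beta1_power) * m / (std::sqrt(v) + epsilon);
  std::printf("%.6f\n", delta);  // ≈ -0.000316, inside the EXPECT_TRUE bound
  return 0;
}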