!21400 pclint warnings clean

Merge pull request !21400 from 范吉斌/pclint_master
i-robot authored on 2021-08-06 06:38:46 +00:00, committed by Gitee
commit 9c4a7919c8
7 changed files with 49 additions and 50 deletions

View File

@@ -76,27 +76,10 @@ void ApplyAdagradCPUKernel::LaunchKernel(const std::vector<AddressPtr> &inputs,
// multithreading
size_t length = inputs[0]->size / sizeof(T);
size_t max_thread_num = std::thread::hardware_concurrency();
size_t use_thread_num = length < 128 * max_thread_num ? std::ceil(length / 128.0) : max_thread_num;
std::vector<std::thread> threads;
threads.reserve(use_thread_num);
size_t start = 0;
const size_t batch_size = (length + use_thread_num - 1) / use_thread_num;
if (batch_size == 0) {
MS_LOG(EXCEPTION) << "Error occur in launch kernel";
return;
}
while (start < length) {
size_t end = (start + batch_size) > length ? length : (start + batch_size);
threads.emplace_back(
std::thread(&ApplyAdagradCPUKernel::LaunchApplyAdagrad<T *>, this, var, accum, lr, gradient, start, end));
start += batch_size;
}
for (auto &it : threads) {
it.join();
}
auto task = [this, &var, &accum, lr, gradient](size_t start, size_t end) {
LaunchApplyAdagrad(var, accum, lr, gradient, start, end);
};
CPUKernelUtils::ParallelForAutoSearch(task, length, &parallel_search_info_);
// Copy result to output tensor
auto output_var = reinterpret_cast<T *>(outputs[0]->addr);
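The hand-rolled thread fan-out is folded into CPUKernelUtils::ParallelForAutoSearch here. Below is a minimal standalone sketch of the same chunked parallel-for pattern in plain C++; the helper name parallel_for is invented for the sketch and the 128-element block heuristic mirrors the removed code. This is not MindSpore's implementation, which appears to also reuse worker threads and tune the split via the parallel_search_info_ argument.

#include <algorithm>
#include <cmath>
#include <cstddef>
#include <functional>
#include <thread>
#include <vector>

// Illustrative chunked parallel-for: split [0, length) into contiguous
// ranges and run task(start, end) on each range in its own thread.
void parallel_for(const std::function<void(size_t, size_t)> &task, size_t length) {
  if (length == 0) {
    return;
  }
  const size_t max_threads = std::max<size_t>(1, std::thread::hardware_concurrency());
  const size_t block = 128;  // per-thread block heuristic, as in the removed code
  const size_t num_threads = length < block * max_threads
                                 ? static_cast<size_t>(std::ceil(length / static_cast<double>(block)))
                                 : max_threads;
  const size_t batch = (length + num_threads - 1) / num_threads;
  std::vector<std::thread> workers;
  for (size_t start = 0; start < length; start += batch) {
    workers.emplace_back(task, start, std::min(length, start + batch));
  }
  for (auto &worker : workers) {
    worker.join();
  }
}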

View File

@@ -13,10 +13,12 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "backend/kernel_compiler/cpu/arithmetic_logic_cpu_kernel.h"
#include <cmath>
#include <string>
#include <map>
#include "backend/kernel_compiler/cpu/arithmetic_logic_cpu_kernel.h"
#include <functional>
#include "runtime/device/cpu/cpu_device_address.h"
namespace mindspore {
@@ -29,7 +31,9 @@ void ArithmeticLogicCPUKernel<T>::Less(const T *input1, const T *input2, bool *o
auto iter = base_iter;
iter.SetPos(start);
for (size_t i = start; i < end; i++) {
out[i] = input1[iter.GetInputPosA()] < input2[iter.GetInputPosB()];
auto x = input1[iter.GetInputPosA()];
auto y = input2[iter.GetInputPosB()];
out[i] = std::less<T>()(x, y);
iter.GenNextPos();
}
};
@@ -37,7 +41,9 @@ void ArithmeticLogicCPUKernel<T>::Less(const T *input1, const T *input2, bool *o
} else {
base_iter.SetPos(0);
for (size_t i = 0; i < output_size_; i++) {
out[i] = input1[base_iter.GetInputPosA()] < input2[base_iter.GetInputPosB()];
auto x = input1[base_iter.GetInputPosA()];
auto y = input2[base_iter.GetInputPosB()];
out[i] = std::less<T>()(x, y);
base_iter.GenNextPos();
}
}
@@ -50,7 +56,9 @@ void ArithmeticLogicCPUKernel<T>::Equal(const T *input1, const T *input2, bool *
auto iter = base_iter;
iter.SetPos(start);
for (size_t i = start; i < end; i++) {
out[i] = input1[iter.GetInputPosA()] == input2[iter.GetInputPosB()];
auto x = input1[iter.GetInputPosA()];
auto y = input2[iter.GetInputPosB()];
out[i] = std::equal_to<T>()(x, y);
iter.GenNextPos();
}
};
@@ -64,7 +72,9 @@ void ArithmeticLogicCPUKernel<T>::NotEqual(const T *input1, const T *input2, boo
auto iter = base_iter;
iter.SetPos(start);
for (size_t i = start; i < end; i++) {
out[i] = input1[iter.GetInputPosA()] != input2[iter.GetInputPosB()];
auto x = input1[iter.GetInputPosA()];
auto y = input2[iter.GetInputPosB()];
out[i] = std::not_equal_to<T>()(x, y);
iter.GenNextPos();
}
};
@@ -106,7 +116,9 @@ void ArithmeticLogicCPUKernel<T>::Greater(const T *input1, const T *input2, bool
auto iter = base_iter;
iter.SetPos(start);
for (size_t i = start; i < end; i++) {
out[i] = input1[iter.GetInputPosA()] > input2[iter.GetInputPosB()];
auto x = input1[iter.GetInputPosA()];
auto y = input2[iter.GetInputPosB()];
out[i] = std::greater<T>()(x, y);
iter.GenNextPos();
}
};
@@ -120,7 +132,9 @@ void ArithmeticLogicCPUKernel<T>::GreaterEqual(const T *input1, const T *input2,
auto iter = base_iter;
iter.SetPos(start);
for (size_t i = start; i < end; i++) {
out[i] = input1[iter.GetInputPosA()] >= input2[iter.GetInputPosB()];
auto x = input1[iter.GetInputPosA()];
auto y = input2[iter.GetInputPosB()];
out[i] = std::greater_equal<T>()(x, y);
iter.GenNextPos();
}
};
@@ -134,7 +148,9 @@ void ArithmeticLogicCPUKernel<T>::LessEqual(const T *input1, const T *input2, bo
auto iter = base_iter;
iter.SetPos(start);
for (size_t i = start; i < end; i++) {
out[i] = input1[iter.GetInputPosA()] <= input2[iter.GetInputPosB()];
auto x = input1[iter.GetInputPosA()];
auto y = input2[iter.GetInputPosB()];
out[i] = std::less_equal<T>()(x, y);
iter.GenNextPos();
}
};
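Throughout this file the bare relational operators are replaced with the standard comparison functors. The behaviour is identical; routing the comparison through std::less, std::equal_to and friends presumably keeps PC-lint from flagging direct floating-point comparisons when T is a floating-point type. A tiny self-contained illustration of the pattern (plain C++, unrelated to the kernel code):

#include <functional>
#include <iostream>

int main() {
  float x = 0.1f;
  float y = 0.2f;
  // std::less<float>() and std::equal_to<float>() perform the same built-in
  // comparisons as `x < y` and `x == y`; only the spelling changes, which is
  // what a lint rule against bare floating-point comparisons reacts to.
  std::cout << std::boolalpha
            << std::less<float>()(x, y) << ' '        // true  (same as x < y)
            << std::equal_to<float>()(x, y) << '\n';  // false (same as x == y)
  return 0;
}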

View File

@@ -43,9 +43,9 @@ void DropoutGradCpuBwdKernel::InitKernel(const CNodePtr &kernel_node) {
bool DropoutGradCpuBwdKernel::Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &,
const std::vector<AddressPtr> &outputs) {
if (dtype_ == kNumberTypeFloat16) {
DropoutBackwardKernel<float16>(inputs, outputs, num_count_, keep_prob_);
DropoutBackwardKernel<float16>(inputs, outputs, keep_prob_);
} else if (dtype_ == kNumberTypeFloat32) {
DropoutBackwardKernel<float>(inputs, outputs, num_count_, keep_prob_);
DropoutBackwardKernel<float>(inputs, outputs, keep_prob_);
} else {
MS_LOG(ERROR) << "Input data type: " << dtype_ << " is not supported for DropoutGrad kernel for CPU.";
}
@@ -55,8 +55,7 @@ bool DropoutGradCpuBwdKernel::Launch(const std::vector<AddressPtr> &inputs, cons
template <typename T>
void DropoutGradCpuBwdKernel::DropoutBackwardKernel(const std::vector<AddressPtr> &inputs,
const std::vector<AddressPtr> &outputs, size_t num_count,
float keep_prob) {
const std::vector<AddressPtr> &outputs, float keep_prob) {
auto *output = reinterpret_cast<T *>(outputs[0]->addr);
const auto *input = reinterpret_cast<T *>(inputs[0]->addr);
const auto *mask = reinterpret_cast<T *>(inputs[1]->addr);
@@ -70,7 +69,7 @@ void DropoutGradCpuBwdKernel::DropoutBackwardKernel(const std::vector<AddressPtr
input_tmp[i] = static_cast<float>(input[i]);
mask_tmp[i] = static_cast<float>(mask[i]);
}
DropoutGrad(input_tmp, mask_tmp, output_tmp, num_count_, scale);
DropoutGrad(input_tmp, mask_tmp, output_tmp, SizeToInt(num_count_), scale);
for (size_t i = 0; i < num_count_; ++i) {
output[i] = static_cast<float16>(output_tmp[i]);
}
@@ -78,7 +77,7 @@ void DropoutGradCpuBwdKernel::DropoutBackwardKernel(const std::vector<AddressPtr
delete[] output_tmp;
delete[] mask_tmp;
} else if constexpr (std::is_same_v<T, float>) {
DropoutGrad(input, mask, output, num_count_, scale);
DropoutGrad(input, mask, output, SizeToInt(num_count_), scale);
}
}
} // namespace kernel

View File

@@ -40,7 +40,7 @@ class DropoutGradCpuBwdKernel : public CPUKernel {
TypeId dtype_{kTypeUnknown};
template <typename T>
void DropoutBackwardKernel(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &outputs,
size_t num_count, float keep_prob);
float keep_prob);
};
MS_REG_CPU_KERNEL(DropoutGrad, KernelAttr(), DropoutGradCpuBwdKernel);

View File

@@ -64,7 +64,8 @@ void SplitCPUKernel<T>::LaunchSplit(T *input, T **output, size_t size) {
param.split_count_ *= input_shape_[i];
}
auto task = [&](size_t start, size_t end) {
(void)DoSplit(input, reinterpret_cast<void **>(output), &input_shape_[0], start, end - start, &param, sizeof(T));
(void)DoSplit(input, reinterpret_cast<void **>(output), &input_shape_[0], SizeToInt(start), SizeToInt(end - start),
&param, SizeToInt(sizeof(T)));
};
CPUKernelUtils::ParallelForAutoSearch(task, param.split_count_ * param.num_split_, &parallel_search_info_);
return;
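SizeToInt here, like the LongToInt / IntToSize calls in the other files of this change, makes the size_t/int narrowing explicit instead of leaving it to an implicit conversion, which is what the lint warnings object to. A rough sketch of what such a checked conversion helper can look like; the name size_to_int_checked and the throwing behaviour are assumptions for this example, and MindSpore's actual helpers may report errors differently:

#include <climits>
#include <cstddef>
#include <stdexcept>

// Explicit, range-checked size_t -> int narrowing in the spirit of SizeToInt:
// the conversion is spelled out at the call site rather than happening silently.
inline int size_to_int_checked(size_t value) {
  if (value > static_cast<size_t>(INT_MAX)) {
    throw std::out_of_range("size_t value does not fit into int");
  }
  return static_cast<int>(value);
}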

View File

@@ -46,8 +46,8 @@ void TransposeCPUFwdKernel::InitKernel(const CNodePtr &kernel_node) {
transpose_param_.strides_[num_axes - 1] = 1;
transpose_param_.out_strides_[num_axes - 1] = 1;
for (int i = num_axes - 2; i >= 0; i--) {
transpose_param_.strides_[i] = input_shape_[i + 1] * transpose_param_.strides_[i + 1];
transpose_param_.out_strides_[i] = output_shape_[i + 1] * transpose_param_.out_strides_[i + 1];
transpose_param_.strides_[i] = SizeToInt(input_shape_[i + 1]) * transpose_param_.strides_[i + 1];
transpose_param_.out_strides_[i] = SizeToInt(output_shape_[i + 1]) * transpose_param_.out_strides_[i + 1];
}
launch_map_[kNumberTypeInt8] = &TransposeCPUFwdKernel::LaunchKernel<int8_t>;
launch_map_[kNumberTypeInt16] = &TransposeCPUFwdKernel::LaunchKernel<int16_t>;
@@ -87,7 +87,7 @@ void TransposeCPUFwdKernel::LaunchKernel(const std::vector<AddressPtr> &inputs,
}
size_t data_count = (inputs[0]->size) / sizeof(T);
if (axes_.size() <= DIMENSION_6D && data_count < MAX_TRANSPOSE_SERIAL_SIZE) {
int res = NNACL_ERR;
int res = static_cast<int>(NNACL_ERR);
if constexpr (std::is_same_v<T, int8_t>) {
res = DoTransposeInt8(input_addr, output_addr, output_shape, &transpose_param_);
} else if constexpr (std::is_same_v<T, int16_t>) {
@@ -121,7 +121,7 @@ template <typename T>
void TransposeCPUFwdKernel::ParallelRun(const T *input_addr, T *output_addr, const int *output_shape, size_t count) {
auto max_thread_num = common::ThreadPool::GetInstance().GetSyncRunThreadNum();
const float block_size = 128.0;
size_t thread_num = count < block_size * max_thread_num ? std::ceil(count / block_size) : max_thread_num;
size_t thread_num = count < block_size * max_thread_num ? FloatToSize(std::ceil(count / block_size)) : max_thread_num;
std::vector<common::Task> tasks;
std::function<void(const T *, T *, const int *, TransposeParameter *, int, int)> TransposeDims;
@@ -147,13 +147,13 @@ void TransposeCPUFwdKernel::ParallelRun(const T *input_addr, T *output_addr, con
TransposeDims = &TransposeDimsBool;
}
for (int task_id = 0; task_id < SizeToInt(thread_num); ++task_id) {
auto task = [&, task_id, thread_num]() {
auto task = [this, &TransposeDims, &input_addr, &output_addr, &output_shape, task_id, thread_num]() {
TransposeDims(input_addr, output_addr, output_shape, &transpose_param_, task_id, SizeToInt(thread_num));
return common::SUCCESS;
};
tasks.emplace_back(task);
(void)tasks.emplace_back(task);
}
common::ThreadPool::GetInstance().SyncRun(tasks);
(void)common::ThreadPool::GetInstance().SyncRun(tasks);
}
} // namespace kernel
} // namespace mindspore
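Two further lint-driven patterns recur in this file: the lambda's capture list is spelled out instead of the catch-all `[&, task_id, thread_num]`, and return values that are intentionally ignored are cast to void. A small generic illustration of both, in plain C++ and not tied to MindSpore code:

#include <cstdio>
#include <vector>

int do_work(int task_id) { return task_id * 2; }

int main() {
  std::vector<int> results;
  int scale = 3;
  for (int task_id = 0; task_id < 4; ++task_id) {
    // Explicit capture list: every captured entity is named, so the reader
    // (and the linter) can see exactly what the closure depends on.
    auto task = [&results, scale, task_id]() { results.push_back(do_work(task_id) * scale); };
    task();
  }
  // Deliberately discarding a return value: the (void) cast documents the
  // intent and silences "return value ignored" style diagnostics.
  (void)std::printf("%zu results\n", results.size());
  return 0;
}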

View File

@@ -29,18 +29,18 @@ void UnpackCPUKernel<T>::InitKernel(const CNodePtr &kernel_node) {
}
output_num_ = LongToSize(AnfAlgo::GetNodeAttr<int64_t>(kernel_node, "num"));
unstack_param_.num_ = SizeToInt(output_num_);
unstack_param_.axis_ = LongToSize(axis_tmp);
unstack_param_.axis_ = LongToInt(axis_tmp);
unstack_param_.pre_dims_ = 1;
unstack_param_.axis_dim_ = 1;
unstack_param_.after_dims_ = 1;
for (size_t i = 0; i < input_shape.size(); i++) {
if (static_cast<int>(i) < unstack_param_.axis_) {
unstack_param_.pre_dims_ *= input_shape[i];
} else if (static_cast<int>(i) > unstack_param_.axis_) {
unstack_param_.after_dims_ *= input_shape[i];
if (i < IntToSize(unstack_param_.axis_)) {
unstack_param_.pre_dims_ *= SizeToInt(input_shape[i]);
} else if (i > IntToSize(unstack_param_.axis_)) {
unstack_param_.after_dims_ *= SizeToInt(input_shape[i]);
} else {
unstack_param_.axis_dim_ = input_shape[i];
unstack_param_.axis_dim_ = SizeToInt(input_shape[i]);
}
}
dtype_ = AnfAlgo::GetInputDeviceDataType(kernel_node, 0);