forked from mindspore-Ecosystem/mindspore
pclint warnings clean
parent 9629b4ebd9, commit bf0876ed23
@@ -76,27 +76,10 @@ void ApplyAdagradCPUKernel::LaunchKernel(const std::vector<AddressPtr> &inputs,
-  // multithreading
   size_t length = inputs[0]->size / sizeof(T);
-  size_t max_thread_num = std::thread::hardware_concurrency();
-  size_t use_thread_num = length < 128 * max_thread_num ? std::ceil(length / 128.0) : max_thread_num;
-  std::vector<std::thread> threads;
-  threads.reserve(use_thread_num);
-  size_t start = 0;
-  const size_t batch_size = (length + use_thread_num - 1) / use_thread_num;
-
-  if (batch_size == 0) {
-    MS_LOG(EXCEPTION) << "Error occur in launch kernel";
-    return;
-  }
-  while (start < length) {
-    size_t end = (start + batch_size) > length ? length : (start + batch_size);
-    threads.emplace_back(
-      std::thread(&ApplyAdagradCPUKernel::LaunchApplyAdagrad<T *>, this, var, accum, lr, gradient, start, end));
-    start += batch_size;
-  }
-
-  for (auto &it : threads) {
-    it.join();
-  }
+  auto task = [this, &var, &accum, lr, gradient](size_t start, size_t end) {
+    LaunchApplyAdagrad(var, accum, lr, gradient, start, end);
+  };
+  CPUKernelUtils::ParallelForAutoSearch(task, length, &parallel_search_info_);
 
   // Copy result to output tensor
   auto output_var = reinterpret_cast<T *>(outputs[0]->addr);
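The hunk above replaces the hand-rolled std::thread splitting with CPUKernelUtils::ParallelForAutoSearch, so the kernel only supplies a per-range lambda. As a rough, self-contained sketch of what such a chunked parallel-for helper does (an illustrative stand-in, not the actual CPUKernelUtils API), the element count is split into near-equal [start, end) slices and the task is run once per slice:

```cpp
#include <algorithm>
#include <cstddef>
#include <functional>
#include <iostream>
#include <thread>
#include <vector>

// Illustrative stand-in for a parallel-for helper: split [0, total) into
// roughly equal chunks and hand each [start, end) range to the task.
void ParallelForSketch(const std::function<void(size_t, size_t)> &task, size_t total) {
  const size_t thread_num = std::max<size_t>(1, std::thread::hardware_concurrency());
  const size_t batch = (total + thread_num - 1) / thread_num;
  std::vector<std::thread> workers;
  for (size_t start = 0; start < total; start += batch) {
    const size_t end = std::min(total, start + batch);
    workers.emplace_back(task, start, end);  // each worker handles one slice
  }
  for (auto &w : workers) {
    w.join();
  }
}

int main() {
  std::vector<float> data(1000, 1.0f);
  // Mirrors the kernel-side usage in the diff: capture the buffers and apply
  // the element-wise update to the assigned [start, end) slice.
  auto task = [&data](size_t start, size_t end) {
    for (size_t i = start; i < end; ++i) {
      data[i] *= 2.0f;
    }
  };
  ParallelForSketch(task, data.size());
  std::cout << data.front() << " " << data.back() << std::endl;  // prints "2 2"
  return 0;
}
```

Moving thread-count selection into one shared helper also removes the implicit float-to-size_t narrowing (std::ceil(length / 128.0)) that lint tools flag in the deleted block.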
@@ -13,10 +13,12 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
 
+#include "backend/kernel_compiler/cpu/arithmetic_logic_cpu_kernel.h"
 #include <cmath>
 #include <string>
+#include <map>
-#include "backend/kernel_compiler/cpu/arithmetic_logic_cpu_kernel.h"
+#include <functional>
 #include "runtime/device/cpu/cpu_device_address.h"
 
 namespace mindspore {
@@ -29,7 +31,9 @@ void ArithmeticLogicCPUKernel<T>::Less(const T *input1, const T *input2, bool *o
     auto iter = base_iter;
     iter.SetPos(start);
     for (size_t i = start; i < end; i++) {
-      out[i] = input1[iter.GetInputPosA()] < input2[iter.GetInputPosB()];
+      auto x = input1[iter.GetInputPosA()];
+      auto y = input2[iter.GetInputPosB()];
+      out[i] = std::less<T>()(x, y);
       iter.GenNextPos();
     }
   };
@@ -37,7 +41,9 @@ void ArithmeticLogicCPUKernel<T>::Less(const T *input1, const T *input2, bool *o
   } else {
     base_iter.SetPos(0);
     for (size_t i = 0; i < output_size_; i++) {
-      out[i] = input1[base_iter.GetInputPosA()] < input2[base_iter.GetInputPosB()];
+      auto x = input1[base_iter.GetInputPosA()];
+      auto y = input2[base_iter.GetInputPosB()];
+      out[i] = std::less<T>()(x, y);
       base_iter.GenNextPos();
     }
   }
@@ -50,7 +56,9 @@ void ArithmeticLogicCPUKernel<T>::Equal(const T *input1, const T *input2, bool *
     auto iter = base_iter;
     iter.SetPos(start);
     for (size_t i = start; i < end; i++) {
-      out[i] = input1[iter.GetInputPosA()] == input2[iter.GetInputPosB()];
+      auto x = input1[iter.GetInputPosA()];
+      auto y = input2[iter.GetInputPosB()];
+      out[i] = std::equal_to<T>()(x, y);
       iter.GenNextPos();
     }
   };
@@ -64,7 +72,9 @@ void ArithmeticLogicCPUKernel<T>::NotEqual(const T *input1, const T *input2, boo
     auto iter = base_iter;
     iter.SetPos(start);
     for (size_t i = start; i < end; i++) {
-      out[i] = input1[iter.GetInputPosA()] != input2[iter.GetInputPosB()];
+      auto x = input1[iter.GetInputPosA()];
+      auto y = input2[iter.GetInputPosB()];
+      out[i] = std::not_equal_to<T>()(x, y);
       iter.GenNextPos();
     }
   };
@@ -106,7 +116,9 @@ void ArithmeticLogicCPUKernel<T>::Greater(const T *input1, const T *input2, bool
     auto iter = base_iter;
     iter.SetPos(start);
     for (size_t i = start; i < end; i++) {
-      out[i] = input1[iter.GetInputPosA()] > input2[iter.GetInputPosB()];
+      auto x = input1[iter.GetInputPosA()];
+      auto y = input2[iter.GetInputPosB()];
+      out[i] = std::greater<T>()(x, y);
       iter.GenNextPos();
     }
   };
@@ -120,7 +132,9 @@ void ArithmeticLogicCPUKernel<T>::GreaterEqual(const T *input1, const T *input2,
     auto iter = base_iter;
     iter.SetPos(start);
     for (size_t i = start; i < end; i++) {
-      out[i] = input1[iter.GetInputPosA()] >= input2[iter.GetInputPosB()];
+      auto x = input1[iter.GetInputPosA()];
+      auto y = input2[iter.GetInputPosB()];
+      out[i] = std::greater_equal<T>()(x, y);
       iter.GenNextPos();
     }
   };
@@ -134,7 +148,9 @@ void ArithmeticLogicCPUKernel<T>::LessEqual(const T *input1, const T *input2, bo
     auto iter = base_iter;
     iter.SetPos(start);
     for (size_t i = start; i < end; i++) {
-      out[i] = input1[iter.GetInputPosA()] <= input2[iter.GetInputPosB()];
+      auto x = input1[iter.GetInputPosA()];
+      auto y = input2[iter.GetInputPosB()];
+      out[i] = std::less_equal<T>()(x, y);
       iter.GenNextPos();
     }
   };
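Every comparison hunk above applies the same rewrite: load both operands into named temporaries, then evaluate the matching <functional> comparator (std::less, std::equal_to, std::not_equal_to, std::greater, std::greater_equal, std::less_equal) instead of the raw operator on indexed expressions. A minimal stand-alone sketch of the pattern, without the broadcast-iterator machinery:

```cpp
#include <cstddef>
#include <functional>
#include <iostream>
#include <vector>

// Element-wise comparison with a standard comparison functor, mirroring the
// "auto x = ...; auto y = ...; out[i] = Cmp()(x, y);" shape used in the diff.
template <typename T, typename Cmp>
std::vector<bool> Compare(const std::vector<T> &a, const std::vector<T> &b, Cmp cmp) {
  std::vector<bool> out(a.size());
  for (size_t i = 0; i < a.size(); ++i) {
    auto x = a[i];
    auto y = b[i];
    out[i] = cmp(x, y);
  }
  return out;
}

int main() {
  const std::vector<int> a{1, 2, 3};
  const std::vector<int> b{3, 2, 1};
  auto less_out = Compare(a, b, std::less<int>());        // true, false, false
  auto equal_out = Compare(a, b, std::equal_to<int>());   // false, true, false
  std::cout << less_out[0] << equal_out[1] << std::endl;  // prints "11"
  return 0;
}
```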
@@ -43,9 +43,9 @@ void DropoutGradCpuBwdKernel::InitKernel(const CNodePtr &kernel_node) {
 bool DropoutGradCpuBwdKernel::Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &,
                                      const std::vector<AddressPtr> &outputs) {
   if (dtype_ == kNumberTypeFloat16) {
-    DropoutBackwardKernel<float16>(inputs, outputs, num_count_, keep_prob_);
+    DropoutBackwardKernel<float16>(inputs, outputs, keep_prob_);
   } else if (dtype_ == kNumberTypeFloat32) {
-    DropoutBackwardKernel<float>(inputs, outputs, num_count_, keep_prob_);
+    DropoutBackwardKernel<float>(inputs, outputs, keep_prob_);
   } else {
     MS_LOG(ERROR) << "Input data type: " << dtype_ << " is not supported for DropoutGrad kernel for CPU.";
   }
@@ -55,8 +55,7 @@ bool DropoutGradCpuBwdKernel::Launch(const std::vector<AddressPtr> &inputs, cons
 
 template <typename T>
 void DropoutGradCpuBwdKernel::DropoutBackwardKernel(const std::vector<AddressPtr> &inputs,
-                                                    const std::vector<AddressPtr> &outputs, size_t num_count,
-                                                    float keep_prob) {
+                                                    const std::vector<AddressPtr> &outputs, float keep_prob) {
   auto *output = reinterpret_cast<T *>(outputs[0]->addr);
   const auto *input = reinterpret_cast<T *>(inputs[0]->addr);
   const auto *mask = reinterpret_cast<T *>(inputs[1]->addr);
@@ -70,7 +69,7 @@ void DropoutGradCpuBwdKernel::DropoutBackwardKernel(const std::vector<AddressPtr
       input_tmp[i] = static_cast<float>(input[i]);
       mask_tmp[i] = static_cast<float>(mask[i]);
     }
-    DropoutGrad(input_tmp, mask_tmp, output_tmp, num_count_, scale);
+    DropoutGrad(input_tmp, mask_tmp, output_tmp, SizeToInt(num_count_), scale);
     for (size_t i = 0; i < num_count_; ++i) {
       output[i] = static_cast<float16>(output_tmp[i]);
     }
@@ -78,7 +77,7 @@ void DropoutGradCpuBwdKernel::DropoutBackwardKernel(const std::vector<AddressPtr
     delete[] output_tmp;
     delete[] mask_tmp;
   } else if constexpr (std::is_same_v<T, float>) {
-    DropoutGrad(input, mask, output, num_count_, scale);
+    DropoutGrad(input, mask, output, SizeToInt(num_count_), scale);
   }
 }
 }  // namespace kernel
@@ -40,7 +40,7 @@ class DropoutGradCpuBwdKernel : public CPUKernel {
   TypeId dtype_{kTypeUnknown};
   template <typename T>
   void DropoutBackwardKernel(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &outputs,
-                             size_t num_count, float keep_prob);
+                             float keep_prob);
 };
 
 MS_REG_CPU_KERNEL(DropoutGrad, KernelAttr(), DropoutGradCpuBwdKernel);
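The dropout hunks drop the unused num_count parameter and route the remaining size_t-to-int narrowing through SizeToInt. The helper below is a hypothetical stand-in written for illustration only; the real SizeToInt lives in MindSpore's utility headers and may behave differently, but the idea is the same: make the narrowing explicit and fail loudly instead of truncating silently.

```cpp
#include <climits>
#include <cstddef>
#include <iostream>
#include <stdexcept>

// Hypothetical stand-in for a SizeToInt-style helper (assumed, not the real
// MindSpore implementation): checked narrowing from size_t to int.
int SizeToIntSketch(size_t u) {
  if (u > static_cast<size_t>(INT_MAX)) {
    throw std::runtime_error("size_t value does not fit into int");
  }
  return static_cast<int>(u);
}

int main() {
  const size_t num_count = 1024;
  // Call sites pass SizeToIntSketch(num_count) wherever an int is expected,
  // mirroring DropoutGrad(..., SizeToInt(num_count_), scale) in the diff.
  std::cout << SizeToIntSketch(num_count) << std::endl;  // prints 1024
  return 0;
}
```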
@@ -64,7 +64,8 @@ void SplitCPUKernel<T>::LaunchSplit(T *input, T **output, size_t size) {
     param.split_count_ *= input_shape_[i];
   }
   auto task = [&](size_t start, size_t end) {
-    (void)DoSplit(input, reinterpret_cast<void **>(output), &input_shape_[0], start, end - start, &param, sizeof(T));
+    (void)DoSplit(input, reinterpret_cast<void **>(output), &input_shape_[0], SizeToInt(start), SizeToInt(end - start),
+                  &param, SizeToInt(sizeof(T)));
   };
   CPUKernelUtils::ParallelForAutoSearch(task, param.split_count_ * param.num_split_, &parallel_search_info_);
   return;
@@ -46,8 +46,8 @@ void TransposeCPUFwdKernel::InitKernel(const CNodePtr &kernel_node) {
   transpose_param_.strides_[num_axes - 1] = 1;
   transpose_param_.out_strides_[num_axes - 1] = 1;
   for (int i = num_axes - 2; i >= 0; i--) {
-    transpose_param_.strides_[i] = input_shape_[i + 1] * transpose_param_.strides_[i + 1];
-    transpose_param_.out_strides_[i] = output_shape_[i + 1] * transpose_param_.out_strides_[i + 1];
+    transpose_param_.strides_[i] = SizeToInt(input_shape_[i + 1]) * transpose_param_.strides_[i + 1];
+    transpose_param_.out_strides_[i] = SizeToInt(output_shape_[i + 1]) * transpose_param_.out_strides_[i + 1];
   }
   launch_map_[kNumberTypeInt8] = &TransposeCPUFwdKernel::LaunchKernel<int8_t>;
   launch_map_[kNumberTypeInt16] = &TransposeCPUFwdKernel::LaunchKernel<int16_t>;
@@ -87,7 +87,7 @@ void TransposeCPUFwdKernel::LaunchKernel(const std::vector<AddressPtr> &inputs,
   }
   size_t data_count = (inputs[0]->size) / sizeof(T);
   if (axes_.size() <= DIMENSION_6D && data_count < MAX_TRANSPOSE_SERIAL_SIZE) {
-    int res = NNACL_ERR;
+    int res = static_cast<int>(NNACL_ERR);
     if constexpr (std::is_same_v<T, int8_t>) {
       res = DoTransposeInt8(input_addr, output_addr, output_shape, &transpose_param_);
     } else if constexpr (std::is_same_v<T, int16_t>) {
@@ -121,7 +121,7 @@ template <typename T>
 void TransposeCPUFwdKernel::ParallelRun(const T *input_addr, T *output_addr, const int *output_shape, size_t count) {
   auto max_thread_num = common::ThreadPool::GetInstance().GetSyncRunThreadNum();
   const float block_size = 128.0;
-  size_t thread_num = count < block_size * max_thread_num ? std::ceil(count / block_size) : max_thread_num;
+  size_t thread_num = count < block_size * max_thread_num ? FloatToSize(std::ceil(count / block_size)) : max_thread_num;
   std::vector<common::Task> tasks;
   std::function<void(const T *, T *, const int *, TransposeParameter *, int, int)> TransposeDims;
 
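Same idea in the other direction: FloatToSize makes the float-to-size_t conversion of the std::ceil result explicit rather than implicit. A hypothetical stand-in helper and the same thread-count computation:

```cpp
#include <cmath>
#include <cstddef>
#include <iostream>

// Hypothetical stand-in for a FloatToSize-style helper (assumed, not the real
// MindSpore implementation): explicit float -> size_t conversion.
size_t FloatToSizeSketch(float v) { return v <= 0.0f ? 0 : static_cast<size_t>(v); }

int main() {
  const float block_size = 128.0;
  const size_t max_thread_num = 8;
  const size_t count = 1000;
  // Mirrors the thread-count computation in the transpose hunk above.
  const size_t thread_num =
    count < block_size * max_thread_num ? FloatToSizeSketch(std::ceil(count / block_size)) : max_thread_num;
  std::cout << thread_num << std::endl;  // ceil(1000 / 128) = 8
  return 0;
}
```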
@@ -147,13 +147,13 @@ void TransposeCPUFwdKernel::ParallelRun(const T *input_addr, T *output_addr, con
     TransposeDims = &TransposeDimsBool;
   }
   for (int task_id = 0; task_id < SizeToInt(thread_num); ++task_id) {
-    auto task = [&, task_id, thread_num]() {
+    auto task = [this, &TransposeDims, &input_addr, &output_addr, &output_shape, task_id, thread_num]() {
       TransposeDims(input_addr, output_addr, output_shape, &transpose_param_, task_id, SizeToInt(thread_num));
       return common::SUCCESS;
     };
-    tasks.emplace_back(task);
+    (void)tasks.emplace_back(task);
   }
-  common::ThreadPool::GetInstance().SyncRun(tasks);
+  (void)common::ThreadPool::GetInstance().SyncRun(tasks);
 }
 }  // namespace kernel
 }  // namespace mindspore
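Two small habits in the hunk above keep lint quiet: the lambda lists every captured entity explicitly instead of relying on a default [&, ...] capture, and deliberately ignored return values are cast to void. A minimal sketch of both, outside the kernel code:

```cpp
#include <iostream>
#include <vector>

int main() {
  std::vector<int> results;
  const int offset = 10;
  for (int task_id = 0; task_id < 4; ++task_id) {
    // Explicit capture: the lambda names &results, offset and task_id, so a
    // reader (and a linter) can see exactly what state it touches.
    auto task = [&results, offset, task_id]() {
      results.push_back(offset + task_id);
      return 0;  // status code, unused by this caller
    };
    (void)task();  // the return value is discarded on purpose
  }
  std::cout << results.size() << std::endl;  // prints 4
  return 0;
}
```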
@@ -29,18 +29,18 @@ void UnpackCPUKernel<T>::InitKernel(const CNodePtr &kernel_node) {
   }
   output_num_ = LongToSize(AnfAlgo::GetNodeAttr<int64_t>(kernel_node, "num"));
   unstack_param_.num_ = SizeToInt(output_num_);
-  unstack_param_.axis_ = LongToSize(axis_tmp);
+  unstack_param_.axis_ = LongToInt(axis_tmp);
   unstack_param_.pre_dims_ = 1;
   unstack_param_.axis_dim_ = 1;
   unstack_param_.after_dims_ = 1;
 
   for (size_t i = 0; i < input_shape.size(); i++) {
-    if (static_cast<int>(i) < unstack_param_.axis_) {
-      unstack_param_.pre_dims_ *= input_shape[i];
-    } else if (static_cast<int>(i) > unstack_param_.axis_) {
-      unstack_param_.after_dims_ *= input_shape[i];
+    if (i < IntToSize(unstack_param_.axis_)) {
+      unstack_param_.pre_dims_ *= SizeToInt(input_shape[i]);
+    } else if (i > IntToSize(unstack_param_.axis_)) {
+      unstack_param_.after_dims_ *= SizeToInt(input_shape[i]);
     } else {
-      unstack_param_.axis_dim_ = input_shape[i];
+      unstack_param_.axis_dim_ = SizeToInt(input_shape[i]);
     }
   }
   dtype_ = AnfAlgo::GetInputDeviceDataType(kernel_node, 0);
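In the unpack hunk the size_t loop index is no longer cast down to int; instead the int axis is lifted to size_t with IntToSize, so both sides of each comparison are unsigned. A small self-contained sketch with a hypothetical helper of the same shape:

```cpp
#include <cassert>
#include <cstddef>
#include <vector>

// Hypothetical stand-in for an IntToSize-style helper (assumed, not the real
// MindSpore implementation): lift a non-negative int to size_t.
size_t IntToSizeSketch(int v) {
  assert(v >= 0 && "axis is expected to be non-negative here");
  return static_cast<size_t>(v);
}

int main() {
  const std::vector<size_t> input_shape{2, 3, 4};
  const int axis = 1;
  size_t pre_dims = 1, axis_dim = 1, after_dims = 1;
  for (size_t i = 0; i < input_shape.size(); i++) {
    // Unsigned-vs-unsigned comparison, mirroring the IntToSize(...) calls above.
    if (i < IntToSizeSketch(axis)) {
      pre_dims *= input_shape[i];
    } else if (i > IntToSizeSketch(axis)) {
      after_dims *= input_shape[i];
    } else {
      axis_dim = input_shape[i];
    }
  }
  assert(pre_dims == 2 && axis_dim == 3 && after_dims == 4);
  return 0;
}
```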