Code Cleanings 1102
This commit is contained in:
parent
8f0b175207
commit
37074876d2
|
@ -36,9 +36,9 @@ void BincountCpuKernelMod::InitKernel(const CNodePtr &kernel_node) {
|
|||
}
|
||||
|
||||
template <typename T_in, typename T_out>
|
||||
void BincountTask(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspaces,
|
||||
void BincountTask(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &,
|
||||
const std::vector<AddressPtr> &outputs, const std::vector<int64_t> &input_arr_sizes, int32_t num_bins,
|
||||
const std::vector<int64_t> &input_weights_sizes, const std::vector<int64_t> &output_sizes) {
|
||||
const std::vector<int64_t> &input_weights_sizes, const std::vector<int64_t> &) {
|
||||
auto bin_array = static_cast<T_in *>(inputs[0]->addr);
|
||||
auto output_data = static_cast<T_out *>(outputs[0]->addr);
|
||||
const size_t data_num = SizeOf(input_arr_sizes);
|
||||
|
|
|
@ -187,9 +187,9 @@ size_t CombinedNonMaxSuppressionCpuKernelMod::nms_perbath(float *boxes, float *s
|
|||
int *valid_detection) {
|
||||
int box_size = num_bath_ * num_detection_ * sizeof(float) * multiplier;
|
||||
int score_size = num_bath_ * num_detection_ * sizeof(float);
|
||||
void(memset_s(nmsed_boxes, box_size, 0.0, box_size));
|
||||
void(memset_s(nmsed_scores, score_size, 0.0, score_size));
|
||||
void(memset_s(nmsed_class, score_size, 0.0, score_size));
|
||||
void(memset_s(nmsed_boxes, box_size, 0, box_size));
|
||||
void(memset_s(nmsed_scores, score_size, 0, score_size));
|
||||
void(memset_s(nmsed_class, score_size, 0, score_size));
|
||||
void(memset_s(valid_detection, sizeof(int) * num_bath_, 0, sizeof(int) * num_bath_));
|
||||
const float box_min = 0.0;
|
||||
const float box_max = 1.0;
|
||||
|
|
|
@ -86,7 +86,7 @@ struct TTypes {
|
|||
class EigenTensor {
|
||||
public:
|
||||
EigenTensor() = delete;
|
||||
EigenTensor(ShapeVector &shape, void *data_ptr) : tensor_shape(shape), tensor_data_ptr(data_ptr) {}
|
||||
EigenTensor(const ShapeVector &shape, void *data_ptr) : tensor_shape(shape), tensor_data_ptr(data_ptr) {}
|
||||
EigenTensor(std::vector<size_t> &shape, void *data_ptr) : tensor_data_ptr(data_ptr) {
|
||||
for (size_t dim : shape) {
|
||||
(void)tensor_shape.emplace_back(static_cast<int64_t>(dim));
|
||||
|
|
|
@ -100,19 +100,19 @@ std::unique_ptr<T[]> GeqrfCpuKernelMod::Larf(size_t m, size_t n, T *x, T *tau, s
|
|||
}
|
||||
|
||||
template <typename T>
|
||||
void GeqrfCpuKernelMod::Geqrf(size_t num_m, size_t num_n, T *x, T *tau) {
|
||||
if (num_m < 0 || num_n < 0) {
|
||||
void GeqrfCpuKernelMod::Geqrf(size_t num_m_, size_t num_n_, T *x, T *tau) {
|
||||
if (num_m_ < 0 || num_n_ < 0) {
|
||||
return;
|
||||
}
|
||||
size_t k = std::min(num_m, num_n);
|
||||
size_t k = std::min(num_m_, num_n_);
|
||||
T one = static_cast<T>(1);
|
||||
std::unique_ptr<T[]> workspace = std::make_unique<T[]>(num_n);
|
||||
std::unique_ptr<T[]> workspace = std::make_unique<T[]>(num_n_);
|
||||
for (size_t i = 0; i < k; i++) {
|
||||
Larfg<T>(num_m - i, i, i, x, tau + i);
|
||||
T aii = *(x + i * num_n + i);
|
||||
*(x + i * num_n + i) = one;
|
||||
workspace = Larf<T>(num_m - i, num_n - i - 1, x, tau + i, std::move(workspace), i, i + 1);
|
||||
*(x + i * num_n + i) = aii;
|
||||
Larfg<T>(num_m_ - i, i, i, x, tau + i);
|
||||
T aii = *(x + i * num_n_ + i);
|
||||
*(x + i * num_n_ + i) = one;
|
||||
workspace = Larf<T>(num_m_ - i, num_n_ - i - 1, x, tau + i, std::move(workspace), i, i + 1);
|
||||
*(x + i * num_n_ + i) = aii;
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -215,7 +215,7 @@ bool MatrixExpCpuKernelMod::LaunchKernel(const std::vector<kernel::AddressPtr> &
|
|||
return true;
|
||||
}
|
||||
|
||||
void MatrixExpCpuKernelMod::TyepChangeForFp16(int64_t i, int64_t m, int64_t size_mm, mindspore::Float16 *input_x,
|
||||
void MatrixExpCpuKernelMod::TyepChangeForFp16(int64_t i, int64_t m, int64_t size_mm, const mindspore::Float16 *input_x,
|
||||
mindspore::Float16 *output_y) const {
|
||||
typedef Eigen::Matrix<float, Eigen::Dynamic, Eigen::Dynamic, Eigen::RowMajor> MatrixXd;
|
||||
MatrixXd I(m, m);
|
||||
|
|
|
@ -66,7 +66,7 @@ class MatrixExpCpuKernelMod : public NativeCpuKernelMod, public MatchKernelHelpe
|
|||
bool LaunchKernel(const std::vector<kernel::AddressPtr> &inputs, const std::vector<kernel::AddressPtr> &workspace,
|
||||
const std::vector<kernel::AddressPtr> &outputs);
|
||||
|
||||
void TyepChangeForFp16(int64_t i, int64_t m, int64_t size_mm, mindspore::Float16 *input_x,
|
||||
void TyepChangeForFp16(int64_t i, int64_t m, int64_t size_mm, const mindspore::Float16 *input_x,
|
||||
mindspore::Float16 *output_y) const;
|
||||
|
||||
template <typename T>
|
||||
|
|
|
@ -59,7 +59,7 @@ bool NonDeterministicIntsCPUKernelMod::LaunchKernel(const std::vector<AddressPtr
|
|||
size_t output_elem_num = outputs[0]->size / sizeof(T1);
|
||||
auto task = [output](size_t start, size_t end) {
|
||||
auto max_data = std::numeric_limits<T1>::max();
|
||||
std::default_random_engine seed(time(0));
|
||||
std::default_random_engine seed(time(nullptr));
|
||||
std::uniform_int_distribution<T1> u(-max_data, max_data);
|
||||
for (size_t i = start; i < end; ++i) {
|
||||
output[i] = u(seed);
|
||||
|
|
|
@ -122,10 +122,13 @@ class CachedInterpolationCalculator {
|
|||
switch (new_indices_hand) {
|
||||
case 0:
|
||||
indexes_[0] = x_0;
|
||||
break;
|
||||
case 1:
|
||||
indexes_[1] = x_1;
|
||||
break;
|
||||
case caseid2:
|
||||
indexes_[kIndex2] = x_2;
|
||||
break;
|
||||
case caseid3:
|
||||
indexes_[kIndex3] = x_3;
|
||||
break;
|
||||
|
@ -198,10 +201,10 @@ inline void GetWeightsAndIndices(const float scale, const int64_t out_loc, const
|
|||
}
|
||||
}
|
||||
|
||||
static void ComputeXWeightsAndIndices(const ResizerState &resizer_state, const bool half_pixel_centers,
|
||||
static void ComputeXWeightsAndIndices(const ResizerState &resizer_state, const bool half_pixel_centers_,
|
||||
std::vector<WeightsAndIndices> *x_wais) {
|
||||
CachedInterpolationCalculator calc;
|
||||
if (half_pixel_centers) {
|
||||
if (half_pixel_centers_) {
|
||||
for (int64_t x = 0; x < resizer_state.out_width; ++x) {
|
||||
GetWeightsAndIndices<HalfPixelScaler, true>(resizer_state.width_scale, x, resizer_state.in_width,
|
||||
&(*x_wais)[static_cast<size_t>(x)]);
|
||||
|
@ -314,10 +317,10 @@ std::vector<float> CalSwitch(const WeightsAndIndices &x_wai, std::vector<float>
|
|||
}
|
||||
|
||||
template <typename T1, typename T2>
|
||||
inline void interpolate_with_caching(const T1 *input_data, const ResizerState &RS, const bool half_pixel_centers,
|
||||
inline void interpolate_with_caching(const T1 *input_data, const ResizerState &RS, const bool half_pixel_centers_,
|
||||
T2 output_data) {
|
||||
std::vector<WeightsAndIndices> x_wais(RS.out_width);
|
||||
ComputeXWeightsAndIndices(RS, half_pixel_centers, &x_wais);
|
||||
ComputeXWeightsAndIndices(RS, half_pixel_centers_, &x_wais);
|
||||
const int64_t in_row_width = RS.in_width * RS.channels;
|
||||
const int64_t in_batch_width = RS.in_height * in_row_width;
|
||||
const T1 *input_b_ptr = input_data;
|
||||
|
@ -327,7 +330,7 @@ inline void interpolate_with_caching(const T1 *input_data, const ResizerState &R
|
|||
for (int64_t b = 0; b < RS.batch_size; ++b, input_b_ptr += in_batch_width) {
|
||||
for (int64_t y = 0; y < RS.out_height; ++y, output_y_ptr += RS.out_width * RS.channels) {
|
||||
WeightsAndIndices y_wai;
|
||||
if (half_pixel_centers) {
|
||||
if (half_pixel_centers_) {
|
||||
GetWeightsAndIndices<HalfPixelScaler, true>(RS.height_scale, y, RS.in_height, &y_wai);
|
||||
} else {
|
||||
GetWeightsAndIndices<LegacyScaler, false>(RS.height_scale, y, RS.in_height, &y_wai);
|
||||
|
|
|
@ -109,10 +109,13 @@ class CachedInterpolationCalculator {
|
|||
switch (new_indices_hand) {
|
||||
case 0:
|
||||
indexes_[0] = x_0;
|
||||
break;
|
||||
case 1:
|
||||
indexes_[1] = x_1;
|
||||
break;
|
||||
case caseid2:
|
||||
indexes_[kIndex2] = x_2;
|
||||
break;
|
||||
case caseid3:
|
||||
indexes_[kIndex3] = x_3;
|
||||
break;
|
||||
|
@ -186,10 +189,10 @@ inline void GetWeightsAndIndicesGrad(const float scale, const int64_t out_loc, c
|
|||
}
|
||||
}
|
||||
|
||||
static void ComputeGradientXWeightsAndIndices(const ResizerGradState &RGS, const bool half_pixel_centers,
|
||||
static void ComputeGradientXWeightsAndIndices(const ResizerGradState &RGS, const bool half_pixel_centers_,
|
||||
std::vector<WeightsAndIndices> *x_wais) {
|
||||
CachedInterpolationCalculator calc;
|
||||
if (half_pixel_centers) {
|
||||
if (half_pixel_centers_) {
|
||||
for (int64_t x = 0; x < RGS.resized_width; ++x) {
|
||||
GetWeightsAndIndicesGrad<HalfPixelScalerGrad, true>(RGS.width_scale, x, RGS.original_width,
|
||||
&(*x_wais)[static_cast<size_t>(x)]);
|
||||
|
@ -284,10 +287,10 @@ void CalNonUtil(const ResizerGradState &RGS, const bool half_pixel_centers,
|
|||
}
|
||||
|
||||
template <typename T>
|
||||
inline void ResizeBicubicGrad(const float *input_grad, const ResizerGradState &RGS, const bool half_pixel_centers,
|
||||
inline void ResizeBicubicGrad(const float *input_grad, const ResizerGradState &RGS, const bool half_pixel_centers_,
|
||||
T *output_grad) {
|
||||
std::vector<WeightsAndIndices> x_wais(RGS.resized_width);
|
||||
ComputeGradientXWeightsAndIndices(RGS, half_pixel_centers, &x_wais);
|
||||
ComputeGradientXWeightsAndIndices(RGS, half_pixel_centers_, &x_wais);
|
||||
const bool flag = true;
|
||||
bool utils_flag = false;
|
||||
if (RGS.original_width * RGS.original_height * RGS.channels * RGS.batch_size >= kParallelDataNum) {
|
||||
|
@ -297,13 +300,13 @@ inline void ResizeBicubicGrad(const float *input_grad, const ResizerGradState &R
|
|||
for (int64_t b = 0; b < RGS.batch_size; ++b) {
|
||||
auto task = [&](int64_t start, int64_t end) {
|
||||
for (int64_t y = start; y < end; ++y) {
|
||||
ResizeCommomCalc(RGS, half_pixel_centers, x_wais, flag, input_grad, output_grad, b, y);
|
||||
ResizeCommomCalc(RGS, half_pixel_centers_, x_wais, flag, input_grad, output_grad, b, y);
|
||||
}
|
||||
};
|
||||
CPUKernelUtils::ParallelFor(task, static_cast<size_t>(RGS.resized_height));
|
||||
}
|
||||
} else {
|
||||
CalNonUtil(RGS, half_pixel_centers, x_wais, flag, input_grad, output_grad);
|
||||
CalNonUtil(RGS, half_pixel_centers_, x_wais, flag, input_grad, output_grad);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in New Issue