fix issue I5XC4B and code sync

This commit is contained in:
twc 2022-09-19 16:25:42 +08:00
parent 890e5a124c
commit 0611f8f813
6 changed files with 16 additions and 26 deletions

View File

@@ -79,10 +79,6 @@ int RandomCategoricalCpuKernel::Resize(const BaseOperatorPtr &base_operator, con
auto kernel_ptr = std::dynamic_pointer_cast<ops::RandomCategorical>(base_operator);
MS_EXCEPTION_IF_NULL(kernel_ptr);
seed_ = kernel_ptr->get_seed();
if (seed_ <= 0) {
std::random_device rd;
seed_ = static_cast<int64_t>(rd());
}
return KRET_OK;
}

View File

@@ -67,13 +67,12 @@ int ReduceStdCpuKernelMod::Resize(const BaseOperatorPtr &base_operator, const st
});
sort(axis_.begin(), axis_.end());
auto last = std::unique(axis_.begin(), axis_.end());
axis_.erase(last, axis_.end());
(void)axis_.erase(last, axis_.end());
return KRET_OK;
}
template <typename T>
void ReduceStdCpuKernelMod::RunReduceStd(const std::vector<kernel::AddressPtr> &inputs,
const std::vector<kernel::AddressPtr> &workspace,
const std::vector<kernel::AddressPtr> &outputs) {
size_t input_size = inputs[0]->size / sizeof(T);
if (input_size > kReduceSmallVectorSize) {
@@ -99,32 +98,31 @@ void ReduceStdCpuKernelMod::RunReduceStd(const std::vector<kernel::AddressPtr> &
template <typename T>
void ReduceStdCpuKernelMod::RunReduceStdWithSAxis(const std::vector<kernel::AddressPtr> &inputs,
const std::vector<kernel::AddressPtr> &workspace,
const std::vector<kernel::AddressPtr> &outputs) {
T *input_addr = reinterpret_cast<T *>(inputs[0]->addr);
T *output_std_addr = reinterpret_cast<T *>(outputs[0]->addr);
T *output_mean_addr = reinterpret_cast<T *>(outputs[1]->addr);
int dimension = input_shape_.size();
size_t dimension = input_shape_.size();
size_t stride = 1;
std::vector<size_t> axes(input_shape_.size());
size_t j = 0;
size_t k = 0;
for (int i = 0; i < dimension; ++i) {
if (j == axis_.size() || i != axis_[j]) {
for (size_t i = 0; i < dimension; ++i) {
if (j == axis_.size() || i != LongToSize(axis_[j])) {
axes[k] = i;
++k;
} else {
stride *= input_shape_[i];
stride *= LongToSize(input_shape_[i]);
++j;
}
}
for (auto &it : axis_) {
axes[k] = it;
axes[k] = LongToSize(it);
++k;
}
size_t output_size = outputs[0]->size / sizeof(T);
std::vector<int64_t> transpose_shape(input_shape_.size());
for (int i = 0; i < dimension; ++i) {
for (size_t i = 0; i < dimension; ++i) {
transpose_shape[i] = input_shape_[axes[i]];
}
@@ -158,22 +156,22 @@ void ReduceStdCpuKernelMod::RunReduceStdWithSAxis(const std::vector<kernel::Addr
}
bool ReduceStdCpuKernelMod::Launch(const std::vector<kernel::AddressPtr> &inputs,
const std::vector<kernel::AddressPtr> &workspace,
const std::vector<kernel::AddressPtr> &,
const std::vector<kernel::AddressPtr> &outputs) {
CHECK_KERNEL_INPUTS_NUM(inputs.size(), kReduceStdInputsNum, kernel_name_);
CHECK_KERNEL_OUTPUTS_NUM(outputs.size(), kReduceStdOutputsNum, kernel_name_);
if (axis_.empty() || input_shape_.empty() || input_shape_.size() == 1) {
if (dtype_ == kNumberTypeFloat16) {
RunReduceStd<float16>(inputs, workspace, outputs);
RunReduceStd<float16>(inputs, outputs);
} else if (dtype_ == kNumberTypeFloat32) {
RunReduceStd<float>(inputs, workspace, outputs);
RunReduceStd<float>(inputs, outputs);
}
} else {
if (dtype_ == kNumberTypeFloat16) {
RunReduceStdWithSAxis<float16>(inputs, workspace, outputs);
RunReduceStdWithSAxis<float16>(inputs, outputs);
} else if (dtype_ == kNumberTypeFloat32) {
RunReduceStdWithSAxis<float>(inputs, workspace, outputs);
RunReduceStdWithSAxis<float>(inputs, outputs);
}
}
return true;

View File

@@ -42,12 +42,10 @@ class ReduceStdCpuKernelMod : public NativeCpuKernelMod {
const std::vector<KernelTensorPtr> &outputs, const std::map<uint32_t, tensor::TensorPtr> &) override;
template <typename T>
void RunReduceStd(const std::vector<kernel::AddressPtr> &inputs, const std::vector<kernel::AddressPtr> &workspace,
const std::vector<kernel::AddressPtr> &outputs);
void RunReduceStd(const std::vector<kernel::AddressPtr> &inputs, const std::vector<kernel::AddressPtr> &outputs);
template <typename T>
void RunReduceStdWithSAxis(const std::vector<kernel::AddressPtr> &inputs,
const std::vector<kernel::AddressPtr> &workspace,
const std::vector<kernel::AddressPtr> &outputs);
protected:

View File

@@ -70,6 +70,7 @@ class PackFwdGpuKernelMod : public NativeGpuKernelMod {
input_num_ = inputs.size();
inputs_host_ = std::make_unique<T *[]>(input_num_);
dims_behind_axis_ = 1;
for (size_t i = 0; i < input_num_; i++) {
size_t input_size = 1;
auto input_shape = inputs.at(i)->GetShapeVector();
@@ -79,7 +80,6 @@ class PackFwdGpuKernelMod : public NativeGpuKernelMod {
dims_behind_axis_ *= static_cast<size_t>(input_shape[j]);
}
}
input_size_list_.push_back(input_size * sizeof(T));
}
workspace_size_list_.push_back(sizeof(T *) * input_num_);
@@ -97,8 +97,6 @@ class PackFwdGpuKernelMod : public NativeGpuKernelMod {
if (output_num != 1) {
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the number of outputs must be 1, but got " << output_num;
}
output_size_list_.push_back(output_size_ * sizeof(T));
return true;
}

View File

@@ -67,7 +67,7 @@ class L2NormalizeInfer : public abstract::OpInferBase {
}
// failed to get vector<int64_t> axis from infer
auto axis_vec = CheckAndConvertUtils::CheckIntOrTupleInt("attribute[axis]", primitive->GetAttr("axis"), prim_name);
int axis = axis_vec[0];
int64_t axis = axis_vec[0];
CheckAndConvertUtils::CheckInRange("axis value", axis, kIncludeLeft, {-input_rank, input_rank}, prim_name);
auto output_shape = input_shape;

View File

@@ -99,7 +99,7 @@ std::vector<int64_t> ReduceStd::get_axis() const {
if (axis_value->isa<api::ValueSequence>()) {
axis = api::GetValue<std::vector<int64_t>>(axis_value);
} else if (axis_value->isa<api::Int64Imm>()) {
axis.emplace_back(api::GetValue<int64_t>(axis_value));
(void)axis.emplace_back(api::GetValue<int64_t>(axis_value));
} else {
MS_EXCEPTION(TypeError) << "For ReduceStd, the type of attribute `axis` is invalid.";
}