forked from mindspore-Ecosystem/mindspore
!25527 Support float64 for CPU OP, including arithmetic, reshape, scatter_nd_update, select.
Merge pull request !25527 from hezhenhao1/add_double
This commit is contained in:
commit 3452d6e55d
@@ -64,24 +64,80 @@ class ArithmeticCPUKernel : public CPUKernel {
   std::vector<size_t> output_element_num_;
 };

-MS_REG_CPU_KERNEL_T(Sub, KernelAttr(), ArithmeticCPUKernel, int32_t);
-MS_REG_CPU_KERNEL_T(Sub, KernelAttr(), ArithmeticCPUKernel, float);
-MS_REG_CPU_KERNEL_T(Sub, KernelAttr(), ArithmeticCPUKernel, int64_t);
-MS_REG_CPU_KERNEL_T(Pow, KernelAttr(), ArithmeticCPUKernel, int32_t);
-MS_REG_CPU_KERNEL_T(Pow, KernelAttr(), ArithmeticCPUKernel, float);
-MS_REG_CPU_KERNEL_T(Pow, KernelAttr(), ArithmeticCPUKernel, int64_t);
-MS_REG_CPU_KERNEL_T(RealDiv, KernelAttr(), ArithmeticCPUKernel, int32_t);
-MS_REG_CPU_KERNEL_T(RealDiv, KernelAttr(), ArithmeticCPUKernel, float);
-MS_REG_CPU_KERNEL_T(RealDiv, KernelAttr(), ArithmeticCPUKernel, float16);
-MS_REG_CPU_KERNEL_T(RealDiv, KernelAttr(), ArithmeticCPUKernel, int64_t);
-MS_REG_CPU_KERNEL_T(Div, KernelAttr(), ArithmeticCPUKernel, int32_t);
-MS_REG_CPU_KERNEL_T(Div, KernelAttr(), ArithmeticCPUKernel, float);
-MS_REG_CPU_KERNEL_T(Div, KernelAttr(), ArithmeticCPUKernel, int64_t);
-MS_REG_CPU_KERNEL_T(Mul, KernelAttr(), ArithmeticCPUKernel, float);
-MS_REG_CPU_KERNEL_T(Mul, KernelAttr(), ArithmeticCPUKernel, int32_t);
-MS_REG_CPU_KERNEL_T(
-  FloorDiv, KernelAttr().AddInputAttr(kNumberTypeInt64).AddInputAttr(kNumberTypeInt64).AddOutputAttr(kNumberTypeInt64),
-  ArithmeticCPUKernel, int64_t);
+MS_REG_CPU_KERNEL_T(
+  Add, KernelAttr().AddInputAttr(kNumberTypeInt32).AddInputAttr(kNumberTypeInt32).AddOutputAttr(kNumberTypeInt32),
+  ArithmeticCPUKernel, int32_t);
+MS_REG_CPU_KERNEL_T(
+  Add, KernelAttr().AddInputAttr(kNumberTypeFloat32).AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32),
+  ArithmeticCPUKernel, float);
+MS_REG_CPU_KERNEL_T(
+  Add, KernelAttr().AddInputAttr(kNumberTypeInt64).AddInputAttr(kNumberTypeInt64).AddOutputAttr(kNumberTypeInt64),
+  ArithmeticCPUKernel, int64_t);
+MS_REG_CPU_KERNEL_T(
+  Add, KernelAttr().AddInputAttr(kNumberTypeFloat64).AddInputAttr(kNumberTypeFloat64).AddOutputAttr(kNumberTypeFloat64),
+  ArithmeticCPUKernel, double);
+MS_REG_CPU_KERNEL_T(
+  Sub, KernelAttr().AddInputAttr(kNumberTypeInt32).AddInputAttr(kNumberTypeInt32).AddOutputAttr(kNumberTypeInt32),
+  ArithmeticCPUKernel, int32_t);
+MS_REG_CPU_KERNEL_T(
+  Sub, KernelAttr().AddInputAttr(kNumberTypeFloat32).AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32),
+  ArithmeticCPUKernel, float);
+MS_REG_CPU_KERNEL_T(
+  Sub, KernelAttr().AddInputAttr(kNumberTypeInt64).AddInputAttr(kNumberTypeInt64).AddOutputAttr(kNumberTypeInt64),
+  ArithmeticCPUKernel, int64_t);
+MS_REG_CPU_KERNEL_T(
+  Sub, KernelAttr().AddInputAttr(kNumberTypeFloat64).AddInputAttr(kNumberTypeFloat64).AddOutputAttr(kNumberTypeFloat64),
+  ArithmeticCPUKernel, double);
+MS_REG_CPU_KERNEL_T(
+  Mul, KernelAttr().AddInputAttr(kNumberTypeInt32).AddInputAttr(kNumberTypeInt32).AddOutputAttr(kNumberTypeInt32),
+  ArithmeticCPUKernel, int32_t);
+MS_REG_CPU_KERNEL_T(
+  Mul, KernelAttr().AddInputAttr(kNumberTypeFloat32).AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32),
+  ArithmeticCPUKernel, float);
+MS_REG_CPU_KERNEL_T(
+  Mul, KernelAttr().AddInputAttr(kNumberTypeInt64).AddInputAttr(kNumberTypeInt64).AddOutputAttr(kNumberTypeInt64),
+  ArithmeticCPUKernel, int64_t);
+MS_REG_CPU_KERNEL_T(
+  Mul, KernelAttr().AddInputAttr(kNumberTypeFloat64).AddInputAttr(kNumberTypeFloat64).AddOutputAttr(kNumberTypeFloat64),
+  ArithmeticCPUKernel, double);
+MS_REG_CPU_KERNEL_T(
+  Div, KernelAttr().AddInputAttr(kNumberTypeInt32).AddInputAttr(kNumberTypeInt32).AddOutputAttr(kNumberTypeInt32),
+  ArithmeticCPUKernel, int32_t);
+MS_REG_CPU_KERNEL_T(
+  Div, KernelAttr().AddInputAttr(kNumberTypeFloat32).AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32),
+  ArithmeticCPUKernel, float);
+MS_REG_CPU_KERNEL_T(
+  Div, KernelAttr().AddInputAttr(kNumberTypeInt64).AddInputAttr(kNumberTypeInt64).AddOutputAttr(kNumberTypeInt64),
+  ArithmeticCPUKernel, int64_t);
+MS_REG_CPU_KERNEL_T(
+  Div, KernelAttr().AddInputAttr(kNumberTypeFloat64).AddInputAttr(kNumberTypeFloat64).AddOutputAttr(kNumberTypeFloat64),
+  ArithmeticCPUKernel, double);
+MS_REG_CPU_KERNEL_T(
+  Pow, KernelAttr().AddInputAttr(kNumberTypeInt32).AddInputAttr(kNumberTypeInt32).AddOutputAttr(kNumberTypeInt32),
+  ArithmeticCPUKernel, int32_t);
+MS_REG_CPU_KERNEL_T(
+  Pow, KernelAttr().AddInputAttr(kNumberTypeFloat32).AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32),
+  ArithmeticCPUKernel, float);
+MS_REG_CPU_KERNEL_T(
+  Pow, KernelAttr().AddInputAttr(kNumberTypeInt64).AddInputAttr(kNumberTypeInt64).AddOutputAttr(kNumberTypeInt64),
+  ArithmeticCPUKernel, int64_t);
+MS_REG_CPU_KERNEL_T(
+  Pow, KernelAttr().AddInputAttr(kNumberTypeFloat64).AddInputAttr(kNumberTypeFloat64).AddOutputAttr(kNumberTypeFloat64),
+  ArithmeticCPUKernel, double);
+MS_REG_CPU_KERNEL_T(
+  RealDiv, KernelAttr().AddInputAttr(kNumberTypeInt32).AddInputAttr(kNumberTypeInt32).AddOutputAttr(kNumberTypeInt32),
+  ArithmeticCPUKernel, int32_t);
+MS_REG_CPU_KERNEL_T(
+  RealDiv,
+  KernelAttr().AddInputAttr(kNumberTypeFloat32).AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32),
+  ArithmeticCPUKernel, float);
+MS_REG_CPU_KERNEL_T(
+  RealDiv, KernelAttr().AddInputAttr(kNumberTypeInt64).AddInputAttr(kNumberTypeInt64).AddOutputAttr(kNumberTypeInt64),
+  ArithmeticCPUKernel, int64_t);
+MS_REG_CPU_KERNEL_T(
+  RealDiv,
+  KernelAttr().AddInputAttr(kNumberTypeFloat64).AddInputAttr(kNumberTypeFloat64).AddOutputAttr(kNumberTypeFloat64),
+  ArithmeticCPUKernel, double);
 MS_REG_CPU_KERNEL_T(
   FloorDiv, KernelAttr().AddInputAttr(kNumberTypeInt32).AddInputAttr(kNumberTypeInt32).AddOutputAttr(kNumberTypeInt32),
   ArithmeticCPUKernel, int);

@@ -89,6 +145,13 @@ MS_REG_CPU_KERNEL_T(
   FloorDiv,
   KernelAttr().AddInputAttr(kNumberTypeFloat32).AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32),
   ArithmeticCPUKernel, float);
+MS_REG_CPU_KERNEL_T(
+  FloorDiv, KernelAttr().AddInputAttr(kNumberTypeInt64).AddInputAttr(kNumberTypeInt64).AddOutputAttr(kNumberTypeInt64),
+  ArithmeticCPUKernel, int64_t);
+MS_REG_CPU_KERNEL_T(
+  FloorDiv,
+  KernelAttr().AddInputAttr(kNumberTypeFloat64).AddInputAttr(kNumberTypeFloat64).AddOutputAttr(kNumberTypeFloat64),
+  ArithmeticCPUKernel, double);
 MS_REG_CPU_KERNEL_T(
   Mod, KernelAttr().AddInputAttr(kNumberTypeInt32).AddInputAttr(kNumberTypeInt32).AddOutputAttr(kNumberTypeInt32),
   ArithmeticCPUKernel, int);

@@ -103,7 +166,7 @@ MS_REG_CPU_KERNEL_T(
   ArithmeticCPUKernel, int64_t);
 MS_REG_CPU_KERNEL_T(
   FloorMod, KernelAttr().AddInputAttr(kNumberTypeInt32).AddInputAttr(kNumberTypeInt32).AddOutputAttr(kNumberTypeInt32),
-  ArithmeticCPUKernel, int);
+  ArithmeticCPUKernel, int32_t);
 MS_REG_CPU_KERNEL_T(
   FloorMod,
   KernelAttr().AddInputAttr(kNumberTypeFloat32).AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32),

@@ -114,10 +177,18 @@ MS_REG_CPU_KERNEL_T(
   ArithmeticCPUKernel, float16);
 MS_REG_CPU_KERNEL_T(
   AssignAdd, KernelAttr().AddInputAttr(kNumberTypeInt32).AddInputAttr(kNumberTypeInt32).AddOutputAttr(kNumberTypeInt32),
-  ArithmeticCPUKernel, int);
+  ArithmeticCPUKernel, int32_t);
 MS_REG_CPU_KERNEL_T(
   AssignAdd,
   KernelAttr().AddInputAttr(kNumberTypeFloat32).AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32),
   ArithmeticCPUKernel, float);
+MS_REG_CPU_KERNEL_T(
+  AssignAdd, KernelAttr().AddInputAttr(kNumberTypeInt64).AddInputAttr(kNumberTypeInt64).AddOutputAttr(kNumberTypeInt64),
+  ArithmeticCPUKernel, int64_t);
+MS_REG_CPU_KERNEL_T(
+  AssignAdd,
+  KernelAttr().AddInputAttr(kNumberTypeFloat64).AddInputAttr(kNumberTypeFloat64).AddOutputAttr(kNumberTypeFloat64),
+  ArithmeticCPUKernel, double);
 MS_REG_CPU_KERNEL_T(
   SquaredDifference,
   KernelAttr().AddInputAttr(kNumberTypeInt32).AddInputAttr(kNumberTypeInt32).AddOutputAttr(kNumberTypeInt32),

@@ -130,10 +201,18 @@ MS_REG_CPU_KERNEL_T(
   SquaredDifference,
   KernelAttr().AddInputAttr(kNumberTypeFloat32).AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32),
   ArithmeticCPUKernel, float);
+MS_REG_CPU_KERNEL_T(
+  SquaredDifference,
+  KernelAttr().AddInputAttr(kNumberTypeFloat64).AddInputAttr(kNumberTypeFloat64).AddOutputAttr(kNumberTypeFloat64),
+  ArithmeticCPUKernel, double);
 MS_REG_CPU_KERNEL_T(
   Atan2,
   KernelAttr().AddInputAttr(kNumberTypeFloat32).AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32),
   ArithmeticCPUKernel, float);
+MS_REG_CPU_KERNEL_T(
+  Atan2,
+  KernelAttr().AddInputAttr(kNumberTypeFloat64).AddInputAttr(kNumberTypeFloat64).AddOutputAttr(kNumberTypeFloat64),
+  ArithmeticCPUKernel, double);
 } // namespace kernel
 } // namespace mindspore
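The registration churn above is the core of the change: an empty KernelAttr() carries no dtype information, so a float64 tensor could never be matched to a double instantiation of the kernel. Every registration now names its input and output types and binds them to a concrete template parameter, and float64 support becomes one more entry per op. A minimal sketch of how a typed registry like this can work (Registry, Registrar, KernelFactory, and the TypeId values here are illustrative stand-ins, not MindSpore's real internals):

#include <functional>
#include <map>
#include <memory>
#include <string>
#include <utility>

enum TypeId { kNumberTypeInt32, kNumberTypeInt64, kNumberTypeFloat32, kNumberTypeFloat64 };
struct CPUKernel { virtual ~CPUKernel() = default; };

// Registry keyed by (op name, dtype): each typed registration adds one entry,
// so supporting float64 for an op is literally one more (op, kFloat64) entry.
using KernelFactory = std::function<std::unique_ptr<CPUKernel>()>;
std::map<std::pair<std::string, TypeId>, KernelFactory> &Registry() {
  static std::map<std::pair<std::string, TypeId>, KernelFactory> registry;
  return registry;
}

template <typename T>
struct ArithmeticKernel : CPUKernel {};  // stand-in for ArithmeticCPUKernel<T>

// Roughly what a macro like MS_REG_CPU_KERNEL_T(Add, ..., double) expands to.
struct Registrar {
  Registrar(std::string op, TypeId t, KernelFactory f) {
    Registry()[{std::move(op), t}] = std::move(f);
  }
};
static Registrar g_add_f64("Add", kNumberTypeFloat64,
                           [] { return std::make_unique<ArithmeticKernel<double>>(); });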
@@ -70,6 +70,9 @@ MS_REG_CPU_KERNEL_T(
 MS_REG_CPU_KERNEL_T(
   Less, KernelAttr().AddInputAttr(kNumberTypeInt64).AddInputAttr(kNumberTypeInt64).AddOutputAttr(kNumberTypeBool),
   ArithmeticLogicCPUKernel, int64_t);
+MS_REG_CPU_KERNEL_T(
+  Less, KernelAttr().AddInputAttr(kNumberTypeFloat64).AddInputAttr(kNumberTypeFloat64).AddOutputAttr(kNumberTypeBool),
+  ArithmeticLogicCPUKernel, double);
 MS_REG_CPU_KERNEL_T(
   Equal, KernelAttr().AddInputAttr(kNumberTypeBool).AddInputAttr(kNumberTypeBool).AddOutputAttr(kNumberTypeBool),
   ArithmeticLogicCPUKernel, bool);

@@ -146,6 +149,10 @@ MS_REG_CPU_KERNEL_T(
   Greater,
   KernelAttr().AddInputAttr(kNumberTypeFloat32).AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeBool),
   ArithmeticLogicCPUKernel, float);
+MS_REG_CPU_KERNEL_T(
+  Greater,
+  KernelAttr().AddInputAttr(kNumberTypeFloat64).AddInputAttr(kNumberTypeFloat64).AddOutputAttr(kNumberTypeBool),
+  ArithmeticLogicCPUKernel, double);
 MS_REG_CPU_KERNEL_T(
   Greater, KernelAttr().AddInputAttr(kNumberTypeInt64).AddInputAttr(kNumberTypeInt64).AddOutputAttr(kNumberTypeBool),
   ArithmeticLogicCPUKernel, int64_t);

@@ -161,6 +168,10 @@ MS_REG_CPU_KERNEL_T(
   GreaterEqual,
   KernelAttr().AddInputAttr(kNumberTypeInt64).AddInputAttr(kNumberTypeInt64).AddOutputAttr(kNumberTypeBool),
   ArithmeticLogicCPUKernel, int64_t);
+MS_REG_CPU_KERNEL_T(
+  GreaterEqual,
+  KernelAttr().AddInputAttr(kNumberTypeFloat64).AddInputAttr(kNumberTypeFloat64).AddOutputAttr(kNumberTypeBool),
+  ArithmeticLogicCPUKernel, double);
 MS_REG_CPU_KERNEL_T(
   LessEqual, KernelAttr().AddInputAttr(kNumberTypeInt32).AddInputAttr(kNumberTypeInt32).AddOutputAttr(kNumberTypeBool),
   ArithmeticLogicCPUKernel, int);

@@ -171,6 +182,10 @@ MS_REG_CPU_KERNEL_T(
   LessEqual,
   KernelAttr().AddInputAttr(kNumberTypeFloat32).AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeBool),
   ArithmeticLogicCPUKernel, float);
+MS_REG_CPU_KERNEL_T(
+  LessEqual,
+  KernelAttr().AddInputAttr(kNumberTypeFloat64).AddInputAttr(kNumberTypeFloat64).AddOutputAttr(kNumberTypeBool),
+  ArithmeticLogicCPUKernel, double);
 MS_REG_CPU_KERNEL_T(
   LogicalAnd, KernelAttr().AddInputAttr(kNumberTypeBool).AddInputAttr(kNumberTypeBool).AddOutputAttr(kNumberTypeBool),
   ArithmeticLogicCPUKernel, bool);
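The logic kernels change only on the input side; the output attribute stays kNumberTypeBool throughout, so the template parameter drives the comparison while the result buffer is always bool. A minimal sketch of that shape (the function name and signature are illustrative, not the actual kernel code):

#include <cstddef>

// Templated on the input element type; output is always bool, mirroring the
// Less<double> -> kNumberTypeBool registrations above.
template <typename T>
void LessKernel(const T *x, const T *y, bool *out, std::size_t n) {
  for (std::size_t i = 0; i < n; ++i) {
    out[i] = x[i] < y[i];
  }
}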
@@ -279,8 +279,10 @@ bool ArithmeticSelfCPUKernel::Launch(const std::vector<kernel::AddressPtr> &inpu
                                      const std::vector<kernel::AddressPtr> &outputs) {
   CHECK_KERNEL_INPUTS_NUM(inputs.size(), kInputsNum, kernel_name_);
   CHECK_KERNEL_OUTPUTS_NUM(outputs.size(), kOutputsNum, kernel_name_);
-  if (dtype_ == kNumberTypeFloat32 || dtype_ == kNumberTypeFloat16 || dtype_ == kNumberTypeFloat64) {
+  if (dtype_ == kNumberTypeFloat32 || dtype_ == kNumberTypeFloat16) {
     LaunchKernel<float>(inputs, outputs);
+  } else if (dtype_ == kNumberTypeFloat64) {
+    LaunchKernel<double>(inputs, outputs);
   } else if (dtype_ == kNumberTypeInt32 || dtype_ == kNumberTypeInt16) {
     LaunchKernel<int>(inputs, outputs);
   } else if (dtype_ == kNumberTypeInt64) {
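Before this change the float64 tag fell into the float branch, so double tensors were processed at 32-bit precision; the new branch dispatches them to LaunchKernel<double>. A self-contained sketch of that dtype-to-template dispatch (the TypeId values and the Fn functor are hypothetical):

#include <cstdint>
#include <stdexcept>
#include <utility>

enum TypeId { kNumberTypeFloat16, kNumberTypeFloat32, kNumberTypeFloat64, kNumberTypeInt32, kNumberTypeInt64 };

// Map a runtime dtype tag to a compile-time template instantiation. Routing
// kNumberTypeFloat64 through Fn<float> (as the removed branch effectively did)
// would silently truncate values to 32-bit precision.
template <template <typename> class Fn, typename... Args>
void LaunchByDtype(TypeId dtype, Args &&...args) {
  switch (dtype) {
    case kNumberTypeFloat16:
    case kNumberTypeFloat32: Fn<float>()(std::forward<Args>(args)...); break;
    case kNumberTypeFloat64: Fn<double>()(std::forward<Args>(args)...); break;
    case kNumberTypeInt32:   Fn<int32_t>()(std::forward<Args>(args)...); break;
    case kNumberTypeInt64:   Fn<int64_t>()(std::forward<Args>(args)...); break;
    default: throw std::runtime_error("unsupported dtype");
  }
}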
@@ -42,7 +42,6 @@ class ArithmeticSelfCPUKernel : public CPUKernel {
   void LaunchLogicalNot(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &outputs) const;

   TypeId dtype_{kTypeUnknown};
-  TypeId target_dtype_{kTypeUnknown};
 };

 template <typename T>

@@ -59,8 +58,14 @@ MS_REG_CPU_KERNEL(Square, KernelAttr().AddInputAttr(kNumberTypeInt32).AddOutputA
                   ArithmeticSelfCPUKernel);
 MS_REG_CPU_KERNEL(Square, KernelAttr().AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32),
                   ArithmeticSelfCPUKernel);
+MS_REG_CPU_KERNEL(Square, KernelAttr().AddInputAttr(kNumberTypeInt64).AddOutputAttr(kNumberTypeInt64),
+                  ArithmeticSelfCPUKernel);
+MS_REG_CPU_KERNEL(Square, KernelAttr().AddInputAttr(kNumberTypeFloat64).AddOutputAttr(kNumberTypeFloat64),
+                  ArithmeticSelfCPUKernel);
 MS_REG_CPU_KERNEL(Neg, KernelAttr().AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32),
                   ArithmeticSelfCPUKernel);
+MS_REG_CPU_KERNEL(Neg, KernelAttr().AddInputAttr(kNumberTypeFloat64).AddOutputAttr(kNumberTypeFloat64),
+                  ArithmeticSelfCPUKernel);
 MS_REG_CPU_KERNEL(Neg, KernelAttr().AddInputAttr(kNumberTypeInt32).AddOutputAttr(kNumberTypeInt32),
                   ArithmeticSelfCPUKernel);
 MS_REG_CPU_KERNEL(Neg, KernelAttr().AddInputAttr(kNumberTypeInt64).AddOutputAttr(kNumberTypeInt64),
@@ -69,48 +74,84 @@ MS_REG_CPU_KERNEL(ZerosLike, KernelAttr().AddInputAttr(kNumberTypeFloat32).AddOu
                   ArithmeticSelfCPUKernel);
 MS_REG_CPU_KERNEL(ZerosLike, KernelAttr().AddInputAttr(kNumberTypeInt32).AddOutputAttr(kNumberTypeInt32),
                   ArithmeticSelfCPUKernel);
+MS_REG_CPU_KERNEL(ZerosLike, KernelAttr().AddInputAttr(kNumberTypeFloat64).AddOutputAttr(kNumberTypeFloat64),
+                  ArithmeticSelfCPUKernel);
 MS_REG_CPU_KERNEL(OnesLike, KernelAttr().AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32),
                   ArithmeticSelfCPUKernel);
 MS_REG_CPU_KERNEL(OnesLike, KernelAttr().AddInputAttr(kNumberTypeInt32).AddOutputAttr(kNumberTypeInt32),
                   ArithmeticSelfCPUKernel);
+MS_REG_CPU_KERNEL(OnesLike, KernelAttr().AddInputAttr(kNumberTypeFloat64).AddOutputAttr(kNumberTypeFloat64),
+                  ArithmeticSelfCPUKernel);
 MS_REG_CPU_KERNEL(Sign, KernelAttr().AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32),
                   ArithmeticSelfCPUKernel);
 MS_REG_CPU_KERNEL(Sign, KernelAttr().AddInputAttr(kNumberTypeInt32).AddOutputAttr(kNumberTypeInt32),
                   ArithmeticSelfCPUKernel);
+MS_REG_CPU_KERNEL(Sign, KernelAttr().AddInputAttr(kNumberTypeFloat64).AddOutputAttr(kNumberTypeFloat64),
+                  ArithmeticSelfCPUKernel);
 MS_REG_CPU_KERNEL(Floor, KernelAttr().AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32),
                   ArithmeticSelfCPUKernel);
+MS_REG_CPU_KERNEL(Floor, KernelAttr().AddInputAttr(kNumberTypeFloat64).AddOutputAttr(kNumberTypeFloat64),
+                  ArithmeticSelfCPUKernel);
 MS_REG_CPU_KERNEL(Rint, KernelAttr().AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32),
                   ArithmeticSelfCPUKernel);
+MS_REG_CPU_KERNEL(Rint, KernelAttr().AddInputAttr(kNumberTypeFloat64).AddOutputAttr(kNumberTypeFloat64),
+                  ArithmeticSelfCPUKernel);
 MS_REG_CPU_KERNEL(Round, KernelAttr().AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32),
                   ArithmeticSelfCPUKernel);
+MS_REG_CPU_KERNEL(Round, KernelAttr().AddInputAttr(kNumberTypeFloat64).AddOutputAttr(kNumberTypeFloat64),
+                  ArithmeticSelfCPUKernel);
 MS_REG_CPU_KERNEL(Reciprocal, KernelAttr().AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32),
                   ArithmeticSelfCPUKernel);
+MS_REG_CPU_KERNEL(Reciprocal, KernelAttr().AddInputAttr(kNumberTypeFloat64).AddOutputAttr(kNumberTypeFloat64),
+                  ArithmeticSelfCPUKernel);
 MS_REG_CPU_KERNEL(GeLU, KernelAttr().AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32),
                   ArithmeticSelfCPUKernel);
 MS_REG_CPU_KERNEL(LogicalNot, KernelAttr().AddInputAttr(kNumberTypeBool).AddOutputAttr(kNumberTypeBool),
                   ArithmeticSelfCPUKernel);
 MS_REG_CPU_KERNEL(Asin, KernelAttr().AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32),
                   ArithmeticSelfCPUKernel);
+MS_REG_CPU_KERNEL(Asin, KernelAttr().AddInputAttr(kNumberTypeFloat64).AddOutputAttr(kNumberTypeFloat64),
+                  ArithmeticSelfCPUKernel);
 MS_REG_CPU_KERNEL(ACos, KernelAttr().AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32),
                   ArithmeticSelfCPUKernel);
+MS_REG_CPU_KERNEL(ACos, KernelAttr().AddInputAttr(kNumberTypeFloat64).AddOutputAttr(kNumberTypeFloat64),
+                  ArithmeticSelfCPUKernel);
 MS_REG_CPU_KERNEL(Atan, KernelAttr().AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32),
                   ArithmeticSelfCPUKernel);
+MS_REG_CPU_KERNEL(Atan, KernelAttr().AddInputAttr(kNumberTypeFloat64).AddOutputAttr(kNumberTypeFloat64),
+                  ArithmeticSelfCPUKernel);
 MS_REG_CPU_KERNEL(Sin, KernelAttr().AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32),
                   ArithmeticSelfCPUKernel);
+MS_REG_CPU_KERNEL(Sin, KernelAttr().AddInputAttr(kNumberTypeFloat64).AddOutputAttr(kNumberTypeFloat64),
+                  ArithmeticSelfCPUKernel);
 MS_REG_CPU_KERNEL(Cos, KernelAttr().AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32),
                   ArithmeticSelfCPUKernel);
+MS_REG_CPU_KERNEL(Cos, KernelAttr().AddInputAttr(kNumberTypeFloat64).AddOutputAttr(kNumberTypeFloat64),
+                  ArithmeticSelfCPUKernel);
 MS_REG_CPU_KERNEL(Tan, KernelAttr().AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32),
                   ArithmeticSelfCPUKernel);
+MS_REG_CPU_KERNEL(Tan, KernelAttr().AddInputAttr(kNumberTypeFloat64).AddOutputAttr(kNumberTypeFloat64),
+                  ArithmeticSelfCPUKernel);
 MS_REG_CPU_KERNEL(Sinh, KernelAttr().AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32),
                   ArithmeticSelfCPUKernel);
+MS_REG_CPU_KERNEL(Sinh, KernelAttr().AddInputAttr(kNumberTypeFloat64).AddOutputAttr(kNumberTypeFloat64),
+                  ArithmeticSelfCPUKernel);
 MS_REG_CPU_KERNEL(Cosh, KernelAttr().AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32),
                   ArithmeticSelfCPUKernel);
+MS_REG_CPU_KERNEL(Cosh, KernelAttr().AddInputAttr(kNumberTypeFloat64).AddOutputAttr(kNumberTypeFloat64),
+                  ArithmeticSelfCPUKernel);
 MS_REG_CPU_KERNEL(Asinh, KernelAttr().AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32),
                   ArithmeticSelfCPUKernel);
+MS_REG_CPU_KERNEL(Asinh, KernelAttr().AddInputAttr(kNumberTypeFloat64).AddOutputAttr(kNumberTypeFloat64),
+                  ArithmeticSelfCPUKernel);
 MS_REG_CPU_KERNEL(Acosh, KernelAttr().AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32),
                   ArithmeticSelfCPUKernel);
+MS_REG_CPU_KERNEL(Acosh, KernelAttr().AddInputAttr(kNumberTypeFloat64).AddOutputAttr(kNumberTypeFloat64),
+                  ArithmeticSelfCPUKernel);
 MS_REG_CPU_KERNEL(Atanh, KernelAttr().AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32),
                   ArithmeticSelfCPUKernel);
+MS_REG_CPU_KERNEL(Atanh, KernelAttr().AddInputAttr(kNumberTypeFloat64).AddOutputAttr(kNumberTypeFloat64),
+                  ArithmeticSelfCPUKernel);

 MS_REG_CPU_KERNEL_T(Identity, KernelAttr().AddInputAttr(kNumberTypeUInt64).AddOutputAttr(kNumberTypeUInt64),
                     IdentityCPUKernel, uint64_t);
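For these unary kernels the math itself needs no new code: the kernel body calls the standard <cmath> routines, and the double instantiation simply resolves to their double-precision overloads. A sketch of the idea (the function name is illustrative):

#include <cmath>
#include <cstddef>

// Instantiated with T = float or T = double; std::asin picks the matching
// overload, so the float64 registration reuses the same kernel body.
template <typename T>
void AsinKernel(const T *in, T *out, std::size_t n) {
  for (std::size_t i = 0; i < n; ++i) {
    out[i] = std::asin(in[i]);
  }
}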
@@ -221,31 +221,56 @@ void EltWiseGradCPUKernel<T>::SoftplusGrad(const T *input1, const T *input2, T *
   }
 }

+template <typename T>
+void EltWiseGradCPUKernel<T>::InitComputeFunc() {
+  if constexpr (std::is_same_v<T, double>) {
+    static const std::map<std::string,
+                          std::function<void(EltWiseGradCPUKernel *, const T *, const T *, T *, size_t, size_t)>>
+      elt_map{{prim::kPrimSqrtGrad->name(), &EltWiseGradCPUKernel<T>::SqrtGrad},
+              {prim::kPrimGeLUGrad->name(), &EltWiseGradCPUKernel<T>::GeluGrad},
+              {prim::kPrimAsinGrad->name(), &EltWiseGradCPUKernel<T>::AsinGrad},
+              {prim::kPrimACosGrad->name(), &EltWiseGradCPUKernel<T>::ACosGrad},
+              {prim::kPrimAtanGrad->name(), &EltWiseGradCPUKernel<T>::AtanGrad},
+              {prim::kPrimAsinhGrad->name(), &EltWiseGradCPUKernel<T>::AsinhGrad},
+              {prim::kPrimAcoshGrad->name(), &EltWiseGradCPUKernel<T>::AcoshGrad}};
+    if (elt_map.find(kernel_name_) == elt_map.end()) {
+      MS_LOG(EXCEPTION) << "EltWiseGradCPUKernel does not support " << kernel_name_;
+    }
+    compute_func_ = elt_map.at(kernel_name_);
+  } else {
+    static const std::map<std::string,
+                          std::function<void(EltWiseGradCPUKernel *, const T *, const T *, T *, size_t, size_t)>>
+      elt_map{{prim::kPrimReluGrad->name(), &EltWiseGradCPUKernel<T>::ReluGrad},
+              {prim::kPrimRelu6Grad->name(), &EltWiseGradCPUKernel<T>::ReLU6Grad},
+              {prim::kPrimSigmoidGrad->name(), &EltWiseGradCPUKernel<T>::SigmoidGrad},
+              {prim::kPrimAbsGrad->name(), &EltWiseGradCPUKernel<T>::AbsGrad},
+              {prim::kPrimTanhGrad->name(), &EltWiseGradCPUKernel<T>::TanhGrad},
+              {prim::kPrimSqrtGrad->name(), &EltWiseGradCPUKernel<T>::SqrtGrad},
+              {prim::kPrimGeLUGrad->name(), &EltWiseGradCPUKernel<T>::GeluGrad},
+              {prim::kPrimAsinGrad->name(), &EltWiseGradCPUKernel<T>::AsinGrad},
+              {prim::kPrimACosGrad->name(), &EltWiseGradCPUKernel<T>::ACosGrad},
+              {prim::kPrimAtanGrad->name(), &EltWiseGradCPUKernel<T>::AtanGrad},
+              {prim::kPrimAsinhGrad->name(), &EltWiseGradCPUKernel<T>::AsinhGrad},
+              {prim::kPrimAcoshGrad->name(), &EltWiseGradCPUKernel<T>::AcoshGrad},
+              {prim::kPrimSoftplusGrad->name(), &EltWiseGradCPUKernel<T>::SoftplusGrad}};
+    if (elt_map.find(kernel_name_) == elt_map.end()) {
+      MS_LOG(EXCEPTION) << "EltWiseGradCPUKernel does not support " << kernel_name_;
+    }
+    compute_func_ = elt_map.at(kernel_name_);
+  }
+}
+
 template <typename T>
 void EltWiseGradCPUKernel<T>::InitKernel(const CNodePtr &kernel_node) {
   MS_EXCEPTION_IF_NULL(kernel_node);
   kernel_name_ = AnfAlgo::GetCNodeName(kernel_node);
+  InitComputeFunc();
 }

 template <typename T>
 bool EltWiseGradCPUKernel<T>::Launch(const std::vector<kernel::AddressPtr> &inputs,
                                      const std::vector<kernel::AddressPtr> &,
                                      const std::vector<kernel::AddressPtr> &outputs) {
-  static const std::map<std::string,
-                        std::function<void(EltWiseGradCPUKernel *, const T *, const T *, T *, size_t, size_t)>>
-    elt_map{{prim::kPrimReluGrad->name(), &EltWiseGradCPUKernel<T>::ReluGrad},
-            {prim::kPrimRelu6Grad->name(), &EltWiseGradCPUKernel<T>::ReLU6Grad},
-            {prim::kPrimSigmoidGrad->name(), &EltWiseGradCPUKernel<T>::SigmoidGrad},
-            {prim::kPrimAbsGrad->name(), &EltWiseGradCPUKernel<T>::AbsGrad},
-            {prim::kPrimTanhGrad->name(), &EltWiseGradCPUKernel<T>::TanhGrad},
-            {prim::kPrimSqrtGrad->name(), &EltWiseGradCPUKernel<T>::SqrtGrad},
-            {prim::kPrimGeLUGrad->name(), &EltWiseGradCPUKernel<T>::GeluGrad},
-            {prim::kPrimAsinGrad->name(), &EltWiseGradCPUKernel<T>::AsinGrad},
-            {prim::kPrimACosGrad->name(), &EltWiseGradCPUKernel<T>::ACosGrad},
-            {prim::kPrimAtanGrad->name(), &EltWiseGradCPUKernel<T>::AtanGrad},
-            {prim::kPrimAsinhGrad->name(), &EltWiseGradCPUKernel<T>::AsinhGrad},
-            {prim::kPrimAcoshGrad->name(), &EltWiseGradCPUKernel<T>::AcoshGrad},
-            {prim::kPrimSoftplusGrad->name(), &EltWiseGradCPUKernel<T>::SoftplusGrad}};
   if (inputs.size() < kInputMinNum || outputs.size() != kOutputNum) {
     MS_LOG(ERROR) << kernel_name_ << " requires at least 2 inputs and 1 output, but got " << inputs.size()
                   << " inputs and " << outputs.size() << " output.";

@@ -260,7 +285,7 @@ bool EltWiseGradCPUKernel<T>::Launch(const std::vector<kernel::AddressPtr> &inpu
   auto output = reinterpret_cast<T *>(outputs[0]->addr);

   ParallelLaunchAutoSearch(
-    std::bind(elt_map.at(kernel_name_), this, input0, input1, output, std::placeholders::_1, std::placeholders::_2),
+    std::bind(compute_func_, this, input0, input1, output, std::placeholders::_1, std::placeholders::_2),
     outputs[0]->size / sizeof(T), this, &parallel_search_info_);
   return true;
 }
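The interesting part is the if-constexpr split in InitComputeFunc: the op table is built per template instantiation, so the double variant only names the member functions that have float64 support, and the remaining ops are never compiled into EltWiseGradCPUKernel<double> at all. A reduced sketch of the pattern (class, map contents, and op names trimmed down; not the real kernel):

#include <cstddef>
#include <functional>
#include <map>
#include <string>
#include <type_traits>

template <typename T>
struct EltWiseGrad {
  using Fn = std::function<void(const T *, const T *, T *, std::size_t)>;

  void Init(const std::string &op) {
    if constexpr (std::is_same_v<T, double>) {
      // float64 build: only the ops that have a double kernel.
      static const std::map<std::string, Fn> table{{"SqrtGrad", &SqrtGrad}};
      fn_ = table.at(op);  // throws for ops the double build does not carry
    } else {
      static const std::map<std::string, Fn> table{{"SqrtGrad", &SqrtGrad},
                                                   {"ReluGrad", &ReluGrad}};
      fn_ = table.at(op);
    }
  }

  static void SqrtGrad(const T *, const T *, T *, std::size_t) {}
  static void ReluGrad(const T *, const T *, T *, std::size_t) {}
  Fn fn_;
};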
@@ -38,6 +38,7 @@ class EltWiseGradCPUKernel : public CPUKernel {
               const std::vector<AddressPtr> &outputs) override;

  private:
+  void InitComputeFunc();
   void ReluGrad(const T *input1, const T *input2, T *out, size_t start, size_t end) const;
   void ReLU6Grad(const T *input1, const T *input2, T *out, size_t start, size_t end) const;
   void AbsGrad(const T *input1, const T *input2, T *out, size_t start, size_t end) const;

@@ -52,7 +53,8 @@ class EltWiseGradCPUKernel : public CPUKernel {
   void AcoshGrad(const T *input1, const T *input2, T *out, size_t start, size_t end) const;
   void SoftplusGrad(const T *input1, const T *input2, T *out, size_t start, size_t end) const;

-  std::string kernel_name_ = "";
+  using TypeComputeFunc = std::function<void(EltWiseGradCPUKernel *, const T *, const T *, T *, size_t, size_t)>;
+  TypeComputeFunc compute_func_{nullptr};
 };

 MS_REG_CPU_KERNEL_T(

@@ -75,6 +77,10 @@ MS_REG_CPU_KERNEL_T(
   SqrtGrad,
   KernelAttr().AddInputAttr(kNumberTypeFloat32).AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32),
   EltWiseGradCPUKernel, float);
+MS_REG_CPU_KERNEL_T(
+  SqrtGrad,
+  KernelAttr().AddInputAttr(kNumberTypeFloat64).AddInputAttr(kNumberTypeFloat64).AddOutputAttr(kNumberTypeFloat64),
+  EltWiseGradCPUKernel, double);
 MS_REG_CPU_KERNEL_T(
   TanhGrad,
   KernelAttr().AddInputAttr(kNumberTypeFloat32).AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32),
@@ -45,6 +45,8 @@ MS_REG_CPU_KERNEL(Reshape, KernelAttr().AddInputAttr(kNumberTypeInt64).AddOutput
 MS_REG_CPU_KERNEL(Reshape, KernelAttr().AddInputAttr(kNumberTypeBool).AddOutputAttr(kNumberTypeBool), ReshapeCPUKernel);
 MS_REG_CPU_KERNEL(Reshape, KernelAttr().AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32),
                   ReshapeCPUKernel);
+MS_REG_CPU_KERNEL(Reshape, KernelAttr().AddInputAttr(kNumberTypeFloat64).AddOutputAttr(kNumberTypeFloat64),
+                  ReshapeCPUKernel);
 MS_REG_CPU_KERNEL(Reshape, KernelAttr().AddInputAttr(kNumberTypeUInt8).AddOutputAttr(kNumberTypeUInt8),
                   ReshapeCPUKernel);
 MS_REG_CPU_KERNEL(Reshape, KernelAttr().AddInputAttr(kNumberTypeUInt16).AddOutputAttr(kNumberTypeUInt16),

@@ -71,6 +73,8 @@ MS_REG_CPU_KERNEL(Flatten, KernelAttr().AddInputAttr(kNumberTypeInt64).AddOutput
 MS_REG_CPU_KERNEL(Flatten, KernelAttr().AddInputAttr(kNumberTypeBool).AddOutputAttr(kNumberTypeBool), ReshapeCPUKernel);
 MS_REG_CPU_KERNEL(Flatten, KernelAttr().AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32),
                   ReshapeCPUKernel);
+MS_REG_CPU_KERNEL(Flatten, KernelAttr().AddInputAttr(kNumberTypeFloat64).AddOutputAttr(kNumberTypeFloat64),
+                  ReshapeCPUKernel);
 MS_REG_CPU_KERNEL(Flatten, KernelAttr().AddInputAttr(kNumberTypeUInt8).AddOutputAttr(kNumberTypeUInt8),
                   ReshapeCPUKernel);
 MS_REG_CPU_KERNEL(Flatten, KernelAttr().AddInputAttr(kNumberTypeUInt16).AddOutputAttr(kNumberTypeUInt16),

@@ -92,6 +96,8 @@ MS_REG_CPU_KERNEL(FlattenGrad, KernelAttr().AddInputAttr(kNumberTypeBool).AddOut
                   ReshapeCPUKernel);
 MS_REG_CPU_KERNEL(FlattenGrad, KernelAttr().AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32),
                   ReshapeCPUKernel);
+MS_REG_CPU_KERNEL(FlattenGrad, KernelAttr().AddInputAttr(kNumberTypeFloat64).AddOutputAttr(kNumberTypeFloat64),
+                  ReshapeCPUKernel);
 MS_REG_CPU_KERNEL(FlattenGrad, KernelAttr().AddInputAttr(kNumberTypeUInt8).AddOutputAttr(kNumberTypeUInt8),
                   ReshapeCPUKernel);
 MS_REG_CPU_KERNEL(FlattenGrad, KernelAttr().AddInputAttr(kNumberTypeUInt16).AddOutputAttr(kNumberTypeUInt16),

@@ -113,6 +119,8 @@ MS_REG_CPU_KERNEL(ExpandDims, KernelAttr().AddInputAttr(kNumberTypeBool).AddOutp
                   ReshapeCPUKernel);
 MS_REG_CPU_KERNEL(ExpandDims, KernelAttr().AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32),
                   ReshapeCPUKernel);
+MS_REG_CPU_KERNEL(ExpandDims, KernelAttr().AddInputAttr(kNumberTypeFloat64).AddOutputAttr(kNumberTypeFloat64),
+                  ReshapeCPUKernel);
 MS_REG_CPU_KERNEL(ExpandDims, KernelAttr().AddInputAttr(kNumberTypeUInt8).AddOutputAttr(kNumberTypeUInt8),
                   ReshapeCPUKernel);
 MS_REG_CPU_KERNEL(ExpandDims, KernelAttr().AddInputAttr(kNumberTypeUInt16).AddOutputAttr(kNumberTypeUInt16),

@@ -132,6 +140,8 @@ MS_REG_CPU_KERNEL(Squeeze, KernelAttr().AddInputAttr(kNumberTypeInt64).AddOutput
 MS_REG_CPU_KERNEL(Squeeze, KernelAttr().AddInputAttr(kNumberTypeBool).AddOutputAttr(kNumberTypeBool), ReshapeCPUKernel);
 MS_REG_CPU_KERNEL(Squeeze, KernelAttr().AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32),
                   ReshapeCPUKernel);
+MS_REG_CPU_KERNEL(Squeeze, KernelAttr().AddInputAttr(kNumberTypeFloat64).AddOutputAttr(kNumberTypeFloat64),
+                  ReshapeCPUKernel);
 MS_REG_CPU_KERNEL(Squeeze, KernelAttr().AddInputAttr(kNumberTypeUInt8).AddOutputAttr(kNumberTypeUInt8),
                   ReshapeCPUKernel);
 MS_REG_CPU_KERNEL(Squeeze, KernelAttr().AddInputAttr(kNumberTypeUInt16).AddOutputAttr(kNumberTypeUInt16),
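Reshape, Flatten, FlattenGrad, ExpandDims, and Squeeze never look at element values; they copy bytes and attach a new shape, which is why float64 support here is purely a registration change. Conceptually (a sketch, not the actual ReshapeCPUKernel):

#include <cstddef>
#include <cstring>

// The same body serves every dtype: only the byte count matters.
void ReshapeLikeKernel(const void *in, void *out, std::size_t size_in_bytes) {
  std::memcpy(out, in, size_in_bytes);
}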
@@ -72,6 +72,7 @@ MS_REG_CPU_KERNEL(TensorScatterUpdate,
                   .AddInputAttr(kNumberTypeFloat32)
                   .AddOutputAttr(kNumberTypeFloat32),
                   ScatterNdUpdateCPUKernel);
+
 MS_REG_CPU_KERNEL(ScatterNdUpdate,
                   KernelAttr()
                   .AddInputAttr(kNumberTypeInt32)

@@ -79,6 +80,30 @@ MS_REG_CPU_KERNEL(ScatterNdUpdate,
                   .AddInputAttr(kNumberTypeInt32)
                   .AddOutputAttr(kNumberTypeInt32),
                   ScatterNdUpdateCPUKernel);
+
+MS_REG_CPU_KERNEL(TensorScatterUpdate,
+                  KernelAttr()
+                  .AddInputAttr(kNumberTypeInt32)
+                  .AddInputAttr(kNumberTypeInt32)
+                  .AddInputAttr(kNumberTypeInt32)
+                  .AddOutputAttr(kNumberTypeInt32),
+                  ScatterNdUpdateCPUKernel);
+
+MS_REG_CPU_KERNEL(ScatterNdUpdate,
+                  KernelAttr()
+                  .AddInputAttr(kNumberTypeFloat64)
+                  .AddInputAttr(kNumberTypeInt32)
+                  .AddInputAttr(kNumberTypeFloat64)
+                  .AddOutputAttr(kNumberTypeFloat64),
+                  ScatterNdUpdateCPUKernel);
+
+MS_REG_CPU_KERNEL(TensorScatterUpdate,
+                  KernelAttr()
+                  .AddInputAttr(kNumberTypeFloat64)
+                  .AddInputAttr(kNumberTypeInt32)
+                  .AddInputAttr(kNumberTypeFloat64)
+                  .AddOutputAttr(kNumberTypeFloat64),
+                  ScatterNdUpdateCPUKernel);
 } // namespace kernel
 } // namespace mindspore
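The new ScatterNdUpdate/TensorScatterUpdate registrations pair float64 data with int32 indices. The core update is index-driven row assignment; a simplified sketch for the flattened 1-D-rows case (the function name and flattened-shape assumptions belong to this sketch, not the real kernel):

#include <cstddef>
#include <cstdint>

// indices[i] selects a row of `input`; row i of `updates` overwrites it.
void ScatterNdUpdateRows(double *input, const int32_t *indices, const double *updates,
                         std::size_t num_updates, std::size_t row_len) {
  for (std::size_t i = 0; i < num_updates; ++i) {
    double *dst = input + static_cast<std::size_t>(indices[i]) * row_len;
    const double *src = updates + i * row_len;
    for (std::size_t j = 0; j < row_len; ++j) {
      dst[j] = src[j];
    }
  }
}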
@@ -46,6 +46,14 @@ MS_REG_CPU_KERNEL_T(Select,
                     .AddOutputAttr(kNumberTypeFloat32),
                     SelectCPUKernel, float);

+MS_REG_CPU_KERNEL_T(Select,
+                    KernelAttr()
+                      .AddInputAttr(kNumberTypeBool)
+                      .AddInputAttr(kNumberTypeFloat64)
+                      .AddInputAttr(kNumberTypeFloat64)
+                      .AddOutputAttr(kNumberTypeFloat64),
+                    SelectCPUKernel, double);
+
 MS_REG_CPU_KERNEL_T(Select,
                     KernelAttr()
                       .AddInputAttr(kNumberTypeBool)
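Select takes a bool condition plus two value tensors of the same dtype, which is why the new registration lists kNumberTypeBool once and kNumberTypeFloat64 three times (two inputs, one output). The elementwise rule is simple (sketch only):

#include <cstddef>

// out[i] = cond[i] ? x[i] : y[i]; instantiated with T = double for the
// registration added above.
template <typename T>
void SelectKernel(const bool *cond, const T *x, const T *y, T *out, std::size_t n) {
  for (std::size_t i = 0; i < n; ++i) {
    out[i] = cond[i] ? x[i] : y[i];
  }
}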
@@ -39,7 +39,7 @@ abstract::ShapePtr FloorInferShape(const PrimitivePtr &prim, const std::vector<A
 }

 TypePtr FloorInferType(const PrimitivePtr &prim, const std::vector<AbstractBasePtr> &input_args) {
-  const std::set<TypePtr> valid_types = {kFloat16, kFloat32};
+  const std::set<TypePtr> valid_types = {kFloat16, kFloat32, kFloat64};
   auto x_type = input_args[0]->BuildType();
   (void)CheckAndConvertUtils::CheckTensorTypeValid("x", x_type, valid_types, prim->name());
   return x_type;
@@ -2973,7 +2973,7 @@ class Rint(PrimitiveWithInfer):
         Tensor, has the same shape and type as `input_x`.

     Raises:
-        TypeError: If dtype of `input_x` is neither float16 nor float32.
+        TypeError: If dtype of `input_x` is not in [float16, float32, float64].

     Supported Platforms:
         ``Ascend`` ``GPU`` ``CPU``

@@ -3001,7 +3001,7 @@ class Rint(PrimitiveWithInfer):
         return x_shape

     def infer_dtype(self, x_dtype):
-        validator.check_tensor_dtype_valid('x', x_dtype, [mstype.float16, mstype.float32], self.name)
+        validator.check_tensor_dtype_valid('x', x_dtype, [mstype.float16, mstype.float32, mstype.float64], self.name)
         return x_dtype

@@ -1845,7 +1845,7 @@ class SquaredDifference(_MathBinaryOp):
     """

     def infer_dtype(self, x_dtype, y_dtype):
-        valid_type = [mstype.float16, mstype.float32, mstype.int32]
+        valid_type = [mstype.float16, mstype.float32, mstype.float64, mstype.int32]
         return _MathBinaryOp.do_infer_dtype(x_dtype, y_dtype, valid_type, self.name)

@@ -2974,7 +2974,7 @@ class Floor(Primitive):
         Tensor, has the same shape as `x`.

     Raises:
-        TypeError: If dtype of `x` is not float16 or float32.
+        TypeError: If dtype of `x` is not in [float16, float32, float64].

     Supported Platforms:
         ``Ascend`` ``GPU`` ``CPU``
@@ -36,8 +36,14 @@ class NetACos(nn.Cell):
 @pytest.mark.level0
 @pytest.mark.platform_x86_cpu
 @pytest.mark.env_onecard
-def test_acos():
-    np_array = np.array([-1, -0.5, 0, 0.5, 1]).astype('float32')
+@pytest.mark.parametrize('dtype', [np.float32, np.float64])
+def test_acos(dtype):
+    """
+    Feature: ALL To ALL
+    Description: test cases for ACos
+    Expectation: the result match to numpy
+    """
+    np_array = np.array([-1, -0.5, 0, 0.5, 1], dtype=dtype)
     input_x = Tensor(np_array)
     net = NetACos()
     output = net(input_x)
@@ -36,8 +36,14 @@ class NetAcosh(nn.Cell):
 @pytest.mark.level0
 @pytest.mark.platform_x86_cpu
 @pytest.mark.env_onecard
-def test_acosh():
-    np_array = np.array([1, 2, 3, 4, 5]).astype('float32')
+@pytest.mark.parametrize('dtype', [np.float32, np.float64])
+def test_acosh(dtype):
+    """
+    Feature: ALL To ALL
+    Description: test cases for Acosh
+    Expectation: the result match to numpy
+    """
+    np_array = np.array([1, 2, 3, 4, 5], dtype=dtype)
    input_x = Tensor(np_array)
     net = NetAcosh()
     output = net(input_x)
@@ -80,6 +80,14 @@ def test_sub():
     expect_output = x - y
     assert np.all(output.asnumpy() == expect_output)
+
+    # float64
+    x = np.random.rand(2, 3, 4, 4).astype(np.float64)
+    y = np.random.rand(4, 1).astype(np.float64)
+    net = SubNet()
+    output = net(Tensor(x), Tensor(y, mindspore.float64))
+    expect_output = x - y
+    assert np.all(output.asnumpy() == expect_output)


 @pytest.mark.level0
 @pytest.mark.platform_x86_cpu

@@ -102,6 +110,8 @@ def test_div():
     y6_np = np.random.randint(1, 100, (2, 3, 4, 4)).astype(np.float32) * prop
     x7_np = np.random.randint(1, 100, (2, 1, 1, 4)).astype(np.int64) * prop
     y7_np = np.random.randint(1, 100, (2, 3, 4, 4)).astype(np.int64) * prop
+    x8_np = np.random.randint(1, 100, (2, 3, 4, 4)).astype(np.float64) * prop
+    y8_np = np.random.randint(1, 100, (2, 1, 4, 4)).astype(np.float64) * prop

     x0 = Tensor(x0_np)
     y0 = Tensor(y0_np)

@@ -119,6 +129,8 @@ def test_div():
     y6 = Tensor(y6_np)
     x7 = Tensor(x7_np)
     y7 = Tensor(y7_np)
+    x8 = Tensor(x8_np)
+    y8 = Tensor(y8_np)

     context.set_context(mode=context.GRAPH_MODE, device_target='CPU')
     div = DivNet()

@@ -173,6 +185,13 @@ def test_div():
     assert np.all(output7.asnumpy() == expect7)
     assert output7.shape == expect7.shape
+
+    output8 = div(x8, y8)
+    expect8 = np.divide(x8_np, y8_np)
+    diff8 = output8.asnumpy() - expect8
+    error8 = np.ones(shape=expect8.shape) * 1.0e-7
+    assert np.all(diff8 < error8)
+    assert output8.shape == expect8.shape


 @pytest.mark.level0
 @pytest.mark.platform_x86_cpu

@@ -189,6 +208,8 @@ def test_floor_div():
     y3_np = np.random.randint(1, 100, (2, 3, 4, 4)).astype(np.float32) * prop
     x4_np = np.random.randint(1, 100, (2, 1, 1, 4)).astype(np.int64) * prop
     y4_np = np.random.randint(1, 100, (2, 3, 4, 4)).astype(np.int64) * prop
+    x5_np = np.random.randint(1, 100, (2, 3, 4, 4)).astype(np.float64) * prop
+    y5_np = np.random.randint(1, 100, (2, 1, 4, 4)).astype(np.float64) * prop

     x0 = Tensor(x0_np)
     y0 = Tensor(y0_np)

@@ -200,6 +221,8 @@ def test_floor_div():
     y3 = Tensor(y3_np)
     x4 = Tensor(x4_np)
     y4 = Tensor(y4_np)
+    x5 = Tensor(x5_np)
+    y5 = Tensor(y5_np)

     context.set_context(mode=context.GRAPH_MODE, device_target='CPU')
     floor_div = FloorDivNet()

@@ -238,6 +261,13 @@ def test_floor_div():
     assert np.all(diff4 < error4)
     assert output4.shape == expect4.shape
+
+    output5 = floor_div(x5, y5)
+    expect5 = np.floor_divide(x5_np, y5_np)
+    diff5 = output5.asnumpy() - expect5
+    error5 = np.ones(shape=expect5.shape) * 1.0e-7
+    assert np.all(diff5 < error5)
+    assert output5.shape == expect5.shape


 @pytest.mark.level0
 @pytest.mark.platform_x86_cpu
@@ -144,6 +144,13 @@ def test_floor():
     print(output.asnumpy())
     assert np.all(output.asnumpy() == expect_output)
+
+    x = np.random.randn(4, 3).astype(np.float64)
+    x = x * 100
+    output = net(Tensor(x))
+    expect_output = np.floor(x)
+    print(output.asnumpy())
+    assert np.all(output.asnumpy() == expect_output)


 @pytest.mark.level0
 @pytest.mark.platform_x86_cpu

@@ -161,6 +168,11 @@ def test_rint():
     expect_output = np.rint(x).astype(np.float32)
     np.testing.assert_almost_equal(output.asnumpy(), expect_output)
+
+    x = np.random.randn(3, 4, 5, 6).astype(np.float64) * prop
+    output = net(Tensor(x))
+    expect_output = np.rint(x).astype(np.float64)
+    np.testing.assert_almost_equal(output.asnumpy(), expect_output)


 @pytest.mark.level0
 @pytest.mark.platform_x86_cpu

@@ -178,6 +190,11 @@ def test_round():
     expect_output = np.round(x).astype(np.float32)
     np.testing.assert_almost_equal(output.asnumpy(), expect_output)
+
+    x = np.array([0.9920, -0.4077, 0.9734, -1.0362, 1.5, -2.5, 4.5]).astype(np.float64)
+    output = net(Tensor(x))
+    expect_output = np.round(x).astype(np.float64)
+    np.testing.assert_almost_equal(output.asnumpy(), expect_output)


 @pytest.mark.level0
 @pytest.mark.platform_x86_cpu

@@ -199,6 +216,13 @@ def test_reciprocal():
     error = np.ones(shape=expect_output.shape) * 1.0e-5
     assert np.all(np.abs(diff) < error)
+
+    x = np.random.randn(3, 4, 5, 6).astype(np.float64) * prop
+    output = net(Tensor(x))
+    expect_output = (1. / x).astype(np.float64)
+    diff = output.asnumpy() - expect_output
+    error = np.ones(shape=expect_output.shape) * 1.0e-7
+    assert np.all(np.abs(diff) < error)


 @pytest.mark.level0
 @pytest.mark.platform_x86_cpu
@@ -36,8 +36,14 @@ class NetAsin(nn.Cell):
 @pytest.mark.level0
 @pytest.mark.platform_x86_cpu
 @pytest.mark.env_onecard
-def test_asin():
-    np_array = np.array([-1, -0.5, 0, 0.5, 1]).astype('float32')
+@pytest.mark.parametrize('dtype', [np.float32, np.float64])
+def test_asin(dtype):
+    """
+    Feature: ALL To ALL
+    Description: test cases for ASin
+    Expectation: the result match to numpy
+    """
+    np_array = np.array([-1, -0.5, 0, 0.5, 1], dtype=dtype)
     input_x = Tensor(np_array)
     net = NetAsin()
     output = net(input_x)
@@ -36,8 +36,14 @@ class NetAsinh(nn.Cell):
 @pytest.mark.level0
 @pytest.mark.platform_x86_cpu
 @pytest.mark.env_onecard
-def test_asinh():
-    np_array = np.array([-1, -0.5, 0, 0.5, 1]).astype('float32')
+@pytest.mark.parametrize('dtype', [np.float32, np.float64])
+def test_asinh(dtype):
+    """
+    Feature: ALL To ALL
+    Description: test cases for Asinh
+    Expectation: the result match to numpy
+    """
+    np_array = np.array([-1, -0.5, 0, 0.5, 1], dtype=dtype)
     input_x = Tensor(np_array)
     net = NetAsinh()
     output = net(input_x)
@@ -36,7 +36,13 @@ class AssignAdd(nn.Cell):
 @pytest.mark.level0
 @pytest.mark.platform_x86_cpu
 @pytest.mark.env_onecard
-def test_assign_add():
+@pytest.mark.parametrize('dtype', [np.int32, np.int64, np.float32, np.float64])
+def test_assign_add(dtype):
+    """
+    Feature: ALL To ALL
+    Description: test cases for AssignAdd
+    Expectation: the result match to numpy
+    """
     expect1 = np.array([[[[0, 2, 4.],
                           [6, 8, 10.],
                           [12, 14, 16.]],

@@ -56,8 +62,8 @@ def test_assign_add():
                          [63, 66, 69],
                          [72, 75, 78]]]])

-    x2 = Tensor(np.arange(1 * 3 * 3 * 3).reshape(1, 3, 3, 3).astype(np.float32))
-    y2 = Tensor(np.arange(1 * 3 * 3 * 3).reshape(1, 3, 3, 3).astype(np.float32))
+    x2 = Tensor(np.arange(1 * 3 * 3 * 3).reshape(1, 3, 3, 3).astype(dtype))
+    y2 = Tensor(np.arange(1 * 3 * 3 * 3).reshape(1, 3, 3, 3).astype(dtype))

     context.set_context(mode=context.GRAPH_MODE, device_target='CPU')
     add = AssignAdd(x2)
@@ -44,3 +44,11 @@ def test_atan2():
     print(output)
     expect = np.arctan2(np_array, np_array)
     assert np.allclose(output.asnumpy(), expect)
+
+    np_array = np.array([1, 2, 3, 4, 5], dtype=np.float64)
+    input_x = Tensor(np_array)
+    net = NetAtan2()
+    output = net(input_x, input_x)
+    print(output)
+    expect = np.arctan2(np_array, np_array)
+    assert np.allclose(output.asnumpy(), expect)
@@ -36,8 +36,14 @@ class NetAtan(nn.Cell):
 @pytest.mark.level0
 @pytest.mark.platform_x86_cpu
 @pytest.mark.env_onecard
-def test_atan():
-    np_array = np.array([-1, -0.5, 0, 0.5, 1]).astype('float32')
+@pytest.mark.parametrize('dtype', [np.float32, np.float64])
+def test_atan(dtype):
+    """
+    Feature: ALL To ALL
+    Description: test cases for Atan
+    Expectation: the result match to numpy
+    """
+    np_array = np.array([-1, -0.5, 0, 0.5, 1], dtype=dtype)
     input_x = Tensor(np_array)
     net = NetAtan()
     output = net(input_x)
@@ -36,8 +36,14 @@ class NetAtanh(nn.Cell):
 @pytest.mark.level0
 @pytest.mark.platform_x86_cpu
 @pytest.mark.env_onecard
-def test_atanh():
-    np_array = np.array([-0.5, 0, 0.5]).astype('float32')
+@pytest.mark.parametrize('dtype', [np.float32, np.float64])
+def test_atanh(dtype):
+    """
+    Feature: ALL To ALL
+    Description: test cases for Atanh
+    Expectation: the result match to numpy
+    """
+    np_array = np.array([-0.5, 0, 0.5], dtype)
     input_x = Tensor(np_array)
     net = NetAtanh()
     output = net(input_x)
@@ -44,3 +44,11 @@ def test_cos():
     print(output)
     expect = np.cos(np_array)
     assert np.allclose(output.asnumpy(), expect)
+
+    np_array = np.array([-1, -0.5, 0, 0.5, 1]).astype('float64')
+    input_x = Tensor(np_array)
+    net = NetCos()
+    output = net(input_x)
+    print(output)
+    expect = np.cos(np_array)
+    assert np.allclose(output.asnumpy(), expect)
@@ -64,6 +64,26 @@ def test_float32():
     assert outputs.shape == (3,)
     assert np.allclose(outputs.asnumpy(), [True, True, True])

+
+@pytest.mark.level0
+@pytest.mark.platform_x86_cpu
+@pytest.mark.env_onecard
+def test_float64():
+    """
+    Feature: ALL To ALL
+    Description: test cases for GreaterEqual of float64
+    Expectation: the result match to numpy
+    """
+    op = P.GreaterEqual()
+    op_wrapper = OpNetWrapper(op)
+
+    input_x = Tensor(np.array([1, 2, -1]).astype(np.float64))
+    input_y = Tensor(np.array([-3, 2, -1]).astype(np.float64))
+    outputs = op_wrapper(input_x, input_y)
+
+    print(outputs)
+    assert outputs.shape == (3,)
+    assert np.allclose(outputs.asnumpy(), [True, True, True])

 if __name__ == '__main__':
     test_int32()
@@ -65,6 +65,26 @@ def test_float32():
     assert np.allclose(outputs.asnumpy(), [True, False, False])

+
+@pytest.mark.level0
+@pytest.mark.platform_x86_cpu
+@pytest.mark.env_onecard
+def test_float64():
+    """
+    Feature: ALL To ALL
+    Description: test cases for Greater
+    Expectation: the result match to numpy
+    """
+    op = P.Greater()
+    op_wrapper = OpNetWrapper(op)
+
+    input_x = Tensor(np.array([1, 2, -1]).astype(np.float64))
+    input_y = Tensor(np.array([-3, 2, -1]).astype(np.float64))
+    outputs = op_wrapper(input_x, input_y)
+
+    print(outputs)
+    assert outputs.shape == (3,)
+    assert np.allclose(outputs.asnumpy(), [True, False, False])
+
 if __name__ == '__main__':
     test_int32()
     test_float32()
@@ -177,12 +177,16 @@ def test_net_int64():
 @pytest.mark.platform_x86_cpu
 @pytest.mark.env_onecard
 def test_net_float64():
+    """
+    Feature: ALL To ALL
+    Description: test cases for LessEqual of float64
+    Expectation: the result match to numpy
+    """
     x1_np = np.random.randint(1, 5, (2, 3, 4, 4)).astype(np.float64)
     y1_np = np.random.randint(1, 5, (2, 1, 4, 4)).astype(np.float64)
     x1 = Tensor(x1_np)
     y1 = Tensor(y1_np)

     context.set_context(mode=context.GRAPH_MODE, device_target='CPU')
     net = Net()
     out = net(x1, y1).asnumpy()
@@ -32,17 +32,23 @@ class Net(nn.Cell):
 @pytest.mark.level0
 @pytest.mark.platform_x86_cpu
 @pytest.mark.env_onecard
-def test_net():
-    x0_np = np.random.randint(1, 5, (2, 3, 4, 4)).astype(np.float32)
-    y0_np = np.random.randint(1, 5, (2, 3, 4, 4)).astype(np.float32)
-    x1_np = np.random.randint(1, 5, (2, 3, 4, 4)).astype(np.float32)
-    y1_np = np.random.randint(1, 5, (2, 1, 4, 4)).astype(np.float32)
-    x2_np = np.random.randint(1, 5, (2, 1, 1, 4)).astype(np.float32)
-    y2_np = np.random.randint(1, 5, (2, 3, 4, 4)).astype(np.float32)
-    x3_np = np.random.randint(1, 5, 1).astype(np.float32)
-    y3_np = np.random.randint(1, 5, 1).astype(np.float32)
-    x4_np = np.array(768).astype(np.float32)
-    y4_np = np.array(3072.5).astype(np.float32)
+@pytest.mark.parametrize('dtype', [np.int32, np.int64, np.float32, np.float64])
+def test_net(dtype):
+    """
+    Feature: ALL To ALL
+    Description: test cases for Less
+    Expectation: the result match to numpy
+    """
+    x0_np = np.random.randint(1, 5, (2, 3, 4, 4)).astype(dtype)
+    y0_np = np.random.randint(1, 5, (2, 3, 4, 4)).astype(dtype)
+    x1_np = np.random.randint(1, 5, (2, 3, 4, 4)).astype(dtype)
+    y1_np = np.random.randint(1, 5, (2, 1, 4, 4)).astype(dtype)
+    x2_np = np.random.randint(1, 5, (2, 1, 1, 4)).astype(dtype)
+    y2_np = np.random.randint(1, 5, (2, 3, 4, 4)).astype(dtype)
+    x3_np = np.random.randint(1, 5, 1).astype(dtype)
+    y3_np = np.random.randint(1, 5, 1).astype(dtype)
+    x4_np = np.array(768).astype(dtype)
+    y4_np = np.array(3072.5).astype(dtype)

     x0 = Tensor(x0_np)
     y0 = Tensor(y0_np)
@@ -34,9 +34,15 @@ class NetOnesLike(nn.Cell):
 @pytest.mark.level0
 @pytest.mark.platform_x86_cpu
 @pytest.mark.env_onecard
-def test_OnesLike():
-    x0_np = np.random.uniform(-2, 2, (2, 3, 4, 4)).astype(np.float32)
-    x1_np = np.random.uniform(-2, 2, 1).astype(np.float32)
+@pytest.mark.parametrize('dtype', [np.int32, np.float32, np.float64])
+def test_OnesLike(dtype):
+    """
+    Feature: ALL To ALL
+    Description: test cases for OnesLike
+    Expectation: the result match to numpy
+    """
+    x0_np = np.random.uniform(-2, 2, (2, 3, 4, 4)).astype(dtype)
+    x1_np = np.random.uniform(-2, 2, 1).astype(dtype)

     x0 = Tensor(x0_np)
     x1 = Tensor(x1_np)
@@ -35,15 +35,28 @@ class Net(nn.Cell):
 @pytest.mark.platform_x86_cpu
 @pytest.mark.env_onecard
 def test_net():
+    """
+    Feature: ALL To ALL
+    Description: test cases for Pow
+    Expectation: the result match to numpy
+    """
     x0_np = np.random.randint(1, 5, (2, 3, 4, 4)).astype(np.float32)
     y0_np = np.random.randint(1, 5, (2, 3, 4, 4)).astype(np.float32)
     x1_np = np.random.randint(1, 5, (2, 3, 4, 4)).astype(np.float32)
     y1_np = np.array(3).astype(np.float32)
+    x2_np = np.random.randint(1, 5, (2, 3, 4, 4)).astype(np.float64)
+    y2_np = np.random.randint(1, 5, (2, 3, 4, 4)).astype(np.float64)
+    x3_np = np.random.randint(1, 5, (2, 3, 4, 4)).astype(np.float64)
+    y3_np = np.array(3).astype(np.float64)

     x0 = Tensor(x0_np)
     y0 = Tensor(y0_np)
     x1 = Tensor(x1_np)
     y1 = Tensor(y1_np)
+    x2 = Tensor(x2_np)
+    y2 = Tensor(y2_np)
+    x3 = Tensor(x3_np)
+    y3 = Tensor(y3_np)

     context.set_context(mode=context.GRAPH_MODE, device_target='CPU')
     net = Net()

@@ -56,3 +69,13 @@ def test_net():
     expect = np.power(x1_np, y1_np)
     assert np.all(out == expect)
     assert out.shape == expect.shape
+
+    out = net(x2, y2).asnumpy()
+    expect = np.power(x2_np, y2_np)
+    assert np.all(out == expect)
+    assert out.shape == expect.shape
+
+    out = net(x3, y3).asnumpy()
+    expect = np.power(x3_np, y3_np)
+    assert np.all(out == expect)
+    assert out.shape == expect.shape
@@ -34,17 +34,23 @@ class NetRealDiv(nn.Cell):
 @pytest.mark.level0
 @pytest.mark.platform_x86_cpu
 @pytest.mark.env_onecard
-def test_real_div():
-    x0_np = np.random.randint(1, 5, (2, 3, 4, 4)).astype(np.float32)
-    y0_np = np.random.randint(1, 5, (2, 3, 4, 4)).astype(np.float32)
-    x1_np = np.random.randint(1, 5, (2, 3, 4, 4)).astype(np.float32)
-    y1_np = np.random.randint(1, 5, (2, 1, 4, 4)).astype(np.float32)
-    x2_np = np.random.randint(1, 5, (2, 1, 1, 4)).astype(np.float32)
-    y2_np = np.random.randint(1, 5, (2, 3, 4, 4)).astype(np.float32)
-    x3_np = np.random.randint(1, 5, 1).astype(np.float32)
-    y3_np = np.random.randint(1, 5, 1).astype(np.float32)
-    x4_np = np.array(768).astype(np.float32)
-    y4_np = np.array(3072.5).astype(np.float32)
+@pytest.mark.parametrize('dtype', [np.float32, np.float64])
+def test_real_div(dtype):
+    """
+    Feature: ALL To ALL
+    Description: test cases for RealDiv
+    Expectation: the result match to numpy
+    """
+    x0_np = np.random.randint(1, 5, (2, 3, 4, 4)).astype(dtype)
+    y0_np = np.random.randint(1, 5, (2, 3, 4, 4)).astype(dtype)
+    x1_np = np.random.randint(1, 5, (2, 3, 4, 4)).astype(dtype)
+    y1_np = np.random.randint(1, 5, (2, 1, 4, 4)).astype(dtype)
+    x2_np = np.random.randint(1, 5, (2, 1, 1, 4)).astype(dtype)
+    y2_np = np.random.randint(1, 5, (2, 3, 4, 4)).astype(dtype)
+    x3_np = np.random.randint(1, 5, 1).astype(dtype)
+    y3_np = np.random.randint(1, 5, 1).astype(dtype)
+    x4_np = np.array(768).astype(dtype)
+    y4_np = np.array(3072.5).astype(dtype)

     x0 = Tensor(x0_np)
     y0 = Tensor(y0_np)
@@ -61,6 +61,25 @@ def test_sign_int32():
     assert np.allclose(outputs.asnumpy(), [[1, 0, -1]])


+@pytest.mark.level0
+@pytest.mark.platform_x86_cpu
+@pytest.mark.env_onecard
+def test_sign_float64():
+    """
+    Feature: ALL To ALL
+    Description: test cases for Sign of float64
+    Expectation: the result match to numpy
+    """
+    op = P.Sign()
+    op_wrapper = OpNetWrapper(op)
+
+    input_x = Tensor(np.array([[2.0, 0.0, -1.0]]).astype(np.float64))
+    outputs = op_wrapper(input_x)
+
+    print(outputs)
+    assert np.allclose(outputs.asnumpy(), [[1., 0., -1.]])
+
+
 if __name__ == '__main__':
     test_sign_float32()
     test_sign_int32()
@@ -44,3 +44,11 @@ def test_sin():
     print(output)
     expect = np.sin(np_array)
     assert np.allclose(output.asnumpy(), expect)
+
+    np_array = np.array([-1, -0.5, 0, 0.5, 1]).astype('float64')
+    input_x = Tensor(np_array)
+    net = NetSin()
+    output = net(input_x)
+    print(output)
+    expect = np.sin(np_array)
+    assert np.allclose(output.asnumpy(), expect)
@@ -47,8 +47,14 @@ class Net(nn.Cell):
 @pytest.mark.level0
 @pytest.mark.platform_x86_cpu
 @pytest.mark.env_onecard
-def test_net():
-    x = np.random.randn(2, 3, 3, 4).astype(np.float32)
+@pytest.mark.parametrize('dtype', [np.int32, np.int64, np.float32, np.float64])
+def test_net(dtype):
+    """
+    Feature: ALL To ALL
+    Description: test cases for Square
+    Expectation: the result match to numpy
+    """
+    x = np.random.randn(2, 3, 3, 4).astype(dtype)
     y_expect = x * x
     net = Net()
     out = net(Tensor(x))

@@ -56,7 +62,7 @@ def test_net():
     err = np.ones(shape=y_expect.shape) * 1.0e-5
     assert np.all(diff < err)
     assert out.shape == y_expect.shape
-    sens = np.random.randn(2, 3, 3, 4).astype(np.float32)
+    sens = np.random.randn(2, 3, 3, 4).astype(dtype)
     backword_net = Grad(Net())
     output = backword_net(Tensor(x), Tensor(sens))
     print(len(output))
@@ -56,6 +56,14 @@ def test_net01():
     assert np.all(expect2 == output2)
     assert output2.shape == expect2.shape

+    x2 = np.random.randn(2, 3).astype(np.float64)
+    y2 = np.random.randn(2, 3).astype(np.float64)
+    output2 = net(Tensor(x2), Tensor(y2)).asnumpy()
+    diff = x2 - y2
+    expect2 = diff * diff
+    assert np.all(expect2 == output2)
+    assert output2.shape == expect2.shape
+
     x3 = np.random.randn(2, 3).astype(np.bool)
     y3 = np.random.randn(2, 3).astype(np.bool)
     try:
@@ -34,9 +34,15 @@ class NetZerosLike(nn.Cell):
 @pytest.mark.level0
 @pytest.mark.platform_x86_cpu
 @pytest.mark.env_onecard
-def test_ZerosLike():
-    x0_np = np.random.uniform(-2, 2, (2, 3, 4, 4)).astype(np.float32)
-    x1_np = np.random.uniform(-2, 2, 1).astype(np.float32)
+@pytest.mark.parametrize('dtype', [np.int32, np.float32, np.float64])
+def test_ZerosLike(dtype):
+    """
+    Feature: ALL To ALL
+    Description: test cases for ZerosLike
+    Expectation: the result match to numpy
+    """
+    x0_np = np.random.uniform(-2, 2, (2, 3, 4, 4)).astype(dtype)
+    x1_np = np.random.uniform(-2, 2, 1).astype(dtype)

     x0 = Tensor(x0_np)
     x1 = Tensor(x1_np)