add head for aot test

2021-12-07 16:36:14 +08:00 · 2021-12-07 16:36:14 +08:00 · d9531a4f73
parent 5acce31c72
commit d9531a4f73
8 changed files with 245 additions and 74 deletions
--- a/tests/st/ops/graph_kernel/custom/aot_test_files/add.cc
+++ b/tests/st/ops/graph_kernel/custom/aot_test_files/add.cc
@ -14,27 +14,48 @@
 * limitations under the License.
 */
 #include <string.h>
-using size_t = decltype(sizeof(int));
-using int64_t = decltype(sizeof(long));
+#include <cstdint>

 extern "C" int CustomAdd(int nparam, void **params, int *ndims, int64_t **shapes, const char **dtypes, void *stream,
                         void *extra) {
-  if (nparam != 3) return 1;
-  float *input1 = static_cast<float *>(params[0]);
-  float *input2 = static_cast<float *>(params[1]);
-  float *output = static_cast<float *>(params[2]);
-  size_t size = 1;
+  constexpr int OUTPUT_INDEX = 2;
+  constexpr int TOTAL_PARAM_NUM = 3;

-  for (int i = 0; i < ndims[2]; i++) {
-    size *= shapes[2][i];
+  // Users can add any check on their need. If check fails, user can return any value larger than 0 to safely exit.
+  // Return value larger than 0 will cause mindspore to stop computing and safely exit.
+  // Specially, return 1 will show log: "Number of parameters passed is inconsistent with what the user wants".
+  // return 2 will show log: "Type of parameters passed is inconsistent with what the user wants".
+
+  // This is to check if the num of parameters the same as what the user wants.
+  // In this case, there are two inputs and one output, so the nparam should be 3.
+  if (nparam != TOTAL_PARAM_NUM) {
+    return 1;
  }
+
+  // This is to check if the type of parameters the same as what the user wants.
  for (int i = 0; i < nparam; i++) {
    if (strcmp(dtypes[i], "float32") != 0) {
      return 2;
    }
  }
+
+  // input1's index is 0, input2's index is 1 and output's index is 2
+  float *input1 = static_cast<float *>(params[0]);
+  float *input2 = static_cast<float *>(params[1]);
+  float *output = static_cast<float *>(params[2]);
+  int size = 1;
+
+  // Cumprod of output's shape to compute elements' num
+  for (int i = 0; i < ndims[OUTPUT_INDEX]; i++) {
+    size *= shapes[OUTPUT_INDEX][i];
+  }
+
+  // Do the computation
+  // Add
  for (int i = 0; i < size; i++) {
    output[i] = input1[i] + input2[i];
  }
+
+  // When return 0, mindspore will continue to run if this kernel could launch successfully.
  return 0;
 }
--- a/tests/st/ops/graph_kernel/custom/aot_test_files/add.cu
+++ b/tests/st/ops/graph_kernel/custom/aot_test_files/add.cu
@ -13,10 +13,10 @@
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
-
-#define THREADS 1024
+constexpr int THREADS = 1024;
 __global__ void CustomAddKernel(float *input1, float *input2, float *output, size_t size) {
  auto idx = blockIdx.x * THREADS + threadIdx.x;
+  // Add
  if (idx < size) {
    output[idx] = input1[idx] + input2[idx];
  }
@ -25,22 +25,44 @@ __global__ void CustomAddKernel(float *input1, float *input2, float *output, siz
 extern "C" int CustomAdd(int nparam, void **params, int *ndims, int64_t **shapes, const char **dtypes, void *stream,
                         void *extra) {
  cudaStream_t custream = static_cast<cudaStream_t>(stream);
-  if (nparam != 3) return 1;
-  void *input1 = params[0];
-  void *input2 = params[1];
-  void *output = params[2];
-  size_t size = 1;
+  constexpr int OUTPUT_INDEX = 2;
+  constexpr int TOTAL_PARAM_NUM = 3;

-  for (int i = 0; i < ndims[2]; i++) {
-    size *= shapes[2][i];
+  // Users can add any check on their need. If check fails, user can return any value larger than 0 to safely exit.
+  // Return value larger than 0 will cause mindspore to stop computing and safely exit.
+  // Specially, return 1 will show log: "Number of parameters passed is inconsistent with what the user wants".
+  // return 2 will show log: "Type of parameters passed is inconsistent with what the user wants".
+
+  // This is to check if the num of parameters the same as what the user wants.
+  // There are two inputs and one output, so the nparam should be 3.
+  if (nparam != TOTAL_PARAM_NUM) {
+    return 1;
  }
-  int n = size / THREADS;
+
+  // This is to check if the type of parameters the same as what the user wants.
  for (int i = 0; i < nparam; i++) {
    if (strcmp(dtypes[i], "float32") != 0) {
      return 2;
    }
  }
+
+  // input1's index is 0, input2's index is 1 and output's index is 2
+  void *input1 = params[0];
+  void *input2 = params[1];
+  void *output = params[2];
+  size_t size = 1;
+
+  // Cumprod of output's shape to compute elements' num
+  for (int i = 0; i < ndims[OUTPUT_INDEX]; i++) {
+    size *= shapes[OUTPUT_INDEX][i];
+  }
+
+  int n = size / THREADS;
+
+  // Do the computation
  CustomAddKernel<<<n + 1, THREADS, 0, custream>>>(static_cast<float *>(input1), static_cast<float *>(input2),
                                                   static_cast<float *>(output), size);
+
+  // When return 0, mindspore will continue to run if this kernel could launch successfully.
  return 0;
 }
--- a/tests/st/ops/graph_kernel/custom/aot_test_files/add_mul_div.cu
+++ b/tests/st/ops/graph_kernel/custom/aot_test_files/add_mul_div.cu
@ -14,7 +14,8 @@
 * limitations under the License.
 */

-#define THREADS 1024
+constexpr int THREADS = 1024;
+
 __global__ void CustomAddMulDivKernel(float *input1, float *input2, float *output1, float *output2, float *output3,
                                      size_t size) {
  auto idx = blockIdx.x * THREADS + threadIdx.x;
@ -28,7 +29,29 @@ __global__ void CustomAddMulDivKernel(float *input1, float *input2, float *outpu
 extern "C" int CustomAddMulDiv(int nparam, void **params, int *ndims, int64_t **shapes, const char **dtypes,
                               void *stream, void *extra) {
  cudaStream_t custream = static_cast<cudaStream_t>(stream);
-  if (nparam != 5) return 1;
+
+  constexpr int OUTPUT_INDEX = 2;
+  constexpr int TOTAL_PARAM_NUM = 5;
+
+  // Users can add any check on their need. If check fails, user can return any value larger than 0 to safely exit.
+  // Return value larger than 0 will cause mindspore to stop computing and safely exit.
+  // Specially, return 1 will show log: "Number of parameters passed is inconsistent with what the user wants".
+  // return 2 will show log: "Type of parameters passed is inconsistent with what the user wants".
+
+  // This is to check if the num of parameters the same as what the user wants.
+  // There are two inputs and three outputs, so the nparam should be 5.
+  if (nparam != TOTAL_PARAM_NUM) {
+    return 1;
+  }
+
+  // This is to check if the type of parameters the same as what the user wants.
+  for (int i = 0; i < nparam; i++) {
+    if (strcmp(dtypes[i], "float32") != 0) {
+      return 2;
+    }
+  }
+
+  // input1's index is 0, input2's index is 1, output1's index is 2, output2's index is 3 and output3's index is 4
  void *input1 = params[0];
  void *input2 = params[1];
  void *output1 = params[2];
@ -36,19 +59,16 @@ extern "C" int CustomAddMulDiv(int nparam, void **params, int *ndims, int64_t **
  void *output3 = params[4];
  size_t size = 1;

-  for (int i = 0; i < ndims[2]; i++) {
-    size *= shapes[2][i];
+  // Cumprod of output's shape to compute elements' num
+  for (int i = 0; i < ndims[OUTPUT_INDEX]; i++) {
+    size *= shapes[OUTPUT_INDEX][i];
  }
  int n = size / THREADS;
-  for (int i = 0; i < nparam; i++) {
-    if (strcmp(dtypes[i], "float32") != 0) {
-      return 2;
-    }
-  }

+  // Do the computation
  CustomAddMulDivKernel<<<n + 1, THREADS, 0, custream>>>(static_cast<float *>(input1), static_cast<float *>(input2),
                                                         static_cast<float *>(output1), static_cast<float *>(output2),
                                                         static_cast<float *>(output3), size);
-
+  // When return 0, mindspore will continue to run if this kernel could launch successfully.
  return 0;
 }
--- a/tests/st/ops/graph_kernel/custom/aot_test_files/add_mul_div_bprop.cu
+++ b/tests/st/ops/graph_kernel/custom/aot_test_files/add_mul_div_bprop.cu
@ -14,7 +14,7 @@
 * limitations under the License.
 */

-#define THREADS 1024
+constexpr int THREADS = 1024;

 __global__ void CustomAddMulDivBpropKernel(float *input1, float *input2, float *input3, float *input4, float *input5,
                                           float *output1, float *output2, size_t size) {
@ -28,7 +28,30 @@ __global__ void CustomAddMulDivBpropKernel(float *input1, float *input2, float *
 extern "C" int CustomAddMulDivBprop(int nparam, void **params, int *ndims, int64_t **shapes, const char **dtypes,
                                    void *stream, void *extra) {
  cudaStream_t custream = static_cast<cudaStream_t>(stream);
-  if (nparam != 7) return 1;
+
+  constexpr int OUTPUT_INDEX = 6;
+  constexpr int TOTAL_PARAM_NUM = 7;
+
+  // Users can add any check on their need. If check fails, user can return any value larger than 0 to safely exit.
+  // Return value larger than 0 will cause mindspore to stop computing and safely exit.
+  // Specially, return 1 will show log: "Number of parameters passed is inconsistent with what the user wants".
+  // return 2 will show log: "Type of parameters passed is inconsistent with what the user wants".
+
+  // This is to check if the num of parameters the same as what the user wants.
+  // There are five inputs and two outputs, so the nparam should be 7.
+  if (nparam != TOTAL_PARAM_NUM) {
+    return 1;
+  }
+
+  // This is to check if the type of parameters the same as what the user wants.
+  for (int i = 0; i < nparam; i++) {
+    if (strcmp(dtypes[i], "float32") != 0) {
+      return 2;
+    }
+  }
+
+  // input1's index is 0, input2's index is 1, input3's index is 2, input4's index is 3, input5's index is 4
+  // output1's index is 5 and output2's index is 6
  void *input1 = params[0];
  void *input2 = params[1];
  void *input3 = params[2];
@ -39,20 +62,18 @@ extern "C" int CustomAddMulDivBprop(int nparam, void **params, int *ndims, int64

  size_t size = 1;

-  for (int i = 0; i < ndims[6]; i++) {
-    size *= shapes[6][i];
+  // Cumprod of output's shape to compute elements' num
+  for (int i = 0; i < ndims[OUTPUT_INDEX]; i++) {
+    size *= shapes[OUTPUT_INDEX][i];
  }
  int n = size / THREADS;
-  for (int i = 0; i < nparam; i++) {
-    if (strcmp(dtypes[i], "float32") != 0) {
-      return 2;
-    }
-  }

+  // Do the computation
  CustomAddMulDivBpropKernel<<<n + 1, THREADS, 0, custream>>>(
    static_cast<float *>(input1), static_cast<float *>(input2), static_cast<float *>(input3),
    static_cast<float *>(input4), static_cast<float *>(input5), static_cast<float *>(output1),
    static_cast<float *>(output2), size);

+  // When return 0, mindspore will continue to run if this kernel could launch successfully.
  return 0;
 }
--- a/tests/st/ops/graph_kernel/custom/aot_test_files/hetero_square_mul.cu
+++ b/tests/st/ops/graph_kernel/custom/aot_test_files/hetero_square_mul.cu
@ -13,9 +13,9 @@
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
+constexpr int THREADS = 1024;

 #include <cuda_fp16.h>
-#define THREADS 1024

 __global__ void CustomHSquareMulKernel(float *input1, half *input2, half *output, size_t size) {
  auto idx = blockIdx.x * THREADS + threadIdx.x;
@ -27,18 +27,22 @@ __global__ void CustomHSquareMulKernel(float *input1, half *input2, half *output
 extern "C" int CustomHSquareMul(int nparam, void **params, int *ndims, int64_t **shapes, const char **dtypes,
                                void *stream, void *extra) {
  cudaStream_t custream = static_cast<cudaStream_t>(stream);
-  if (nparam != 3) return 1;
-  void *input1 = params[0];
-  void *input2 = params[1];

-  void *output = params[2];
-  size_t size = 1;
+  constexpr int OUTPUT_INDEX = 2;
+  constexpr int TOTAL_PARAM_NUM = 3;

-  for (int i = 0; i < ndims[2]; i++) {
-    size *= shapes[2][i];
+  // Users can add any check on their need. If check fails, user can return any value larger than 0 to safely exit.
+  // Return value larger than 0 will cause mindspore to stop computing and safely exit.
+  // Specially, return 1 will show log: "Number of parameters passed is inconsistent with what the user wants".
+  // return 2 will show log: "Type of parameters passed is inconsistent with what the user wants".
+
+  // This is to check if the num of parameters the same as what the user wants.
+  // There are two inputs and one output, so the nparam should be 3.
+  if (nparam != TOTAL_PARAM_NUM) {
+    return 1;
  }
-  int n = size / THREADS;

+  // This is to check if the type of parameters the same as what the user wants.
  if (strcmp(dtypes[0], "float32") != 0) {
    return 2;
  }
@ -49,7 +53,23 @@ extern "C" int CustomHSquareMul(int nparam, void **params, int *ndims, int64_t *
    return 2;
  }

+  // input1's index is 0, input2's index is 1 and output's index is 2
+  void *input1 = params[0];
+  void *input2 = params[1];
+
+  void *output = params[2];
+  size_t size = 1;
+
+  // Cumprod of output's shape to compute elements' num
+  for (int i = 0; i < ndims[OUTPUT_INDEX]; i++) {
+    size *= shapes[OUTPUT_INDEX][i];
+  }
+  int n = size / THREADS;
+
+  // Do the computation
  CustomHSquareMulKernel<<<n + 1, THREADS, 0, custream>>>(static_cast<float *>(input1), static_cast<half *>(input2),
                                                          static_cast<half *>(output), size);
+
+  // When return 0, mindspore will continue to run if this kernel could launch successfully.
  return 0;
 }
--- a/tests/st/ops/graph_kernel/custom/aot_test_files/reorganize.cu
+++ b/tests/st/ops/graph_kernel/custom/aot_test_files/reorganize.cu
@ -14,10 +14,12 @@
 * limitations under the License.
 */

-#define THREADS 1024
+constexpr int THREADS = 1024;
+
 __global__ void CustomReorganizeKernel(float *input1, int64_t *input2, float *output, size_t size) {
  auto idx = blockIdx.x * THREADS + threadIdx.x;
  if (idx < size) {
+    // Reorganize
    output[idx] = input1[input2[idx]];
  }
 }
@ -25,19 +27,22 @@ __global__ void CustomReorganizeKernel(float *input1, int64_t *input2, float *ou
 extern "C" int CustomReorganize(int nparam, void **params, int *ndims, int64_t **shapes, const char **dtypes,
                                void *stream, void *extra) {
  cudaStream_t custream = static_cast<cudaStream_t>(stream);
-  if (nparam != 3) return 1;
-  void *input1 = params[0];
-  void *input2 = params[1];

-  void *output = params[2];
+  constexpr int OUTPUT_INDEX = 2;
+  constexpr int TOTAL_PARAM_NUM = 3;

-  size_t size = 1;
+  // Users can add any check on their need. If check fails, user can return any value larger than 0 to safely exit.
+  // Return value larger than 0 will cause mindspore to stop computing and safely exit.
+  // Specially, return 1 will show log: "Number of parameters passed is inconsistent with what the user wants".
+  // return 2 will show log: "Type of parameters passed is inconsistent with what the user wants".

-  for (int i = 0; i < ndims[2]; i++) {
-    size *= shapes[2][i];
+  // This is to check if the num of parameters the same as what the user wants.
+  // There are two inputs and one output, so the nparam should be 3.
+  if (nparam != TOTAL_PARAM_NUM) {
+    return 1;
  }
-  int n = size / THREADS;
-
+ 
+  // This is to check if the type of parameters the same as what the user wants.
  if (strcmp(dtypes[0], "float32") != 0) {
    return 2;
  }
@ -47,8 +52,25 @@ extern "C" int CustomReorganize(int nparam, void **params, int *ndims, int64_t *
  if (strcmp(dtypes[2], "float32") != 0) {
    return 2;
  }
+
+  // input1's index is 0, input2's index is 1 and output's index is 2
+  void *input1 = params[0];
+  void *input2 = params[1];
+
+  void *output = params[2];
+
+  size_t size = 1;
+
+  // Cumprod of output's shape to compute elements' num
+  for (int i = 0; i < ndims[OUTPUT_INDEX]; i++) {
+    size *= shapes[OUTPUT_INDEX][i];
+  }
+  int n = size / THREADS;
+
+  // Do the computation
  CustomReorganizeKernel<<<n + 1, THREADS, 0, custream>>>(static_cast<float *>(input1), static_cast<int64_t *>(input2),
                                                          static_cast<float *>(output), size);

+  // When return 0, mindspore will continue to run if this kernel could launch successfully.
  return 0;
 }
--- a/tests/st/ops/graph_kernel/custom/aot_test_files/square.cu
+++ b/tests/st/ops/graph_kernel/custom/aot_test_files/square.cu
@ -13,9 +13,11 @@
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
-#define THREADS 1024
+constexpr int THREADS = 1024;
+
 __global__ void CustomSquareKernel(float *input1, float *output, size_t size) {
  auto idx = blockIdx.x * THREADS + threadIdx.x;
+  // Square
  if (idx < size) {
    output[idx] = input1[idx] * input1[idx];
  }
@ -24,22 +26,42 @@ __global__ void CustomSquareKernel(float *input1, float *output, size_t size) {
 extern "C" int CustomSquare(int nparam, void **params, int *ndims, int64_t **shapes, const char **dtypes, void *stream,
                            void *extra) {
  cudaStream_t custream = static_cast<cudaStream_t>(stream);
-  if (nparam != 2) return 1;
-  void *input1 = params[0];
-  void *output = params[1];
+  constexpr int OUTPUT_INDEX = 1;
+  constexpr int TOTAL_PARAM_NUM = 2;

-  size_t size = 1;
+  // Users can add any check on their need. If check fails, user can return any value larger than 0 to safely exit.
+  // Any return value larger than 0 will cause mindspore to stop computing and safely exit.
+  // Specially, return 1 will show log: "Number of parameters passed is inconsistent with what the user wants".
+  // return 2 will show log: "Type of parameters passed is inconsistent with what the user wants".

-  for (int i = 0; i < ndims[1]; i++) {
-    size *= shapes[1][i];
+  // This is to check if the num of parameters the same as what the user wants.
+  // There are one input and one output, so the nparam should be 2.
+  if (nparam != TOTAL_PARAM_NUM) {
+    return 1;
  }
-  int n = size / THREADS;
+
+  // This is to check if the type of parameters the same as what the user wants.
  for (int i = 0; i < nparam; i++) {
    if (strcmp(dtypes[i], "float32") != 0) {
      return 2;
    }
  }

+  // input1's index is 0, output's index is 1
+  void *input1 = params[0];
+  void *output = params[1];
+
+  size_t size = 1;
+
+  // Cumprod of output's shape to compute elements' num
+  for (int i = 0; i < ndims[OUTPUT_INDEX]; i++) {
+    size *= shapes[OUTPUT_INDEX][i];
+  }
+  int n = size / THREADS;
+
+  // Do the computation
  CustomSquareKernel<<<n + 1, THREADS, 0, custream>>>(static_cast<float *>(input1), static_cast<float *>(output), size);
+
+  // When return 0, mindspore will continue to run if this kernel could launch successfully.
  return 0;
 }
--- a/tests/st/ops/graph_kernel/custom/aot_test_files/square_bprop.cu
+++ b/tests/st/ops/graph_kernel/custom/aot_test_files/square_bprop.cu
@ -14,7 +14,8 @@
 * limitations under the License.
 */

-#define THREADS 1024
+constexpr int THREADS = 1024;
+
 __global__ void CustomSquareBpropKernel(float *input1, float *input3, float *output, size_t size) {
  auto idx = blockIdx.x * THREADS + threadIdx.x;
  if (idx < size) {
@ -25,24 +26,46 @@ __global__ void CustomSquareBpropKernel(float *input1, float *input3, float *out
 extern "C" int CustomSquareBprop(int nparam, void **params, int *ndims, int64_t **shapes, const char **dtypes,
                                 void *stream, void *extra) {
  cudaStream_t custream = static_cast<cudaStream_t>(stream);
-  if (nparam != 4) return 1;
-  void *input1 = params[0];
-  void *input3 = params[2];
-  void *output = params[3];

-  size_t size = 1;
+  constexpr int OUTPUT_INDEX = 3;
+  constexpr int TOTAL_PARAM_NUM = 4;

-  for (int i = 0; i < ndims[3]; i++) {
-    size *= shapes[3][i];
+  // Users can add any check on their need. If check fails, user can return any value larger than 0 to safely exit.
+  // Return value larger than 0 will cause mindspore to stop computing and safely exit.
+  // Specially, return 1 will show log: "Number of parameters passed is inconsistent with what the user wants".
+  // return 2 will show log: "Type of parameters passed is inconsistent with what the user wants".
+
+  // This is to check if the num of parameters the same as what the user wants.
+  // There are three inputs and one output, so the nparam should be 4.
+  if (nparam != TOTAL_PARAM_NUM) {
+    return 1;
  }
-  int n = size / THREADS;
+
+  // This is to check if the type of parameters the same as what the user wants.
  for (int i = 0; i < nparam; i++) {
    if (strcmp(dtypes[i], "float32") != 0) {
      return 2;
    }
  }

+  // input1's index is 0, input2's index is 1, input3's index is 2 and output's index is 3
+  void *input1 = params[0];
+  void *input3 = params[2];
+  void *output = params[3];
+
+  size_t size = 1;
+
+  // Cumprod of output's shape to compute elements'num
+  for (int i = 0; i < ndims[OUTPUT_INDEX]; i++) {
+    size *= shapes[OUTPUT_INDEX][i];
+  }
+
+  int n = size / THREADS;
+
+  // Do the computation
  CustomSquareBpropKernel<<<n + 1, THREADS, 0, custream>>>(static_cast<float *>(input1), static_cast<float *>(input3),
                                                           static_cast<float *>(output), size);
+
+  // When return 0, mindspore will continue to run if this kernel could launch successfully.
  return 0;
 }