add head for aot test

This commit is contained in:
Yang Jiao 2021-12-07 16:36:14 +08:00
parent 5acce31c72
commit d9531a4f73
8 changed files with 245 additions and 74 deletions

View File

@ -14,27 +14,48 @@
* limitations under the License.
*/
#include <string.h>
using size_t = decltype(sizeof(int));
using int64_t = decltype(sizeof(long));
#include <cstdint>
extern "C" int CustomAdd(int nparam, void **params, int *ndims, int64_t **shapes, const char **dtypes, void *stream,
void *extra) {
if (nparam != 3) return 1;
float *input1 = static_cast<float *>(params[0]);
float *input2 = static_cast<float *>(params[1]);
float *output = static_cast<float *>(params[2]);
size_t size = 1;
constexpr int OUTPUT_INDEX = 2;
constexpr int TOTAL_PARAM_NUM = 3;
for (int i = 0; i < ndims[2]; i++) {
size *= shapes[2][i];
// Users can add any check on their need. If check fails, user can return any value larger than 0 to safely exit.
// Return value larger than 0 will cause mindspore to stop computing and safely exit.
// Specially, return 1 will show log: "Number of parameters passed is inconsistent with what the user wants".
// return 2 will show log: "Type of parameters passed is inconsistent with what the user wants".
// This is to check if the num of parameters the same as what the user wants.
// In this case, there are two inputs and one output, so the nparam should be 3.
if (nparam != TOTAL_PARAM_NUM) {
return 1;
}
// This is to check if the type of parameters the same as what the user wants.
for (int i = 0; i < nparam; i++) {
if (strcmp(dtypes[i], "float32") != 0) {
return 2;
}
}
// input1's index is 0, input2's index is 1 and output's index is 2
float *input1 = static_cast<float *>(params[0]);
float *input2 = static_cast<float *>(params[1]);
float *output = static_cast<float *>(params[2]);
int size = 1;
// Cumprod of output's shape to compute elements' num
for (int i = 0; i < ndims[OUTPUT_INDEX]; i++) {
size *= shapes[OUTPUT_INDEX][i];
}
// Do the computation
// Add
for (int i = 0; i < size; i++) {
output[i] = input1[i] + input2[i];
}
// When return 0, mindspore will continue to run if this kernel could launch successfully.
return 0;
}

View File

@ -13,10 +13,10 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#define THREADS 1024
constexpr int THREADS = 1024;
__global__ void CustomAddKernel(float *input1, float *input2, float *output, size_t size) {
auto idx = blockIdx.x * THREADS + threadIdx.x;
// Add
if (idx < size) {
output[idx] = input1[idx] + input2[idx];
}
@ -25,22 +25,44 @@ __global__ void CustomAddKernel(float *input1, float *input2, float *output, siz
extern "C" int CustomAdd(int nparam, void **params, int *ndims, int64_t **shapes, const char **dtypes, void *stream,
void *extra) {
cudaStream_t custream = static_cast<cudaStream_t>(stream);
if (nparam != 3) return 1;
void *input1 = params[0];
void *input2 = params[1];
void *output = params[2];
size_t size = 1;
constexpr int OUTPUT_INDEX = 2;
constexpr int TOTAL_PARAM_NUM = 3;
for (int i = 0; i < ndims[2]; i++) {
size *= shapes[2][i];
// Users can add any check on their need. If check fails, user can return any value larger than 0 to safely exit.
// Return value larger than 0 will cause mindspore to stop computing and safely exit.
// Specially, return 1 will show log: "Number of parameters passed is inconsistent with what the user wants".
// return 2 will show log: "Type of parameters passed is inconsistent with what the user wants".
// This is to check if the num of parameters the same as what the user wants.
// There are two inputs and one output, so the nparam should be 3.
if (nparam != TOTAL_PARAM_NUM) {
return 1;
}
int n = size / THREADS;
// This is to check if the type of parameters the same as what the user wants.
for (int i = 0; i < nparam; i++) {
if (strcmp(dtypes[i], "float32") != 0) {
return 2;
}
}
// input1's index is 0, input2's index is 1 and output's index is 2
void *input1 = params[0];
void *input2 = params[1];
void *output = params[2];
size_t size = 1;
// Cumprod of output's shape to compute elements' num
for (int i = 0; i < ndims[OUTPUT_INDEX]; i++) {
size *= shapes[OUTPUT_INDEX][i];
}
int n = size / THREADS;
// Do the computation
CustomAddKernel<<<n + 1, THREADS, 0, custream>>>(static_cast<float *>(input1), static_cast<float *>(input2),
static_cast<float *>(output), size);
// When return 0, mindspore will continue to run if this kernel could launch successfully.
return 0;
}

View File

@ -14,7 +14,8 @@
* limitations under the License.
*/
#define THREADS 1024
constexpr int THREADS = 1024;
__global__ void CustomAddMulDivKernel(float *input1, float *input2, float *output1, float *output2, float *output3,
size_t size) {
auto idx = blockIdx.x * THREADS + threadIdx.x;
@ -28,7 +29,29 @@ __global__ void CustomAddMulDivKernel(float *input1, float *input2, float *outpu
extern "C" int CustomAddMulDiv(int nparam, void **params, int *ndims, int64_t **shapes, const char **dtypes,
void *stream, void *extra) {
cudaStream_t custream = static_cast<cudaStream_t>(stream);
if (nparam != 5) return 1;
constexpr int OUTPUT_INDEX = 2;
constexpr int TOTAL_PARAM_NUM = 5;
// Users can add any check on their need. If check fails, user can return any value larger than 0 to safely exit.
// Return value larger than 0 will cause mindspore to stop computing and safely exit.
// Specially, return 1 will show log: "Number of parameters passed is inconsistent with what the user wants".
// return 2 will show log: "Type of parameters passed is inconsistent with what the user wants".
// This is to check if the num of parameters the same as what the user wants.
// There are two inputs and three outputs, so the nparam should be 5.
if (nparam != TOTAL_PARAM_NUM) {
return 1;
}
// This is to check if the type of parameters the same as what the user wants.
for (int i = 0; i < nparam; i++) {
if (strcmp(dtypes[i], "float32") != 0) {
return 2;
}
}
// input1's index is 0, input2's index is 1, output1's index is 2, output2's index is 3 and output3's index is 4
void *input1 = params[0];
void *input2 = params[1];
void *output1 = params[2];
@ -36,19 +59,16 @@ extern "C" int CustomAddMulDiv(int nparam, void **params, int *ndims, int64_t **
void *output3 = params[4];
size_t size = 1;
for (int i = 0; i < ndims[2]; i++) {
size *= shapes[2][i];
// Cumprod of output's shape to compute elements' num
for (int i = 0; i < ndims[OUTPUT_INDEX]; i++) {
size *= shapes[OUTPUT_INDEX][i];
}
int n = size / THREADS;
for (int i = 0; i < nparam; i++) {
if (strcmp(dtypes[i], "float32") != 0) {
return 2;
}
}
// Do the computation
CustomAddMulDivKernel<<<n + 1, THREADS, 0, custream>>>(static_cast<float *>(input1), static_cast<float *>(input2),
static_cast<float *>(output1), static_cast<float *>(output2),
static_cast<float *>(output3), size);
// When return 0, mindspore will continue to run if this kernel could launch successfully.
return 0;
}

View File

@ -14,7 +14,7 @@
* limitations under the License.
*/
#define THREADS 1024
constexpr int THREADS = 1024;
__global__ void CustomAddMulDivBpropKernel(float *input1, float *input2, float *input3, float *input4, float *input5,
float *output1, float *output2, size_t size) {
@ -28,7 +28,30 @@ __global__ void CustomAddMulDivBpropKernel(float *input1, float *input2, float *
extern "C" int CustomAddMulDivBprop(int nparam, void **params, int *ndims, int64_t **shapes, const char **dtypes,
void *stream, void *extra) {
cudaStream_t custream = static_cast<cudaStream_t>(stream);
if (nparam != 7) return 1;
constexpr int OUTPUT_INDEX = 6;
constexpr int TOTAL_PARAM_NUM = 7;
// Users can add any check on their need. If check fails, user can return any value larger than 0 to safely exit.
// Return value larger than 0 will cause mindspore to stop computing and safely exit.
// Specially, return 1 will show log: "Number of parameters passed is inconsistent with what the user wants".
// return 2 will show log: "Type of parameters passed is inconsistent with what the user wants".
// This is to check if the num of parameters the same as what the user wants.
// There are five inputs and two outputs, so the nparam should be 7.
if (nparam != TOTAL_PARAM_NUM) {
return 1;
}
// This is to check if the type of parameters the same as what the user wants.
for (int i = 0; i < nparam; i++) {
if (strcmp(dtypes[i], "float32") != 0) {
return 2;
}
}
// input1's index is 0, input2's index is 1, input3's index is 2, input4's index is 3, input5's index is 4
// output1's index is 5 and output2's index is 6
void *input1 = params[0];
void *input2 = params[1];
void *input3 = params[2];
@ -39,20 +62,18 @@ extern "C" int CustomAddMulDivBprop(int nparam, void **params, int *ndims, int64
size_t size = 1;
for (int i = 0; i < ndims[6]; i++) {
size *= shapes[6][i];
// Cumprod of output's shape to compute elements' num
for (int i = 0; i < ndims[OUTPUT_INDEX]; i++) {
size *= shapes[OUTPUT_INDEX][i];
}
int n = size / THREADS;
for (int i = 0; i < nparam; i++) {
if (strcmp(dtypes[i], "float32") != 0) {
return 2;
}
}
// Do the computation
CustomAddMulDivBpropKernel<<<n + 1, THREADS, 0, custream>>>(
static_cast<float *>(input1), static_cast<float *>(input2), static_cast<float *>(input3),
static_cast<float *>(input4), static_cast<float *>(input5), static_cast<float *>(output1),
static_cast<float *>(output2), size);
// When return 0, mindspore will continue to run if this kernel could launch successfully.
return 0;
}

View File

@ -13,9 +13,9 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
constexpr int THREADS = 1024;
#include <cuda_fp16.h>
#define THREADS 1024
__global__ void CustomHSquareMulKernel(float *input1, half *input2, half *output, size_t size) {
auto idx = blockIdx.x * THREADS + threadIdx.x;
@ -27,18 +27,22 @@ __global__ void CustomHSquareMulKernel(float *input1, half *input2, half *output
extern "C" int CustomHSquareMul(int nparam, void **params, int *ndims, int64_t **shapes, const char **dtypes,
void *stream, void *extra) {
cudaStream_t custream = static_cast<cudaStream_t>(stream);
if (nparam != 3) return 1;
void *input1 = params[0];
void *input2 = params[1];
void *output = params[2];
size_t size = 1;
constexpr int OUTPUT_INDEX = 2;
constexpr int TOTAL_PARAM_NUM = 3;
for (int i = 0; i < ndims[2]; i++) {
size *= shapes[2][i];
// Users can add any check on their need. If check fails, user can return any value larger than 0 to safely exit.
// Return value larger than 0 will cause mindspore to stop computing and safely exit.
// Specially, return 1 will show log: "Number of parameters passed is inconsistent with what the user wants".
// return 2 will show log: "Type of parameters passed is inconsistent with what the user wants".
// This is to check if the num of parameters the same as what the user wants.
// There are two inputs and one output, so the nparam should be 3.
if (nparam != TOTAL_PARAM_NUM) {
return 1;
}
int n = size / THREADS;
// This is to check if the type of parameters the same as what the user wants.
if (strcmp(dtypes[0], "float32") != 0) {
return 2;
}
@ -49,7 +53,23 @@ extern "C" int CustomHSquareMul(int nparam, void **params, int *ndims, int64_t *
return 2;
}
// input1's index is 0, input2's index is 1 and output's index is 2
void *input1 = params[0];
void *input2 = params[1];
void *output = params[2];
size_t size = 1;
// Cumprod of output's shape to compute elements' num
for (int i = 0; i < ndims[OUTPUT_INDEX]; i++) {
size *= shapes[OUTPUT_INDEX][i];
}
int n = size / THREADS;
// Do the computation
CustomHSquareMulKernel<<<n + 1, THREADS, 0, custream>>>(static_cast<float *>(input1), static_cast<half *>(input2),
static_cast<half *>(output), size);
// When return 0, mindspore will continue to run if this kernel could launch successfully.
return 0;
}

View File

@ -14,10 +14,12 @@
* limitations under the License.
*/
#define THREADS 1024
constexpr int THREADS = 1024;
__global__ void CustomReorganizeKernel(float *input1, int64_t *input2, float *output, size_t size) {
auto idx = blockIdx.x * THREADS + threadIdx.x;
if (idx < size) {
// Reorganize
output[idx] = input1[input2[idx]];
}
}
@ -25,19 +27,22 @@ __global__ void CustomReorganizeKernel(float *input1, int64_t *input2, float *ou
extern "C" int CustomReorganize(int nparam, void **params, int *ndims, int64_t **shapes, const char **dtypes,
void *stream, void *extra) {
cudaStream_t custream = static_cast<cudaStream_t>(stream);
if (nparam != 3) return 1;
void *input1 = params[0];
void *input2 = params[1];
void *output = params[2];
constexpr int OUTPUT_INDEX = 2;
constexpr int TOTAL_PARAM_NUM = 3;
size_t size = 1;
// Users can add any check on their need. If check fails, user can return any value larger than 0 to safely exit.
// Return value larger than 0 will cause mindspore to stop computing and safely exit.
// Specially, return 1 will show log: "Number of parameters passed is inconsistent with what the user wants".
// return 2 will show log: "Type of parameters passed is inconsistent with what the user wants".
for (int i = 0; i < ndims[2]; i++) {
size *= shapes[2][i];
// This is to check if the num of parameters the same as what the user wants.
// There are two inputs and one output, so the nparam should be 3.
if (nparam != TOTAL_PARAM_NUM) {
return 1;
}
int n = size / THREADS;
// This is to check if the type of parameters the same as what the user wants.
if (strcmp(dtypes[0], "float32") != 0) {
return 2;
}
@ -47,8 +52,25 @@ extern "C" int CustomReorganize(int nparam, void **params, int *ndims, int64_t *
if (strcmp(dtypes[2], "float32") != 0) {
return 2;
}
// input1's index is 0, input2's index is 1 and output's index is 2
void *input1 = params[0];
void *input2 = params[1];
void *output = params[2];
size_t size = 1;
// Cumprod of output's shape to compute elements' num
for (int i = 0; i < ndims[OUTPUT_INDEX]; i++) {
size *= shapes[OUTPUT_INDEX][i];
}
int n = size / THREADS;
// Do the computation
CustomReorganizeKernel<<<n + 1, THREADS, 0, custream>>>(static_cast<float *>(input1), static_cast<int64_t *>(input2),
static_cast<float *>(output), size);
// When return 0, mindspore will continue to run if this kernel could launch successfully.
return 0;
}

View File

@ -13,9 +13,11 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#define THREADS 1024
constexpr int THREADS = 1024;
__global__ void CustomSquareKernel(float *input1, float *output, size_t size) {
auto idx = blockIdx.x * THREADS + threadIdx.x;
// Square
if (idx < size) {
output[idx] = input1[idx] * input1[idx];
}
@ -24,22 +26,42 @@ __global__ void CustomSquareKernel(float *input1, float *output, size_t size) {
extern "C" int CustomSquare(int nparam, void **params, int *ndims, int64_t **shapes, const char **dtypes, void *stream,
void *extra) {
cudaStream_t custream = static_cast<cudaStream_t>(stream);
if (nparam != 2) return 1;
void *input1 = params[0];
void *output = params[1];
constexpr int OUTPUT_INDEX = 1;
constexpr int TOTAL_PARAM_NUM = 2;
size_t size = 1;
// Users can add any check on their need. If check fails, user can return any value larger than 0 to safely exit.
// Any return value larger than 0 will cause mindspore to stop computing and safely exit.
// Specially, return 1 will show log: "Number of parameters passed is inconsistent with what the user wants".
// return 2 will show log: "Type of parameters passed is inconsistent with what the user wants".
for (int i = 0; i < ndims[1]; i++) {
size *= shapes[1][i];
// This is to check if the num of parameters the same as what the user wants.
// There are one input and one output, so the nparam should be 2.
if (nparam != TOTAL_PARAM_NUM) {
return 1;
}
int n = size / THREADS;
// This is to check if the type of parameters the same as what the user wants.
for (int i = 0; i < nparam; i++) {
if (strcmp(dtypes[i], "float32") != 0) {
return 2;
}
}
// input1's index is 0, output's index is 1
void *input1 = params[0];
void *output = params[1];
size_t size = 1;
// Cumprod of output's shape to compute elements' num
for (int i = 0; i < ndims[OUTPUT_INDEX]; i++) {
size *= shapes[OUTPUT_INDEX][i];
}
int n = size / THREADS;
// Do the computation
CustomSquareKernel<<<n + 1, THREADS, 0, custream>>>(static_cast<float *>(input1), static_cast<float *>(output), size);
// When return 0, mindspore will continue to run if this kernel could launch successfully.
return 0;
}

View File

@ -14,7 +14,8 @@
* limitations under the License.
*/
#define THREADS 1024
constexpr int THREADS = 1024;
__global__ void CustomSquareBpropKernel(float *input1, float *input3, float *output, size_t size) {
auto idx = blockIdx.x * THREADS + threadIdx.x;
if (idx < size) {
@ -25,24 +26,46 @@ __global__ void CustomSquareBpropKernel(float *input1, float *input3, float *out
extern "C" int CustomSquareBprop(int nparam, void **params, int *ndims, int64_t **shapes, const char **dtypes,
void *stream, void *extra) {
cudaStream_t custream = static_cast<cudaStream_t>(stream);
if (nparam != 4) return 1;
void *input1 = params[0];
void *input3 = params[2];
void *output = params[3];
size_t size = 1;
constexpr int OUTPUT_INDEX = 3;
constexpr int TOTAL_PARAM_NUM = 4;
for (int i = 0; i < ndims[3]; i++) {
size *= shapes[3][i];
// Users can add any check on their need. If check fails, user can return any value larger than 0 to safely exit.
// Return value larger than 0 will cause mindspore to stop computing and safely exit.
// Specially, return 1 will show log: "Number of parameters passed is inconsistent with what the user wants".
// return 2 will show log: "Type of parameters passed is inconsistent with what the user wants".
// This is to check if the num of parameters the same as what the user wants.
// There are three inputs and one output, so the nparam should be 4.
if (nparam != TOTAL_PARAM_NUM) {
return 1;
}
int n = size / THREADS;
// This is to check if the type of parameters the same as what the user wants.
for (int i = 0; i < nparam; i++) {
if (strcmp(dtypes[i], "float32") != 0) {
return 2;
}
}
// input1's index is 0, input2's index is 1, input3's index is 2 and output's index is 3
void *input1 = params[0];
void *input3 = params[2];
void *output = params[3];
size_t size = 1;
// Cumprod of output's shape to compute elements'num
for (int i = 0; i < ndims[OUTPUT_INDEX]; i++) {
size *= shapes[OUTPUT_INDEX][i];
}
int n = size / THREADS;
// Do the computation
CustomSquareBpropKernel<<<n + 1, THREADS, 0, custream>>>(static_cast<float *>(input1), static_cast<float *>(input3),
static_cast<float *>(output), size);
// When return 0, mindspore will continue to run if this kernel could launch successfully.
return 0;
}