add head for aot test
This commit is contained in:
parent
5acce31c72
commit
d9531a4f73
|
@ -14,27 +14,48 @@
|
|||
* limitations under the License.
|
||||
*/
|
||||
#include <string.h>
|
||||
using size_t = decltype(sizeof(int));
|
||||
using int64_t = decltype(sizeof(long));
|
||||
#include <cstdint>
|
||||
|
||||
extern "C" int CustomAdd(int nparam, void **params, int *ndims, int64_t **shapes, const char **dtypes, void *stream,
|
||||
void *extra) {
|
||||
if (nparam != 3) return 1;
|
||||
float *input1 = static_cast<float *>(params[0]);
|
||||
float *input2 = static_cast<float *>(params[1]);
|
||||
float *output = static_cast<float *>(params[2]);
|
||||
size_t size = 1;
|
||||
constexpr int OUTPUT_INDEX = 2;
|
||||
constexpr int TOTAL_PARAM_NUM = 3;
|
||||
|
||||
for (int i = 0; i < ndims[2]; i++) {
|
||||
size *= shapes[2][i];
|
||||
// Users can add any check on their need. If check fails, user can return any value larger than 0 to safely exit.
|
||||
// Return value larger than 0 will cause mindspore to stop computing and safely exit.
|
||||
// Specially, return 1 will show log: "Number of parameters passed is inconsistent with what the user wants".
|
||||
// return 2 will show log: "Type of parameters passed is inconsistent with what the user wants".
|
||||
|
||||
// This is to check if the num of parameters the same as what the user wants.
|
||||
// In this case, there are two inputs and one output, so the nparam should be 3.
|
||||
if (nparam != TOTAL_PARAM_NUM) {
|
||||
return 1;
|
||||
}
|
||||
|
||||
// This is to check if the type of parameters the same as what the user wants.
|
||||
for (int i = 0; i < nparam; i++) {
|
||||
if (strcmp(dtypes[i], "float32") != 0) {
|
||||
return 2;
|
||||
}
|
||||
}
|
||||
|
||||
// input1's index is 0, input2's index is 1 and output's index is 2
|
||||
float *input1 = static_cast<float *>(params[0]);
|
||||
float *input2 = static_cast<float *>(params[1]);
|
||||
float *output = static_cast<float *>(params[2]);
|
||||
int size = 1;
|
||||
|
||||
// Cumprod of output's shape to compute elements' num
|
||||
for (int i = 0; i < ndims[OUTPUT_INDEX]; i++) {
|
||||
size *= shapes[OUTPUT_INDEX][i];
|
||||
}
|
||||
|
||||
// Do the computation
|
||||
// Add
|
||||
for (int i = 0; i < size; i++) {
|
||||
output[i] = input1[i] + input2[i];
|
||||
}
|
||||
|
||||
// When return 0, mindspore will continue to run if this kernel could launch successfully.
|
||||
return 0;
|
||||
}
|
||||
|
|
|
@ -13,10 +13,10 @@
|
|||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#define THREADS 1024
|
||||
constexpr int THREADS = 1024;
|
||||
__global__ void CustomAddKernel(float *input1, float *input2, float *output, size_t size) {
|
||||
auto idx = blockIdx.x * THREADS + threadIdx.x;
|
||||
// Add
|
||||
if (idx < size) {
|
||||
output[idx] = input1[idx] + input2[idx];
|
||||
}
|
||||
|
@ -25,22 +25,44 @@ __global__ void CustomAddKernel(float *input1, float *input2, float *output, siz
|
|||
extern "C" int CustomAdd(int nparam, void **params, int *ndims, int64_t **shapes, const char **dtypes, void *stream,
|
||||
void *extra) {
|
||||
cudaStream_t custream = static_cast<cudaStream_t>(stream);
|
||||
if (nparam != 3) return 1;
|
||||
void *input1 = params[0];
|
||||
void *input2 = params[1];
|
||||
void *output = params[2];
|
||||
size_t size = 1;
|
||||
constexpr int OUTPUT_INDEX = 2;
|
||||
constexpr int TOTAL_PARAM_NUM = 3;
|
||||
|
||||
for (int i = 0; i < ndims[2]; i++) {
|
||||
size *= shapes[2][i];
|
||||
// Users can add any check on their need. If check fails, user can return any value larger than 0 to safely exit.
|
||||
// Return value larger than 0 will cause mindspore to stop computing and safely exit.
|
||||
// Specially, return 1 will show log: "Number of parameters passed is inconsistent with what the user wants".
|
||||
// return 2 will show log: "Type of parameters passed is inconsistent with what the user wants".
|
||||
|
||||
// This is to check if the num of parameters the same as what the user wants.
|
||||
// There are two inputs and one output, so the nparam should be 3.
|
||||
if (nparam != TOTAL_PARAM_NUM) {
|
||||
return 1;
|
||||
}
|
||||
int n = size / THREADS;
|
||||
|
||||
// This is to check if the type of parameters the same as what the user wants.
|
||||
for (int i = 0; i < nparam; i++) {
|
||||
if (strcmp(dtypes[i], "float32") != 0) {
|
||||
return 2;
|
||||
}
|
||||
}
|
||||
|
||||
// input1's index is 0, input2's index is 1 and output's index is 2
|
||||
void *input1 = params[0];
|
||||
void *input2 = params[1];
|
||||
void *output = params[2];
|
||||
size_t size = 1;
|
||||
|
||||
// Cumprod of output's shape to compute elements' num
|
||||
for (int i = 0; i < ndims[OUTPUT_INDEX]; i++) {
|
||||
size *= shapes[OUTPUT_INDEX][i];
|
||||
}
|
||||
|
||||
int n = size / THREADS;
|
||||
|
||||
// Do the computation
|
||||
CustomAddKernel<<<n + 1, THREADS, 0, custream>>>(static_cast<float *>(input1), static_cast<float *>(input2),
|
||||
static_cast<float *>(output), size);
|
||||
|
||||
// When return 0, mindspore will continue to run if this kernel could launch successfully.
|
||||
return 0;
|
||||
}
|
||||
|
|
|
@ -14,7 +14,8 @@
|
|||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#define THREADS 1024
|
||||
constexpr int THREADS = 1024;
|
||||
|
||||
__global__ void CustomAddMulDivKernel(float *input1, float *input2, float *output1, float *output2, float *output3,
|
||||
size_t size) {
|
||||
auto idx = blockIdx.x * THREADS + threadIdx.x;
|
||||
|
@ -28,7 +29,29 @@ __global__ void CustomAddMulDivKernel(float *input1, float *input2, float *outpu
|
|||
extern "C" int CustomAddMulDiv(int nparam, void **params, int *ndims, int64_t **shapes, const char **dtypes,
|
||||
void *stream, void *extra) {
|
||||
cudaStream_t custream = static_cast<cudaStream_t>(stream);
|
||||
if (nparam != 5) return 1;
|
||||
|
||||
constexpr int OUTPUT_INDEX = 2;
|
||||
constexpr int TOTAL_PARAM_NUM = 5;
|
||||
|
||||
// Users can add any check on their need. If check fails, user can return any value larger than 0 to safely exit.
|
||||
// Return value larger than 0 will cause mindspore to stop computing and safely exit.
|
||||
// Specially, return 1 will show log: "Number of parameters passed is inconsistent with what the user wants".
|
||||
// return 2 will show log: "Type of parameters passed is inconsistent with what the user wants".
|
||||
|
||||
// This is to check if the num of parameters the same as what the user wants.
|
||||
// There are two inputs and three outputs, so the nparam should be 5.
|
||||
if (nparam != TOTAL_PARAM_NUM) {
|
||||
return 1;
|
||||
}
|
||||
|
||||
// This is to check if the type of parameters the same as what the user wants.
|
||||
for (int i = 0; i < nparam; i++) {
|
||||
if (strcmp(dtypes[i], "float32") != 0) {
|
||||
return 2;
|
||||
}
|
||||
}
|
||||
|
||||
// input1's index is 0, input2's index is 1, output1's index is 2, output2's index is 3 and output3's index is 4
|
||||
void *input1 = params[0];
|
||||
void *input2 = params[1];
|
||||
void *output1 = params[2];
|
||||
|
@ -36,19 +59,16 @@ extern "C" int CustomAddMulDiv(int nparam, void **params, int *ndims, int64_t **
|
|||
void *output3 = params[4];
|
||||
size_t size = 1;
|
||||
|
||||
for (int i = 0; i < ndims[2]; i++) {
|
||||
size *= shapes[2][i];
|
||||
// Cumprod of output's shape to compute elements' num
|
||||
for (int i = 0; i < ndims[OUTPUT_INDEX]; i++) {
|
||||
size *= shapes[OUTPUT_INDEX][i];
|
||||
}
|
||||
int n = size / THREADS;
|
||||
for (int i = 0; i < nparam; i++) {
|
||||
if (strcmp(dtypes[i], "float32") != 0) {
|
||||
return 2;
|
||||
}
|
||||
}
|
||||
|
||||
// Do the computation
|
||||
CustomAddMulDivKernel<<<n + 1, THREADS, 0, custream>>>(static_cast<float *>(input1), static_cast<float *>(input2),
|
||||
static_cast<float *>(output1), static_cast<float *>(output2),
|
||||
static_cast<float *>(output3), size);
|
||||
|
||||
// When return 0, mindspore will continue to run if this kernel could launch successfully.
|
||||
return 0;
|
||||
}
|
||||
|
|
|
@ -14,7 +14,7 @@
|
|||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#define THREADS 1024
|
||||
constexpr int THREADS = 1024;
|
||||
|
||||
__global__ void CustomAddMulDivBpropKernel(float *input1, float *input2, float *input3, float *input4, float *input5,
|
||||
float *output1, float *output2, size_t size) {
|
||||
|
@ -28,7 +28,30 @@ __global__ void CustomAddMulDivBpropKernel(float *input1, float *input2, float *
|
|||
extern "C" int CustomAddMulDivBprop(int nparam, void **params, int *ndims, int64_t **shapes, const char **dtypes,
|
||||
void *stream, void *extra) {
|
||||
cudaStream_t custream = static_cast<cudaStream_t>(stream);
|
||||
if (nparam != 7) return 1;
|
||||
|
||||
constexpr int OUTPUT_INDEX = 6;
|
||||
constexpr int TOTAL_PARAM_NUM = 7;
|
||||
|
||||
// Users can add any check on their need. If check fails, user can return any value larger than 0 to safely exit.
|
||||
// Return value larger than 0 will cause mindspore to stop computing and safely exit.
|
||||
// Specially, return 1 will show log: "Number of parameters passed is inconsistent with what the user wants".
|
||||
// return 2 will show log: "Type of parameters passed is inconsistent with what the user wants".
|
||||
|
||||
// This is to check if the num of parameters the same as what the user wants.
|
||||
// There are five inputs and two outputs, so the nparam should be 7.
|
||||
if (nparam != TOTAL_PARAM_NUM) {
|
||||
return 1;
|
||||
}
|
||||
|
||||
// This is to check if the type of parameters the same as what the user wants.
|
||||
for (int i = 0; i < nparam; i++) {
|
||||
if (strcmp(dtypes[i], "float32") != 0) {
|
||||
return 2;
|
||||
}
|
||||
}
|
||||
|
||||
// input1's index is 0, input2's index is 1, input3's index is 2, input4's index is 3, input5's index is 4
|
||||
// output1's index is 5 and output2's index is 6
|
||||
void *input1 = params[0];
|
||||
void *input2 = params[1];
|
||||
void *input3 = params[2];
|
||||
|
@ -39,20 +62,18 @@ extern "C" int CustomAddMulDivBprop(int nparam, void **params, int *ndims, int64
|
|||
|
||||
size_t size = 1;
|
||||
|
||||
for (int i = 0; i < ndims[6]; i++) {
|
||||
size *= shapes[6][i];
|
||||
// Cumprod of output's shape to compute elements' num
|
||||
for (int i = 0; i < ndims[OUTPUT_INDEX]; i++) {
|
||||
size *= shapes[OUTPUT_INDEX][i];
|
||||
}
|
||||
int n = size / THREADS;
|
||||
for (int i = 0; i < nparam; i++) {
|
||||
if (strcmp(dtypes[i], "float32") != 0) {
|
||||
return 2;
|
||||
}
|
||||
}
|
||||
|
||||
// Do the computation
|
||||
CustomAddMulDivBpropKernel<<<n + 1, THREADS, 0, custream>>>(
|
||||
static_cast<float *>(input1), static_cast<float *>(input2), static_cast<float *>(input3),
|
||||
static_cast<float *>(input4), static_cast<float *>(input5), static_cast<float *>(output1),
|
||||
static_cast<float *>(output2), size);
|
||||
|
||||
// When return 0, mindspore will continue to run if this kernel could launch successfully.
|
||||
return 0;
|
||||
}
|
||||
|
|
|
@ -13,9 +13,9 @@
|
|||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
constexpr int THREADS = 1024;
|
||||
|
||||
#include <cuda_fp16.h>
|
||||
#define THREADS 1024
|
||||
|
||||
__global__ void CustomHSquareMulKernel(float *input1, half *input2, half *output, size_t size) {
|
||||
auto idx = blockIdx.x * THREADS + threadIdx.x;
|
||||
|
@ -27,18 +27,22 @@ __global__ void CustomHSquareMulKernel(float *input1, half *input2, half *output
|
|||
extern "C" int CustomHSquareMul(int nparam, void **params, int *ndims, int64_t **shapes, const char **dtypes,
|
||||
void *stream, void *extra) {
|
||||
cudaStream_t custream = static_cast<cudaStream_t>(stream);
|
||||
if (nparam != 3) return 1;
|
||||
void *input1 = params[0];
|
||||
void *input2 = params[1];
|
||||
|
||||
void *output = params[2];
|
||||
size_t size = 1;
|
||||
constexpr int OUTPUT_INDEX = 2;
|
||||
constexpr int TOTAL_PARAM_NUM = 3;
|
||||
|
||||
for (int i = 0; i < ndims[2]; i++) {
|
||||
size *= shapes[2][i];
|
||||
// Users can add any check on their need. If check fails, user can return any value larger than 0 to safely exit.
|
||||
// Return value larger than 0 will cause mindspore to stop computing and safely exit.
|
||||
// Specially, return 1 will show log: "Number of parameters passed is inconsistent with what the user wants".
|
||||
// return 2 will show log: "Type of parameters passed is inconsistent with what the user wants".
|
||||
|
||||
// This is to check if the num of parameters the same as what the user wants.
|
||||
// There are two inputs and one output, so the nparam should be 3.
|
||||
if (nparam != TOTAL_PARAM_NUM) {
|
||||
return 1;
|
||||
}
|
||||
int n = size / THREADS;
|
||||
|
||||
// This is to check if the type of parameters the same as what the user wants.
|
||||
if (strcmp(dtypes[0], "float32") != 0) {
|
||||
return 2;
|
||||
}
|
||||
|
@ -49,7 +53,23 @@ extern "C" int CustomHSquareMul(int nparam, void **params, int *ndims, int64_t *
|
|||
return 2;
|
||||
}
|
||||
|
||||
// input1's index is 0, input2's index is 1 and output's index is 2
|
||||
void *input1 = params[0];
|
||||
void *input2 = params[1];
|
||||
|
||||
void *output = params[2];
|
||||
size_t size = 1;
|
||||
|
||||
// Cumprod of output's shape to compute elements' num
|
||||
for (int i = 0; i < ndims[OUTPUT_INDEX]; i++) {
|
||||
size *= shapes[OUTPUT_INDEX][i];
|
||||
}
|
||||
int n = size / THREADS;
|
||||
|
||||
// Do the computation
|
||||
CustomHSquareMulKernel<<<n + 1, THREADS, 0, custream>>>(static_cast<float *>(input1), static_cast<half *>(input2),
|
||||
static_cast<half *>(output), size);
|
||||
|
||||
// When return 0, mindspore will continue to run if this kernel could launch successfully.
|
||||
return 0;
|
||||
}
|
||||
|
|
|
@ -14,10 +14,12 @@
|
|||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#define THREADS 1024
|
||||
constexpr int THREADS = 1024;
|
||||
|
||||
__global__ void CustomReorganizeKernel(float *input1, int64_t *input2, float *output, size_t size) {
|
||||
auto idx = blockIdx.x * THREADS + threadIdx.x;
|
||||
if (idx < size) {
|
||||
// Reorganize
|
||||
output[idx] = input1[input2[idx]];
|
||||
}
|
||||
}
|
||||
|
@ -25,19 +27,22 @@ __global__ void CustomReorganizeKernel(float *input1, int64_t *input2, float *ou
|
|||
extern "C" int CustomReorganize(int nparam, void **params, int *ndims, int64_t **shapes, const char **dtypes,
|
||||
void *stream, void *extra) {
|
||||
cudaStream_t custream = static_cast<cudaStream_t>(stream);
|
||||
if (nparam != 3) return 1;
|
||||
void *input1 = params[0];
|
||||
void *input2 = params[1];
|
||||
|
||||
void *output = params[2];
|
||||
constexpr int OUTPUT_INDEX = 2;
|
||||
constexpr int TOTAL_PARAM_NUM = 3;
|
||||
|
||||
size_t size = 1;
|
||||
// Users can add any check on their need. If check fails, user can return any value larger than 0 to safely exit.
|
||||
// Return value larger than 0 will cause mindspore to stop computing and safely exit.
|
||||
// Specially, return 1 will show log: "Number of parameters passed is inconsistent with what the user wants".
|
||||
// return 2 will show log: "Type of parameters passed is inconsistent with what the user wants".
|
||||
|
||||
for (int i = 0; i < ndims[2]; i++) {
|
||||
size *= shapes[2][i];
|
||||
// This is to check if the num of parameters the same as what the user wants.
|
||||
// There are two inputs and one output, so the nparam should be 3.
|
||||
if (nparam != TOTAL_PARAM_NUM) {
|
||||
return 1;
|
||||
}
|
||||
int n = size / THREADS;
|
||||
|
||||
|
||||
// This is to check if the type of parameters the same as what the user wants.
|
||||
if (strcmp(dtypes[0], "float32") != 0) {
|
||||
return 2;
|
||||
}
|
||||
|
@ -47,8 +52,25 @@ extern "C" int CustomReorganize(int nparam, void **params, int *ndims, int64_t *
|
|||
if (strcmp(dtypes[2], "float32") != 0) {
|
||||
return 2;
|
||||
}
|
||||
|
||||
// input1's index is 0, input2's index is 1 and output's index is 2
|
||||
void *input1 = params[0];
|
||||
void *input2 = params[1];
|
||||
|
||||
void *output = params[2];
|
||||
|
||||
size_t size = 1;
|
||||
|
||||
// Cumprod of output's shape to compute elements' num
|
||||
for (int i = 0; i < ndims[OUTPUT_INDEX]; i++) {
|
||||
size *= shapes[OUTPUT_INDEX][i];
|
||||
}
|
||||
int n = size / THREADS;
|
||||
|
||||
// Do the computation
|
||||
CustomReorganizeKernel<<<n + 1, THREADS, 0, custream>>>(static_cast<float *>(input1), static_cast<int64_t *>(input2),
|
||||
static_cast<float *>(output), size);
|
||||
|
||||
// When return 0, mindspore will continue to run if this kernel could launch successfully.
|
||||
return 0;
|
||||
}
|
||||
|
|
|
@ -13,9 +13,11 @@
|
|||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
#define THREADS 1024
|
||||
constexpr int THREADS = 1024;
|
||||
|
||||
__global__ void CustomSquareKernel(float *input1, float *output, size_t size) {
|
||||
auto idx = blockIdx.x * THREADS + threadIdx.x;
|
||||
// Square
|
||||
if (idx < size) {
|
||||
output[idx] = input1[idx] * input1[idx];
|
||||
}
|
||||
|
@ -24,22 +26,42 @@ __global__ void CustomSquareKernel(float *input1, float *output, size_t size) {
|
|||
extern "C" int CustomSquare(int nparam, void **params, int *ndims, int64_t **shapes, const char **dtypes, void *stream,
|
||||
void *extra) {
|
||||
cudaStream_t custream = static_cast<cudaStream_t>(stream);
|
||||
if (nparam != 2) return 1;
|
||||
void *input1 = params[0];
|
||||
void *output = params[1];
|
||||
constexpr int OUTPUT_INDEX = 1;
|
||||
constexpr int TOTAL_PARAM_NUM = 2;
|
||||
|
||||
size_t size = 1;
|
||||
// Users can add any check on their need. If check fails, user can return any value larger than 0 to safely exit.
|
||||
// Any return value larger than 0 will cause mindspore to stop computing and safely exit.
|
||||
// Specially, return 1 will show log: "Number of parameters passed is inconsistent with what the user wants".
|
||||
// return 2 will show log: "Type of parameters passed is inconsistent with what the user wants".
|
||||
|
||||
for (int i = 0; i < ndims[1]; i++) {
|
||||
size *= shapes[1][i];
|
||||
// This is to check if the num of parameters the same as what the user wants.
|
||||
// There are one input and one output, so the nparam should be 2.
|
||||
if (nparam != TOTAL_PARAM_NUM) {
|
||||
return 1;
|
||||
}
|
||||
int n = size / THREADS;
|
||||
|
||||
// This is to check if the type of parameters the same as what the user wants.
|
||||
for (int i = 0; i < nparam; i++) {
|
||||
if (strcmp(dtypes[i], "float32") != 0) {
|
||||
return 2;
|
||||
}
|
||||
}
|
||||
|
||||
// input1's index is 0, output's index is 1
|
||||
void *input1 = params[0];
|
||||
void *output = params[1];
|
||||
|
||||
size_t size = 1;
|
||||
|
||||
// Cumprod of output's shape to compute elements' num
|
||||
for (int i = 0; i < ndims[OUTPUT_INDEX]; i++) {
|
||||
size *= shapes[OUTPUT_INDEX][i];
|
||||
}
|
||||
int n = size / THREADS;
|
||||
|
||||
// Do the computation
|
||||
CustomSquareKernel<<<n + 1, THREADS, 0, custream>>>(static_cast<float *>(input1), static_cast<float *>(output), size);
|
||||
|
||||
// When return 0, mindspore will continue to run if this kernel could launch successfully.
|
||||
return 0;
|
||||
}
|
||||
|
|
|
@ -14,7 +14,8 @@
|
|||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#define THREADS 1024
|
||||
constexpr int THREADS = 1024;
|
||||
|
||||
__global__ void CustomSquareBpropKernel(float *input1, float *input3, float *output, size_t size) {
|
||||
auto idx = blockIdx.x * THREADS + threadIdx.x;
|
||||
if (idx < size) {
|
||||
|
@ -25,24 +26,46 @@ __global__ void CustomSquareBpropKernel(float *input1, float *input3, float *out
|
|||
extern "C" int CustomSquareBprop(int nparam, void **params, int *ndims, int64_t **shapes, const char **dtypes,
|
||||
void *stream, void *extra) {
|
||||
cudaStream_t custream = static_cast<cudaStream_t>(stream);
|
||||
if (nparam != 4) return 1;
|
||||
void *input1 = params[0];
|
||||
void *input3 = params[2];
|
||||
void *output = params[3];
|
||||
|
||||
size_t size = 1;
|
||||
constexpr int OUTPUT_INDEX = 3;
|
||||
constexpr int TOTAL_PARAM_NUM = 4;
|
||||
|
||||
for (int i = 0; i < ndims[3]; i++) {
|
||||
size *= shapes[3][i];
|
||||
// Users can add any check on their need. If check fails, user can return any value larger than 0 to safely exit.
|
||||
// Return value larger than 0 will cause mindspore to stop computing and safely exit.
|
||||
// Specially, return 1 will show log: "Number of parameters passed is inconsistent with what the user wants".
|
||||
// return 2 will show log: "Type of parameters passed is inconsistent with what the user wants".
|
||||
|
||||
// This is to check if the num of parameters the same as what the user wants.
|
||||
// There are three inputs and one output, so the nparam should be 4.
|
||||
if (nparam != TOTAL_PARAM_NUM) {
|
||||
return 1;
|
||||
}
|
||||
int n = size / THREADS;
|
||||
|
||||
// This is to check if the type of parameters the same as what the user wants.
|
||||
for (int i = 0; i < nparam; i++) {
|
||||
if (strcmp(dtypes[i], "float32") != 0) {
|
||||
return 2;
|
||||
}
|
||||
}
|
||||
|
||||
// input1's index is 0, input2's index is 1, input3's index is 2 and output's index is 3
|
||||
void *input1 = params[0];
|
||||
void *input3 = params[2];
|
||||
void *output = params[3];
|
||||
|
||||
size_t size = 1;
|
||||
|
||||
// Cumprod of output's shape to compute elements'num
|
||||
for (int i = 0; i < ndims[OUTPUT_INDEX]; i++) {
|
||||
size *= shapes[OUTPUT_INDEX][i];
|
||||
}
|
||||
|
||||
int n = size / THREADS;
|
||||
|
||||
// Do the computation
|
||||
CustomSquareBpropKernel<<<n + 1, THREADS, 0, custream>>>(static_cast<float *>(input1), static_cast<float *>(input3),
|
||||
static_cast<float *>(output), size);
|
||||
|
||||
// When return 0, mindspore will continue to run if this kernel could launch successfully.
|
||||
return 0;
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue