diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/lj/lj_direct_cf_force_with_lj_virial_direct_cf_energy_impl.cu b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/lj/lj_direct_cf_force_with_lj_virial_direct_cf_energy_impl.cu
new file mode 100644
index 00000000000..55b8f14317b
--- /dev/null
+++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/lj/lj_direct_cf_force_with_lj_virial_direct_cf_energy_impl.cu
@@ -0,0 +1,144 @@
+/**
+ * Copyright 2021 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/**
+ * Note:
+ * LJForce. This is an experimental interface that is subject to change and/or deletion.
+ */
+
+#include "backend/kernel_compiler/gpu/cuda_impl/sponge/lj/lj_direct_cf_force_with_lj_virial_direct_cf_energy_impl.cuh"
+#include "backend/kernel_compiler/gpu/cuda_impl/sponge/common_sponge.cuh"
+
+__global__ void LJ_Direct_CF_Force_With_LJ_Virial_Direct_CF_Energy_CUDA(
+  const int atom_numbers, const NEIGHBOR_LIST *nl, const UINT_VECTOR_LJ_TYPE *uint_crd, const VECTOR *boxlength,
+  const float *LJ_type_A, const float *LJ_type_B, const float cutoff, VECTOR *frc, const float pme_beta,
+  const float sqrt_pi, float *atom_lj_virial, float *atom_direct_cf_energy) {
+  int atom_i = blockDim.x * blockIdx.x + threadIdx.x;
+  if (atom_i < atom_numbers) {
+    NEIGHBOR_LIST nl_i = nl[atom_i];
+    int N = nl_i.atom_numbers;
+    int atom_j;
+    int int_x;
+    int int_y;
+    int int_z;
+    UINT_VECTOR_LJ_TYPE r1 = uint_crd[atom_i], r2;
+    VECTOR dr;
+    float dr_2;
+    float dr_4;
+    float dr_8;
+    float dr_6;
+    float frc_abs = 0.;
+    VECTOR frc_lin;
+    VECTOR frc_record = {0., 0., 0.};
+    float charge_i = r1.charge;
+    float charge_j;
+    float dr_abs;
+    float dr_1;
+    float beta_dr;
+    float frc_cf_abs;
+    float virial_lin = 0.;
+    float energy_lin = 0.;
+
+    int x, y;
+    int atom_pair_LJ_type;
+    for (int j = threadIdx.y; j < N; j = j + blockDim.y) {
+      atom_j = nl_i.atom_serial[j];
+      r2 = uint_crd[atom_j];
+      charge_j = r2.charge;
+
+      int_x = r2.uint_x - r1.uint_x;
+      int_y = r2.uint_y - r1.uint_y;
+      int_z = r2.uint_z - r1.uint_z;
+      dr.x = boxlength[0].x * int_x;
+      dr.y = boxlength[0].y * int_y;
+      dr.z = boxlength[0].z * int_z;
+      dr_abs = norm3df(dr.x, dr.y, dr.z);
+      if (dr_abs < cutoff) {
+        dr_1 = 1. / dr_abs;
+        dr_2 = dr_1 * dr_1;
+        dr_4 = dr_2 * dr_2;
+        dr_8 = dr_4 * dr_4;
+        dr_6 = dr_4 * dr_2;
+
+        y = (r2.LJ_type - r1.LJ_type);
+        x = y >> 31;
+        y = (y ^ x) - x;
+        x = r2.LJ_type + r1.LJ_type;
+        r2.LJ_type = (x + y) >> 1;
+        x = (x - y) >> 1;
+        atom_pair_LJ_type = (r2.LJ_type * (r2.LJ_type + 1) >> 1) + x;
+
+        frc_abs = (-LJ_type_A[atom_pair_LJ_type] * dr_6 + LJ_type_B[atom_pair_LJ_type]) * dr_8;
+        beta_dr = pme_beta * dr_abs;
+        frc_cf_abs = beta_dr * sqrt_pi * expf(-beta_dr * beta_dr) + erfcf(beta_dr);
+        frc_cf_abs = frc_cf_abs * dr_2 * dr_1;
+        frc_cf_abs = charge_i * charge_j * frc_cf_abs;
+        energy_lin = energy_lin + charge_i * charge_j * erfcf(beta_dr) * dr_1;
+        virial_lin = virial_lin - frc_abs * dr_abs * dr_abs;
+        frc_abs = frc_abs - frc_cf_abs;
+        frc_lin.x = frc_abs * dr.x;
+        frc_lin.y = frc_abs * dr.y;
+        frc_lin.z = frc_abs * dr.z;
+        frc_record.x = frc_record.x + frc_lin.x;
+        frc_record.y = frc_record.y + frc_lin.y;
+        frc_record.z = frc_record.z + frc_lin.z;
+        atomicAdd(&frc[atom_j].x, -frc_lin.x);
+        atomicAdd(&frc[atom_j].y, -frc_lin.y);
+        atomicAdd(&frc[atom_j].z, -frc_lin.z);
+      }
+    }
+    atomicAdd(&frc[atom_i].x, frc_record.x);
+    atomicAdd(&frc[atom_i].y, frc_record.y);
+    atomicAdd(&frc[atom_i].z, frc_record.z);
+
+    atomicAdd(&atom_direct_cf_energy[atom_i], energy_lin);
+    atomicAdd(&atom_lj_virial[atom_i], virial_lin);
+  }
+}
+
+void LJ_Direct_CF_Force_With_LJ_Virial_Direct_CF_Energy(
+  const int atom_numbers, const float cutoff, const float pme_beta, const unsigned int *uint_crd_f, const int *LJtype,
+  const float *charge, const float *scaler_f, float *uint_crd_with_LJ, int *nl_atom_numbers, int *nl_atom_serial,
+  int *nl, const float *d_LJ_A, const float *d_LJ_B, float *frc_f, float *atom_lj_virial, float *atom_energy,
+  int max_neighbor_numbers, cudaStream_t stream) {
+  Reset_List<<<ceilf(static_cast<float>(3. * atom_numbers) / 128), 128, 0, stream>>>(3 * atom_numbers, frc_f, 0.);
+  Reset_List<<<ceilf(static_cast<float>(atom_numbers) / 128), 128, 0, stream>>>(atom_numbers, atom_energy, 0.);
+  Reset_List<<<ceilf(static_cast<float>(atom_numbers) / 128), 128, 0, stream>>>(atom_numbers, atom_lj_virial, 0.);
+  VECTOR *frc = reinterpret_cast<VECTOR *>(frc_f);
+  VECTOR *scaler = const_cast<VECTOR *>(reinterpret_cast<const VECTOR *>(scaler_f));
+  NEIGHBOR_LIST *nl_a = reinterpret_cast<NEIGHBOR_LIST *>(nl);
+  construct_neighbor_list_kernel<<<ceilf(static_cast<float>(atom_numbers) / 128), 128, 0, stream>>>(
+    atom_numbers, max_neighbor_numbers, nl_atom_numbers, nl_atom_serial, nl_a);
+
+  UINT_VECTOR_LJ_TYPE *uint_crd_with_LJ_a = reinterpret_cast<UINT_VECTOR_LJ_TYPE *>(uint_crd_with_LJ);
+
+  UNSIGNED_INT_VECTOR *uint_crd =
+    const_cast<UNSIGNED_INT_VECTOR *>(reinterpret_cast<const UNSIGNED_INT_VECTOR *>(uint_crd_f));
+
+  Copy_Crd_To_New_Crd_Start<<<ceilf(static_cast<float>(atom_numbers) / 32), 32, 0, stream>>>(
+    atom_numbers, uint_crd, uint_crd_with_LJ_a, LJtype, charge);
+
+  LJ_Direct_CF_Force_With_LJ_Virial_Direct_CF_Energy_CUDA<<<ceilf(static_cast<float>(atom_numbers) / 8), thread_LJ, 0,
+                                                            stream>>>(
+    atom_numbers, nl_a, uint_crd_with_LJ_a, scaler, d_LJ_A, d_LJ_B, cutoff, frc, pme_beta, TWO_DIVIDED_BY_SQRT_PI,
+    atom_lj_virial, atom_energy);
+  return;
+}
+
+void LJ_Direct_CF_Force_With_LJ_Virial_Direct_CF_Energy(
+  const int atom_numbers, const float cutoff, const float pme_beta, const unsigned int *uint_crd_f, const int *LJtype,
+  const float *charge, const float *scaler_f, float *uint_crd_with_LJ, int *nl_atom_numbers, int *nl_atom_serial,
+  int *nl, const float *d_LJ_A, const float *d_LJ_B, float *frc_f, float *atom_lj_virial, float *atom_energy,
+  int max_neighbor_numbers, cudaStream_t stream);
diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/lj/lj_direct_cf_force_with_lj_virial_direct_cf_energy_impl.cuh b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/lj/lj_direct_cf_force_with_lj_virial_direct_cf_energy_impl.cuh
new file mode 100644
index 00000000000..4c8528422e8
--- /dev/null
+++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/lj/lj_direct_cf_force_with_lj_virial_direct_cf_energy_impl.cuh
@@ -0,0 +1,33 @@
+/**
+ * Copyright 2021 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/**
+ * Note:
+ * LJForce. This is an experimental interface that is subject to change and/or deletion.
+ */
+
+#ifndef MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_LJ_DIRECT_CF_FORCE_WITH_LJ_VIRIAL_DIRECT_CF_ENERGY_IMPL_H_
+#define MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_LJ_DIRECT_CF_FORCE_WITH_LJ_VIRIAL_DIRECT_CF_ENERGY_IMPL_H_
+
+#include <curand_kernel.h>
+#include "runtime/device/gpu/cuda_common.h"
+
+void LJ_Direct_CF_Force_With_LJ_Virial_Direct_CF_Energy(
+  const int atom_numbers, const float cutoff, const float pme_beta, const unsigned int *uint_crd_f, const int *LJtype,
+  const float *charge, const float *scaler_f, float *uint_crd_with_LJ, int *nl_atom_numbers, int *nl_atom_serial,
+  int *nl, const float *d_LJ_A, const float *d_LJ_B, float *frc_f, float *atom_lj_virial, float *atom_energy,
+  int max_neighbor_numbers, cudaStream_t stream);
+
+#endif
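Reviewer note (not part of the patch): the shift/XOR block in `LJ_Direct_CF_Force_With_LJ_Virial_Direct_CF_Energy_CUDA` is a branchless abs/min/max that maps an unordered pair of LJ types onto an index into the packed triangular coefficient tables, avoiding warp divergence in the inner neighbor loop. A readable host-side equivalent, as a sketch (the helper name is mine):

```cpp
// Readable equivalent of the branchless pair-type indexing above (sketch).
// LJ_type_A/LJ_type_B store only one triangle of the symmetric type matrix,
// laid out row by row, so pair (a, b) maps to row max(a, b), column min(a, b).
#include <algorithm>

int atom_pair_lj_type(int a, int b) {
  int hi = std::max(a, b);
  int lo = std::min(a, b);
  // Offset of row `hi` in a packed triangular layout, plus the column.
  return hi * (hi + 1) / 2 + lo;
}
```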
diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/nvtit/md_iteration_leap_frog_impl.cu b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/nvtit/md_iteration_leap_frog_impl.cu
index 2b710212fd4..9eb30507211 100644
--- a/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/nvtit/md_iteration_leap_frog_impl.cu
+++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/nvtit/md_iteration_leap_frog_impl.cu
@@ -13,14 +13,16 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
+/**
+ * Note:
+ * MDIterationLeapFrog. This is an experimental interface that is subject to change and/or deletion.
+ */
 
 #include "backend/kernel_compiler/gpu/cuda_impl/sponge/nvtit/md_iteration_leap_frog_impl.cuh"
 #include "backend/kernel_compiler/gpu/cuda_impl/sponge/common_sponge.cuh"
 
-__global__ void MD_Iteration_Leap_Frog_With_LiuJian(const int atom_numbers, const float half_dt, const float dt,
-                                                    const float exp_gamma, const float *inverse_mass,
-                                                    const float *sqrt_mass_inverse, VECTOR *vel, VECTOR *crd,
-                                                    VECTOR *frc, VECTOR *acc, VECTOR *random_frc) {
+__global__ void MD_Iteration_Leap_Frog(const int atom_numbers, VECTOR *vel, VECTOR *crd, VECTOR *frc, VECTOR *acc,
+                                       const float *inverse_mass, const float dt) {
   int i = blockDim.x * blockIdx.x + threadIdx.x;
   if (i < atom_numbers) {
     acc[i].x = inverse_mass[i] * frc[i].x;
@@ -31,17 +33,9 @@
     vel[i].y = vel[i].y + dt * acc[i].y;
     vel[i].z = vel[i].z + dt * acc[i].z;
 
-    crd[i].x = crd[i].x + half_dt * vel[i].x;
-    crd[i].y = crd[i].y + half_dt * vel[i].y;
-    crd[i].z = crd[i].z + half_dt * vel[i].z;
-
-    vel[i].x = exp_gamma * vel[i].x + sqrt_mass_inverse[i] * random_frc[i].x;
-    vel[i].y = exp_gamma * vel[i].y + sqrt_mass_inverse[i] * random_frc[i].y;
-    vel[i].z = exp_gamma * vel[i].z + sqrt_mass_inverse[i] * random_frc[i].z;
-
-    crd[i].x = crd[i].x + half_dt * vel[i].x;
-    crd[i].y = crd[i].y + half_dt * vel[i].y;
-    crd[i].z = crd[i].z + half_dt * vel[i].z;
+    crd[i].x = crd[i].x + dt * vel[i].x;
+    crd[i].y = crd[i].y + dt * vel[i].y;
+    crd[i].z = crd[i].z + dt * vel[i].z;
 
     frc[i].x = 0.;
     frc[i].y = 0.;
@@ -49,88 +43,12 @@ __global__ void MD_Iteration_Leap_Frog_With_LiuJian(const int atom_numbers, cons
   }
 }
 
-__global__ void MD_Iteration_Leap_Frog_With_LiuJian_With_Max_Velocity(const int atom_numbers, const float half_dt,
-                                                                      const float dt, const float exp_gamma,
-                                                                      const float *inverse_mass,
-                                                                      const float *sqrt_mass_inverse, VECTOR *vel,
-                                                                      VECTOR *crd, VECTOR *frc, VECTOR *acc,
-                                                                      VECTOR *random_frc, const float max_vel) {
-  int i = blockDim.x * blockIdx.x + threadIdx.x;
-  float abs_vel;
-  if (i < atom_numbers) {
-    acc[i].x = inverse_mass[i] * frc[i].x;
-    acc[i].y = inverse_mass[i] * frc[i].y;
-    acc[i].z = inverse_mass[i] * frc[i].z;
-
-    vel[i].x = vel[i].x + dt * acc[i].x;
-    vel[i].y = vel[i].y + dt * acc[i].y;
-    vel[i].z = vel[i].z + dt * acc[i].z;
-
-    abs_vel = norm3df(vel[i].x, vel[i].y, vel[i].z);
-    if (abs_vel < max_vel) {
-    } else {
-      abs_vel = max_vel / abs_vel;
-      vel[i].x = abs_vel * vel[i].x;
-      vel[i].y = abs_vel * vel[i].y;
-      vel[i].z = abs_vel * vel[i].z;
-    }
-
-    crd[i].x = crd[i].x + half_dt * vel[i].x;
-    crd[i].y = crd[i].y + half_dt * vel[i].y;
-    crd[i].z = crd[i].z + half_dt * vel[i].z;
-
-    vel[i].x = exp_gamma * vel[i].x + sqrt_mass_inverse[i] * random_frc[i].x;
-    vel[i].y = exp_gamma * vel[i].y + sqrt_mass_inverse[i] * random_frc[i].y;
-    vel[i].z = exp_gamma * vel[i].z + sqrt_mass_inverse[i] * random_frc[i].z;
-
-    crd[i].x = crd[i].x + half_dt * vel[i].x;
-    crd[i].y = crd[i].y + half_dt * vel[i].y;
-    crd[i].z = crd[i].z + half_dt * vel[i].z;
-
-    frc[i].x = 0.;
-    frc[i].y = 0.;
-    frc[i].z = 0.;
-  }
+void MDIterationLeapFrog(const int atom_numbers, float *vel, float *crd, float *frc, float *acc,
+                         const float *inverse_mass, const float dt, cudaStream_t stream) {
+  VECTOR *d_vel = reinterpret_cast<VECTOR *>(vel);
+  VECTOR *d_crd = reinterpret_cast<VECTOR *>(crd);
+  VECTOR *d_frc = reinterpret_cast<VECTOR *>(frc);
+  VECTOR *d_acc = reinterpret_cast<VECTOR *>(acc);
+  MD_Iteration_Leap_Frog<<<ceilf(static_cast<float>(atom_numbers) / 128), 128, 0, stream>>>(
+    atom_numbers, d_vel, d_crd, d_frc, d_acc, inverse_mass, dt);
 }
-
-void MDIterationLeapFrog(const int float4_numbers, const int atom_numbers, const float half_dt, const float dt,
-                         const float exp_gamma, const int is_max_velocity, const float max_velocity,
-                         const float *d_mass_inverse, const float *d_sqrt_mass, float *vel_f, float *crd_f,
-                         float *frc_f, float *acc_f, cudaStream_t stream) {
-  Reset_List<<<ceilf(static_cast<float>(3. * atom_numbers) / 128), 128>>>(3 * atom_numbers, acc_f, 0.);
-
-  VECTOR *frc = const_cast<VECTOR *>(reinterpret_cast<const VECTOR *>(frc_f));
-  VECTOR *vel = const_cast<VECTOR *>(reinterpret_cast<const VECTOR *>(vel_f));
-  VECTOR *acc = const_cast<VECTOR *>(reinterpret_cast<const VECTOR *>(acc_f));
-  VECTOR *crd = const_cast<VECTOR *>(reinterpret_cast<const VECTOR *>(crd_f));
-
-  curandStatePhilox4_32_10_t *rand_state;
-  VECTOR *random_force;
-
-  Cuda_Malloc_Safely(reinterpret_cast<void **>(&random_force), sizeof(float4) * float4_numbers);
-  Cuda_Malloc_Safely(reinterpret_cast<void **>(&rand_state), sizeof(curandStatePhilox4_32_10_t) * float4_numbers);
-  Setup_Rand_Normal_Kernel<<<ceilf(static_cast<float>(float4_numbers) / 32.), 32>>>(float4_numbers, rand_state, 1);
-  Rand_Normal<<<ceilf(static_cast<float>(float4_numbers) / 32.), 32, 0, stream>>>(
-    float4_numbers, rand_state, reinterpret_cast<float4 *>(random_force));
-
-  if (!is_max_velocity) {
-    MD_Iteration_Leap_Frog_With_LiuJian<<<ceilf(static_cast<float>(atom_numbers) / 32), 32, 0, stream>>>(
-      atom_numbers, half_dt, dt, exp_gamma, d_mass_inverse, d_sqrt_mass, vel, crd, frc, acc, random_force);
-  } else {
-    MD_Iteration_Leap_Frog_With_LiuJian_With_Max_Velocity<<<ceilf(static_cast<float>(atom_numbers) / 32), 32, 0,
-                                                            stream>>>(atom_numbers, half_dt, dt, exp_gamma,
-                                                                      d_mass_inverse, d_sqrt_mass, vel, crd, frc, acc,
-                                                                      random_force, max_velocity);
-
-    cudaStreamSynchronize(stream);
-    cudaFree(random_force);
-    cudaFree(rand_state);
-
-    return;
-  }
-}
-
-void MDIterationLeapFrog(const int float4_numbers, const int atom_numbers, const float half_dt, const float dt,
-                         const float exp_gamma, const int is_max_velocity, const float max_velocity,
-                         const float *d_mass_inverse, const float *d_sqrt_mass, float *vel_f, float *crd_f,
-                         float *frc_f, float *acc_f, cudaStream_t stream);
diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/nvtit/md_iteration_leap_frog_impl.cuh b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/nvtit/md_iteration_leap_frog_impl.cuh
index 1db936bda9a..cee7ecbd06a 100644
--- a/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/nvtit/md_iteration_leap_frog_impl.cuh
+++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/nvtit/md_iteration_leap_frog_impl.cuh
@@ -13,15 +13,18 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
+/**
+ * Note:
+ * MDIterationLeapFrog. This is an experimental interface that is subject to change and/or deletion.
+ */
+
 #ifndef MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_NVTIT_MD_ITERATION_LEAP_FROG_IMPL_H
 #define MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_NVTIT_MD_ITERATION_LEAP_FROG_IMPL_H
 
 #include <curand_kernel.h>
 #include "runtime/device/gpu/cuda_common.h"
 
-void MDIterationLeapFrog(const int float4_numbers, const int atom_numbers, const float half_dt, const float dt,
-                         const float exp_gamma, const int is_max_velocity, const float max_velocity,
-                         const float *d_mass_inverse, const float *d_sqrt_mass, float *vel_f, float *crd_f,
-                         float *frc_f, float *acc_f, cudaStream_t stream);
+void MDIterationLeapFrog(const int atom_numbers, float *vel, float *crd, float *frc, float *acc,
+                         const float *inverse_mass, const float dt, cudaStream_t stream);
 
-#endif  // MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_NVTIT_MD_ITERATION_LEAP_FROG_IMPL_H
+#endif  // MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_MD_ITERATION_LEAP_FROG_LIUJIAN_GPU_IMPL_H_
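Reviewer note (not part of the patch): this rewrite strips the Liu-Jian thermostat, velocity capping, and the curand random-force machinery out of this path, leaving a plain leap-frog step. A serial sketch of what one kernel iteration now does per atom (struct and function names are mine):

```cpp
// Host-side reference of one MD_Iteration_Leap_Frog step for a single atom
// (sketch for clarity; field names follow the kernel, everything else is mine).
struct Vec3 { float x, y, z; };

void leap_frog_step(Vec3 *vel, Vec3 *crd, Vec3 *frc, float inv_mass, float dt) {
  // a = F / m, then v += a * dt and x += v * dt; the force buffer is
  // consumed and cleared so the next force pass can accumulate into it.
  Vec3 acc = {inv_mass * frc->x, inv_mass * frc->y, inv_mass * frc->z};
  vel->x += dt * acc.x;  vel->y += dt * acc.y;  vel->z += dt * acc.z;
  crd->x += dt * vel->x; crd->y += dt * vel->y; crd->z += dt * vel->z;
  *frc = {0.f, 0.f, 0.f};
}
```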
diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/pme/pme_common.cuh b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/pme/pme_common.cuh
index c8ef5052a9f..754d3c4f2f2 100644
--- a/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/pme/pme_common.cuh
+++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/pme/pme_common.cuh
@@ -13,6 +13,10 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
+/**
+ * Note:
+ * PME_Common. This is an experimental interface that is subject to change and/or deletion.
+ */
 #ifndef MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_PME_PME_COMMON_H_
 #define MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_PME_PME_COMMON_H_
 
@@ -25,7 +29,7 @@
 __constant__ float PME_dMa[4] = {0.5, -1.5, 1.5, -0.5};
 __constant__ float PME_dMb[4] = {0, 1, -2, 1};
 __constant__ float PME_dMc[4] = {0, 0.5, 0, -0.5};
 #define PI 3.1415926
-const float periodic_factor_inverse = 2.32830643e-10;
+const float periodic_factor_inverse = 2.3283064365387e-10;
 static dim3 thread_PME;
 
 const float cutoff = 10.0;
@@ -98,6 +102,10 @@
   return res.x * res.x + res.y * res.y;
 }
 
+__global__ static void device_add(float *ene, float *factor, float *charge_sum) {
+  ene[0] += factor[0] * charge_sum[0] * charge_sum[0];
+}
+
 __global__ static void PME_Atom_Near(const UNSIGNED_INT_VECTOR *uint_crd, int *PME_atom_near, const int PME_Nin,
                                      const float periodic_factor_inverse_x, const float periodic_factor_inverse_y,
                                      const float periodic_factor_inverse_z, const int atom_numbers, const int fftx,
@@ -130,10 +138,13 @@
       UNSIGNED_INT_VECTOR temp_kxyz = PME_kxyz[k];
       kx = tempux - temp_kxyz.uint_x;
       if (kx < 0) kx += fftx;
+      if (kx > fftx) kx -= fftx;
       ky = tempuy - temp_kxyz.uint_y;
       if (ky < 0) ky += ffty;
+      if (ky > ffty) ky -= ffty;
       kz = tempuz - temp_kxyz.uint_z;
       if (kz < 0) kz += fftz;
+      if (kz > fftz) kz -= fftz;
       temp_near[k] = kx * PME_Nin + ky * fftz + kz;
     }
   }
@@ -227,4 +238,120 @@ __global__ static void PME_Direct_Energy(const int atom_numbers, const NEIGHBOR_
   }
 }
 
+__global__ static void PME_Direct_Atom_Energy(const int atom_numbers, const NEIGHBOR_LIST *nl,
+                                              const UNSIGNED_INT_VECTOR *uint_crd, const VECTOR *boxlength,
+                                              const float *charge, const float beta, const float cutoff_square,
+                                              float *direct_ene) {
+  int atom_i = blockDim.x * blockIdx.x + threadIdx.x;
+  if (atom_i < atom_numbers) {
+    NEIGHBOR_LIST nl_i = nl[atom_i];
+    int N = nl_i.atom_numbers;
+    int atom_j;
+    int int_x;
+    int int_y;
+    int int_z;
+    UNSIGNED_INT_VECTOR r1 = uint_crd[atom_i], r2;
+    VECTOR dr;
+    float dr2;
+    float dr_abs;
+    // float dr_inverse;
+    float ene_temp;
+    float charge_i = charge[atom_i];
+    float ene_lin = 0.;
+
+    for (int j = threadIdx.y; j < N; j = j + blockDim.y) {
+      atom_j = nl_i.atom_serial[j];
+      r2 = uint_crd[atom_j];
+
+      int_x = r2.uint_x - r1.uint_x;
+      int_y = r2.uint_y - r1.uint_y;
+      int_z = r2.uint_z - r1.uint_z;
+      dr.x = boxlength[0].x * int_x;
+      dr.y = boxlength[0].y * int_y;
+      dr.z = boxlength[0].z * int_z;
+
+      dr2 = dr.x * dr.x + dr.y * dr.y + dr.z * dr.z;
+      if (dr2 < cutoff_square) {
+        dr_abs = norm3df(dr.x, dr.y, dr.z);
+        ene_temp = charge_i * charge[atom_j] * erfcf(beta * dr_abs) / dr_abs;
+        ene_lin = ene_lin + ene_temp;
+      }
+    }
+    atomicAdd(&direct_ene[atom_i], ene_lin);
+  }
+}
+
+__global__ static void PME_Energy_Product(const int element_number, const float *list1, const float *list2,
+                                          float *sum) {
+  if (threadIdx.x == 0) {
+    sum[0] = 0.;
+  }
+  __syncthreads();
+  float lin = 0.0;
+  for (int i = threadIdx.x; i < element_number; i = i + blockDim.x) {
+    lin = lin + list1[i] * list2[i];
+  }
+  atomicAdd(sum, lin);
+}
+
+__global__ static void PME_BCFQ(cufftComplex *PME_FQ, float *PME_BC, int PME_Nfft) {
+  int index = blockDim.x * blockIdx.x + threadIdx.x;
+  if (index < PME_Nfft) {
+    float tempf = PME_BC[index];
+    cufftComplex tempc = PME_FQ[index];
+    PME_FQ[index].x = tempc.x * tempf;
+    PME_FQ[index].y = tempc.y * tempf;
+  }
+}
+
+__global__ static void PME_Excluded_Energy_Correction(const int atom_numbers, const UNSIGNED_INT_VECTOR *uint_crd,
+                                                      const VECTOR *sacler, const float *charge, const float pme_beta,
+                                                      const float sqrt_pi, const int *excluded_list_start,
+                                                      const int *excluded_list, const int *excluded_atom_numbers,
+                                                      float *ene) {
+  int atom_i = blockDim.x * blockIdx.x + threadIdx.x;
+  if (atom_i < atom_numbers) {
+    int excluded_number = excluded_atom_numbers[atom_i];
+    if (excluded_number > 0) {
+      int list_start = excluded_list_start[atom_i];
+      // int atom_min = excluded_list[list_start];
+      int list_end = list_start + excluded_number;
+      int atom_j;
+      int int_x;
+      int int_y;
+      int int_z;
+
+      float charge_i = charge[atom_i];
+      float charge_j;
+      float dr_abs;
+      float beta_dr;
+
+      UNSIGNED_INT_VECTOR r1 = uint_crd[atom_i], r2;
+      VECTOR dr;
+      float dr2;
+
+      float ene_lin = 0.;
+
+      for (int i = list_start; i < list_end; i = i + 1) {
+        atom_j = excluded_list[i];
+        r2 = uint_crd[atom_j];
+        charge_j = charge[atom_j];
+
+        int_x = r2.uint_x - r1.uint_x;
+        int_y = r2.uint_y - r1.uint_y;
+        int_z = r2.uint_z - r1.uint_z;
+        dr.x = sacler[0].x * int_x;
+        dr.y = sacler[0].y * int_y;
+        dr.z = sacler[0].z * int_z;
+        dr2 = dr.x * dr.x + dr.y * dr.y + dr.z * dr.z;
+
+        dr_abs = sqrtf(dr2);
+        beta_dr = pme_beta * dr_abs;
+
+        ene_lin -= charge_i * charge_j * erff(beta_dr) / dr_abs;
+      }
+      atomicAdd(ene, ene_lin);
+    }
+  }
+}
 #endif
diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/pme/pme_energy_impl.cu b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/pme/pme_energy_impl.cu
index 9cffb10bd53..0fcee94bae9 100644
--- a/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/pme/pme_energy_impl.cu
+++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/pme/pme_energy_impl.cu
@@ -13,22 +13,14 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
+/**
+ * Note:
+ * PMEEnergy. This is an experimental interface that is subject to change and/or deletion.
+ */
 #include "backend/kernel_compiler/gpu/cuda_impl/sponge/pme/pme_energy_impl.cuh"
 #include "backend/kernel_compiler/gpu/cuda_impl/sponge/pme/pme_common.cuh"
 #include "backend/kernel_compiler/gpu/cuda_impl/sponge/common_sponge.cuh"
 
-__global__ void PME_Energy_Product(const int element_number, const float *list1, const float *list2, float *sum) {
-  if (threadIdx.x == 0) {
-    sum[0] = 0.;
-  }
-  __syncthreads();
-  float lin = 0.0;
-  for (int i = threadIdx.x; i < element_number; i = i + blockDim.x) {
-    lin = lin + list1[i] * list2[i];
-  }
-  atomicAdd(sum, lin);
-}
-
 __global__ void PME_Energy_Reciprocal(const int element_number, const cufftComplex *FQ, const float *BC, float *sum) {
   if (threadIdx.x == 0) {
     sum[0] = 0.;
@@ -43,56 +35,6 @@
   atomicAdd(sum, lin);
 }
 
-__global__ void PME_Excluded_Energy_Correction(const int atom_numbers, const UNSIGNED_INT_VECTOR *uint_crd,
-                                               const VECTOR *sacler, const float *charge, const float pme_beta,
-                                               const float sqrt_pi, const int *excluded_list_start,
-                                               const int *excluded_list, const int *excluded_atom_numbers, float *ene) {
-  int atom_i = blockDim.x * blockIdx.x + threadIdx.x;
-  if (atom_i < atom_numbers) {
-    int excluded_number = excluded_atom_numbers[atom_i];
-    if (excluded_number > 0) {
-      int list_start = excluded_list_start[atom_i];
-      // int atom_min = excluded_list[list_start];
-      int list_end = list_start + excluded_number;
-      int atom_j;
-      int int_x;
-      int int_y;
-      int int_z;
-
-      float charge_i = charge[atom_i];
-      float charge_j;
-      float dr_abs;
-      float beta_dr;
-
-      UNSIGNED_INT_VECTOR r1 = uint_crd[atom_i], r2;
-      VECTOR dr;
-      float dr2;
-
-      float ene_lin = 0.;
-
-      for (int i = list_start; i < list_end; i = i + 1) {
-        atom_j = excluded_list[i];
-        r2 = uint_crd[atom_j];
-        charge_j = charge[atom_j];
-
-        int_x = r2.uint_x - r1.uint_x;
-        int_y = r2.uint_y - r1.uint_y;
-        int_z = r2.uint_z - r1.uint_z;
-        dr.x = sacler[0].x * int_x;
-        dr.y = sacler[0].y * int_y;
-        dr.z = sacler[0].z * int_z;
-        dr2 = dr.x * dr.x + dr.y * dr.y + dr.z * dr.z;
-
-        dr_abs = sqrtf(dr2);
-        beta_dr = pme_beta * dr_abs;
-
-        ene_lin -= charge_i * charge_j * erff(beta_dr) / dr_abs;
-      }
-      atomicAdd(ene, ene_lin);
-    }
-  }
-}
-
 void PMEEnergy(int fftx, int ffty, int fftz, int atom_numbers, float beta, float *PME_BC, int *pme_uxyz,
                float *pme_frxyz, float *PME_Q, float *pme_fq, int *PME_atom_near, int *pme_kxyz, const int *uint_crd_f,
                const float *charge, int *nl_atom_numbers, int *nl_atom_serial, int *nl, const float *scaler_f,
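Reviewer note (not part of the patch): the reduction kernels consolidated into pme_common.cuh (`PME_Energy_Product` and friends) zero `sum[0]` only from `threadIdx.x == 0` and rely on `__syncthreads()`, so they are correct only for single-block launches — which is how they are invoked (e.g. `<<<1, 1024, 0, stream>>>`). A serial sketch of the computation, for reference:

```cpp
// What PME_Energy_Product computes, serially (sketch): a dot product reduced
// into a single accumulator. The CUDA version must stay a one-block launch,
// since __syncthreads() does not synchronize across blocks and only one
// thread clears the accumulator before the atomicAdd reduction.
float energy_product(const float *list1, const float *list2, int n) {
  float sum = 0.f;
  for (int i = 0; i < n; ++i) sum += list1[i] * list2[i];
  return sum;
}
```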
diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/pme/pme_energy_update_impl.cu b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/pme/pme_energy_update_impl.cu
new file mode 100644
index 00000000000..6ba0dd87cd9
--- /dev/null
+++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/pme/pme_energy_update_impl.cu
@@ -0,0 +1,90 @@
+/**
+ * Copyright 2021 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/**
+ * Note:
+ * PMEEnergyUpdate. This is an experimental interface that is subject to change and/or deletion.
+ */
+#include "backend/kernel_compiler/gpu/cuda_impl/sponge/pme/pme_energy_update_impl.cuh"
+#include "backend/kernel_compiler/gpu/cuda_impl/sponge/pme/pme_common.cuh"
+#include "backend/kernel_compiler/gpu/cuda_impl/sponge/common_sponge.cuh"
+
+__global__ void PME_Energy_Reciprocal_update(const int element_number, const cufftComplex *FQ, const float *BC,
+                                             float *sum) {
+  if (threadIdx.x == 0) {
+    sum[0] = 0.;
+  }
+  __syncthreads();
+  float lin = 0.0;
+  cufftComplex FQ_i;
+  for (int i = threadIdx.x; i < element_number; i = i + blockDim.x) {
+    FQ_i = FQ[i];
+    lin = lin + (FQ_i.x * FQ_i.x + FQ_i.y * FQ_i.y) * BC[i];
+  }
+  atomicAdd(sum, lin);
+}
+
+void PMEEnergyUpdate(int fftx, int ffty, int fftz, int atom_numbers, float beta, float *PME_BC, int *pme_uxyz,
+                     float *pme_frxyz, float *PME_Q, float *pme_fq, int *PME_atom_near, int *pme_kxyz,
+                     const int *uint_crd_f, const float *charge, int *nl_atom_numbers, int *nl_atom_serial, int *nl,
+                     const float *scaler_f, const int *excluded_list_start, const int *excluded_list,
+                     const int *excluded_atom_numbers, float *d_reciprocal_ene, float *d_self_ene, float *d_direct_ene,
+                     float *d_correction_ene, dim3 thread_PME, int PME_Nin, int PME_Nfft, int PME_Nall,
+                     const cufftHandle &PME_plan_r2c, const cufftHandle &PME_plan_c2r, float *neutralizing_factor,
+                     float *charge_sum, int max_neighbor_numbers, cudaStream_t stream) {
+  UNSIGNED_INT_VECTOR *uint_crd =
+    const_cast<UNSIGNED_INT_VECTOR *>(reinterpret_cast<const UNSIGNED_INT_VECTOR *>(uint_crd_f));
+  VECTOR *scaler = const_cast<VECTOR *>(reinterpret_cast<const VECTOR *>(scaler_f));
+  // int max_neighbor_numbers = 800;
+  NEIGHBOR_LIST *nl_a = reinterpret_cast<NEIGHBOR_LIST *>(nl);
+  construct_neighbor_list_kernel<<<ceilf(static_cast<float>(atom_numbers) / 128), 128, 0, stream>>>(
+    atom_numbers, max_neighbor_numbers, nl_atom_numbers, nl_atom_serial, nl_a);
+
+  UNSIGNED_INT_VECTOR *PME_uxyz = reinterpret_cast<UNSIGNED_INT_VECTOR *>(pme_uxyz);
+  UNSIGNED_INT_VECTOR *PME_kxyz = reinterpret_cast<UNSIGNED_INT_VECTOR *>(pme_kxyz);
+  VECTOR *PME_frxyz = reinterpret_cast<VECTOR *>(pme_frxyz);
+  cufftComplex *PME_FQ = reinterpret_cast<cufftComplex *>(pme_fq);
+
+  Reset_List<<<3 * atom_numbers / 32 + 1, 32, 0, stream>>>(3 * atom_numbers, reinterpret_cast<int *>(PME_uxyz),
+                                                           1 << 30);
+  PME_Atom_Near<<<atom_numbers / 32 + 1, 32, 0, stream>>>(
+    uint_crd, PME_atom_near, PME_Nin, periodic_factor_inverse * fftx, periodic_factor_inverse * ffty,
+    periodic_factor_inverse * fftz, atom_numbers, fftx, ffty, fftz, PME_kxyz, PME_uxyz, PME_frxyz);
+
+  Reset_List<<<PME_Nall / 1024 + 1, 1024, 0, stream>>>(PME_Nall, PME_Q, 0);
+
+  PME_Q_Spread<<<atom_numbers / thread_PME.x + 1, thread_PME, 0, stream>>>(PME_atom_near, charge, PME_frxyz, PME_Q,
+                                                                           PME_kxyz, atom_numbers);
+
+  cufftExecR2C(PME_plan_r2c, reinterpret_cast<float *>(PME_Q), reinterpret_cast<cufftComplex *>(PME_FQ));
+
+  PME_Energy_Reciprocal_update<<<1, 1024, 0, stream>>>(PME_Nfft, PME_FQ, PME_BC, d_reciprocal_ene);
+
+  PME_Energy_Product<<<1, 1024, 0, stream>>>(atom_numbers, charge, charge, d_self_ene);
+  Scale_List<<<1, 1, 0, stream>>>(1, d_self_ene, -beta / sqrtf(PI));
+
+  Sum_Of_List<<<1, 1024>>>(atom_numbers, charge, charge_sum);
+  device_add<<<1, 1>>>(d_self_ene, neutralizing_factor, charge_sum);
+
+  Reset_List<<<1, 1, 0, stream>>>(1, d_direct_ene, 0.0);
+  PME_Direct_Energy<<<atom_numbers / thread_PME.x + 1, thread_PME, 0, stream>>>(
+    atom_numbers, nl_a, uint_crd, scaler, charge, beta, cutoff * cutoff, d_direct_ene);
+
+  Reset_List<<<1, 1, 0, stream>>>(1, d_correction_ene, 0.0);
+  PME_Excluded_Energy_Correction<<<atom_numbers / 32 + 1, 32, 0, stream>>>(
+    atom_numbers, uint_crd, scaler, charge, beta, sqrtf(PI), excluded_list_start, excluded_list, excluded_atom_numbers,
+    d_correction_ene);
+  return;
+}
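Reviewer note (not part of the patch): my reading of the `PMEEnergyUpdate` launch sequence above, as a serial sketch (the function name is mine) — `d_self_ene` is first set to the Ewald self term `-beta / sqrt(pi) * sum(q_i^2)`, then `device_add` folds in a neutralizing correction `factor * (sum(q_i))^2` for boxes with non-zero net charge:

```cpp
// Serial sketch of the self-energy assembled by PMEEnergyUpdate (assumption:
// `factor` plays the role of the neutralizing_factor buffer).
#include <cmath>

float self_energy(const float *q, int n, float beta, float factor) {
  float q2 = 0.f, qsum = 0.f;
  for (int i = 0; i < n; ++i) {
    q2 += q[i] * q[i];   // reduced on device by PME_Energy_Product
    qsum += q[i];        // reduced on device by Sum_Of_List
  }
  const float pi = 3.1415926f;
  return -beta / std::sqrt(pi) * q2 + factor * qsum * qsum;  // device_add step
}
```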
diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/pme/pme_energy_update_impl.cuh b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/pme/pme_energy_update_impl.cuh
new file mode 100644
index 00000000000..ae258bd52e7
--- /dev/null
+++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/pme/pme_energy_update_impl.cuh
@@ -0,0 +1,31 @@
+/**
+ * Copyright 2021 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_PME_PME_ENERGY_UPDATE_IMPL_H_
+#define MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_PME_PME_ENERGY_UPDATE_IMPL_H_
+
+#include <cufft.h>
+#include "runtime/device/gpu/cuda_common.h"
+
+void PMEEnergyUpdate(int fftx, int ffty, int fftz, int atom_numbers, float beta, float *PME_BC, int *pme_uxyz,
+                     float *pme_frxyz, float *PME_Q, float *pme_fq, int *PME_atom_near, int *pme_kxyz,
+                     const int *uint_crd_f, const float *charge, int *nl_atom_numbers, int *nl_atom_serial, int *nl,
+                     const float *scaler_f, const int *excluded_list_start, const int *excluded_list,
+                     const int *excluded_atom_numbers, float *d_reciprocal_ene, float *d_self_ene, float *d_direct_ene,
+                     float *d_correction_ene, dim3 thread_PME, int PME_Nin, int PME_Nfft, int PME_Nall,
+                     const cufftHandle &PME_plan_r2c, const cufftHandle &PME_plan_c2r, float *neutralizing_factor,
+                     float *charge_sum, int max_neighbor_numbers, cudaStream_t stream);
+
+#endif
diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/pme/pme_reciprocal_force_impl.cu b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/pme/pme_reciprocal_force_impl.cu
index b9cb2f29d04..011ab704c37 100644
--- a/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/pme/pme_reciprocal_force_impl.cu
+++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/pme/pme_reciprocal_force_impl.cu
@@ -13,19 +13,13 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
+/**
+ * Note:
+ * PMEReciprocalForce. This is an experimental interface that is subject to change and/or deletion.
+ */
 #include "backend/kernel_compiler/gpu/cuda_impl/sponge/pme/pme_reciprocal_force_impl.cuh"
 #include "backend/kernel_compiler/gpu/cuda_impl/sponge/pme/pme_common.cuh"
 
-__global__ void PME_BCFQ(cufftComplex *PME_FQ, float *PME_BC, int PME_Nfft) {
-  int index = blockDim.x * blockIdx.x + threadIdx.x;
-  if (index < PME_Nfft) {
-    float tempf = PME_BC[index];
-    cufftComplex tempc = PME_FQ[index];
-    PME_FQ[index].x = tempc.x * tempf;
-    PME_FQ[index].y = tempc.y * tempf;
-  }
-}
-
 __global__ void PME_Final(int *PME_atom_near, const float *charge, const float *PME_Q, VECTOR *force,
                           const VECTOR *PME_frxyz, const UNSIGNED_INT_VECTOR *PME_kxyz,
                           const _VECTOR PME_inverse_box_vector, const int atom_numbers) {
@@ -92,9 +86,11 @@
   // initial end
   Reset_List<<<ceilf(static_cast<float>(3. * atom_numbers) / 128), 128, 0, stream>>>(
     3 * atom_numbers, reinterpret_cast<float *>(frc), 0.);
+
   PME_Atom_Near<<<atom_numbers / 32 + 1, 32, 0, stream>>>(
     uint_crd, PME_atom_near, PME_Nin, periodic_factor_inverse * fftx, periodic_factor_inverse * ffty,
     periodic_factor_inverse * fftz, atom_numbers, fftx, ffty, fftz, PME_kxyz, PME_uxyz, PME_frxyz);
+
   Reset_List<<<PME_Nall / 1024 + 1, 1024, 0, stream>>>(PME_Nall, PME_Q, 0);
 
   PME_Q_Spread<<<atom_numbers / thread_PME.x + 1, thread_PME, 0, stream>>>(PME_atom_near, charge, PME_frxyz, PME_Q,
diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/simple_constrain/constrain_force_cycle_impl.cu b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/simple_constrain/constrain_force_cycle_impl.cu
index 0adea90a989..cf3c2d93a5d 100644
--- a/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/simple_constrain/constrain_force_cycle_impl.cu
+++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/simple_constrain/constrain_force_cycle_impl.cu
@@ -13,9 +13,13 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
+/**
+ * Note:
+ * Constrain_Force_Cycle. This is an experimental interface that is subject to change and/or deletion.
+ */
 
-#include "backend/kernel_compiler/gpu/cuda_impl/sponge/simple_constrain/constrain_force_cycle_impl.cuh"
 #include "backend/kernel_compiler/gpu/cuda_impl/sponge/common_sponge.cuh"
+#include "backend/kernel_compiler/gpu/cuda_impl/sponge/simple_constrain/constrain_force_cycle_impl.cuh"
 
 __global__ void Constrain_Force_Cycle_Kernel(const int constrain_pair_numbers, const UNSIGNED_INT_VECTOR *uint_crd,
                                              const VECTOR *scaler, const CONSTRAIN_PAIR *constrain_pair,
@@ -64,10 +68,10 @@ void Constrain_Force_Cycle(int atom_numbers, int constrain_pair_numbers, const u
   CONSTRAIN_PAIR *constrain_pair = reinterpret_cast<CONSTRAIN_PAIR *>(constrain_pair_f);
 
   construct_constrain_pair<<<ceilf(static_cast<float>(constrain_pair_numbers) / 128), 128, 0, stream>>>(
-      constrain_pair_numbers, atom_i_serials, atom_j_serials, constant_rs, constrain_ks, constrain_pair);
+    constrain_pair_numbers, atom_i_serials, atom_j_serials, constant_rs, constrain_ks, constrain_pair);
 
   Constrain_Force_Cycle_Kernel<<<block_per_grid, thread_per_block, 0, stream>>>(
-      constrain_pair_numbers, uint_crd, scaler, constrain_pair, pair_dr, test_frc);
+    constrain_pair_numbers, uint_crd, scaler, constrain_pair, pair_dr, test_frc);
 
   return;
 }
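Reviewer note (not part of the patch): `PME_BCFQ`, previously private to pme_reciprocal_force_impl.cu, is now shared through pme_common.cuh. It scales each Fourier mode of the spread charge grid by the precomputed B·C prefactor between the forward and inverse FFT. A serial sketch of that per-mode scaling (names are mine):

```cpp
// Serial equivalent of PME_BCFQ (sketch): element-wise scaling of complex
// FFT coefficients by a real prefactor array of the same length.
#include <complex>
#include <cstddef>
#include <vector>

void bcfq(std::vector<std::complex<float>> *fq, const std::vector<float> &bc) {
  for (std::size_t i = 0; i < fq->size(); ++i) {
    (*fq)[i] *= bc[i];  // scale both real and imaginary parts, as the kernel does
  }
}
```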
diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/simple_constrain/constrain_force_cycle_impl.cuh b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/simple_constrain/constrain_force_cycle_impl.cuh
index a05689c8d38..ee8f3e99e6f 100644
--- a/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/simple_constrain/constrain_force_cycle_impl.cuh
+++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/simple_constrain/constrain_force_cycle_impl.cuh
@@ -13,6 +13,10 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
+/**
+ * Note:
+ * Constrain_Force_Cycle. This is an experimental interface that is subject to change and/or deletion.
+ */
 #ifndef MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_SIMPLE_CONSTRAIN_CONSTRAIN_FORCE_CYCLE_IMPL_H_
 #define MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_SIMPLE_CONSTRAIN_CONSTRAIN_FORCE_CYCLE_IMPL_H_
 
@@ -24,4 +28,5 @@ void Constrain_Force_Cycle(int atom_numbers, int constrain_pair_numbers, const u
                            const float *scaler_f, float *constrain_pair_f, const float *pair_dr_f,
                            const int *atom_i_serials, const int *atom_j_serials, const float *constant_rs,
                            const float *constrain_ks, float *test_frc_f, cudaStream_t stream);
-#endif  // MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_SIMPLE_CONSTRAIN_CONSTRAIN_FORCE_CYCLE_WITH_VIRIAL_IMPL_H_
+
+#endif
diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/simple_constrain/constrain_force_cycle_with_virial_impl.cu b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/simple_constrain/constrain_force_cycle_with_virial_impl.cu
index edcd6d00598..26436a551d8 100644
--- a/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/simple_constrain/constrain_force_cycle_with_virial_impl.cu
+++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/simple_constrain/constrain_force_cycle_with_virial_impl.cu
@@ -13,9 +13,13 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
+/**
+ * Note:
+ * ConstrainForceCycleVirial. This is an experimental interface that is subject to change and/or deletion.
+ */
 
-#include "backend/kernel_compiler/gpu/cuda_impl/sponge/simple_constrain/constrain_force_cycle_with_virial_impl.cuh"
 #include "backend/kernel_compiler/gpu/cuda_impl/sponge/common_sponge.cuh"
+#include "backend/kernel_compiler/gpu/cuda_impl/sponge/simple_constrain/constrain_force_cycle_with_virial_impl.cuh"
 
 __global__ void Constrain_Force_Cycle_With_Virial(int constrain_pair_numbers, const UNSIGNED_INT_VECTOR *uint_crd,
                                                   const VECTOR *scaler, CONSTRAIN_PAIR *constrain_pair,
@@ -46,7 +50,8 @@ void Constrain_Force_Cycle_With_Virial(int atom_numbers, int constrain_pair_numb
                                        const float *constrain_ks, float *test_frc_f, float *d_atom_virial,
                                        cudaStream_t stream) {
   Reset_List<<<ceilf(static_cast<float>(3 * atom_numbers) / 128), 128, 0, stream>>>(3 * atom_numbers, test_frc_f, 0.);
-  Reset_List<<<ceilf(static_cast<float>(atom_numbers) / 128), 128, 0, stream>>>(atom_numbers, d_atom_virial, 0.);
+  Reset_List<<<ceilf(static_cast<float>(constrain_pair_numbers) / 128), 128, 0, stream>>>(constrain_pair_numbers,
+                                                                                          d_atom_virial, 0.);
   size_t thread_per_block = 128;
   size_t block_per_grid = ceilf(static_cast<float>(atom_numbers) / 128);
   const UNSIGNED_INT_VECTOR *uint_crd = reinterpret_cast<const UNSIGNED_INT_VECTOR *>(uint_crd_f);
@@ -58,10 +63,10 @@ void Constrain_Force_Cycle_With_Virial(int atom_numbers, int constrain_pair_numb
   CONSTRAIN_PAIR *constrain_pair = reinterpret_cast<CONSTRAIN_PAIR *>(constrain_pair_f);
 
   construct_constrain_pair<<<ceilf(static_cast<float>(constrain_pair_numbers) / 128), 128, 0, stream>>>(
-      constrain_pair_numbers, atom_i_serials, atom_j_serials, constant_rs, constrain_ks, constrain_pair);
+    constrain_pair_numbers, atom_i_serials, atom_j_serials, constant_rs, constrain_ks, constrain_pair);
 
   Constrain_Force_Cycle_With_Virial<<<block_per_grid, thread_per_block, 0, stream>>>(
-      constrain_pair_numbers, uint_crd, scaler, constrain_pair, pair_dr, test_frc, d_atom_virial);
+    constrain_pair_numbers, uint_crd, scaler, constrain_pair, pair_dr, test_frc, d_atom_virial);
 
   return;
 }
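Reviewer note (not part of the patch): the `Reset_List` change above matters because `d_atom_virial` is indexed per constraint pair, not per atom, so clearing only `atom_numbers` entries would leave stale values whenever pairs outnumber atoms. A minimal sketch of the accumulation pattern the buffer must match:

```cpp
// One slot per constraint pair (sketch): the cleared region must therefore be
// sized by constrain_pair_numbers, matching the pair_i indexing in the kernel.
void accumulate_pair_virial(float *d_atom_virial, int pair_i, float frc_dot_dr) {
  d_atom_virial[pair_i] -= frc_dot_dr;  // frc_lin . dr0, as in the CUDA kernel
}
```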
diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/simple_constrain/constrain_force_cycle_with_virial_impl.cuh b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/simple_constrain/constrain_force_cycle_with_virial_impl.cuh
index 559b67a264a..1851d856748 100644
--- a/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/simple_constrain/constrain_force_cycle_with_virial_impl.cuh
+++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/simple_constrain/constrain_force_cycle_with_virial_impl.cuh
@@ -13,6 +13,10 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
+/**
+ * Note:
+ * ConstrainForceCycleVirial. This is an experimental interface that is subject to change and/or deletion.
+ */
 #ifndef MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_SIMPLE_CONSTRAIN_CONSTRAIN_FORCE_CYCLE_WITH_VIRIAL_IMPL_H_
 #define MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_SIMPLE_CONSTRAIN_CONSTRAIN_FORCE_CYCLE_WITH_VIRIAL_IMPL_H_
 
@@ -25,4 +29,4 @@ void Constrain_Force_Cycle_With_Virial(int atom_numbers, int constrain_pair_numb
                                        const int *atom_i_serials, const int *atom_j_serials, const float *constant_rs,
                                        const float *constrain_ks, float *test_frc_f, float *d_atom_virial,
                                        cudaStream_t stream);
-#endif  // MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_SIMPLE_CONSTRAIN_CONSTRAIN_FORCE_CYCLE_WITH_VIRIAL_IMPL_H_
+#endif
diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/simple_constrain/constrain_force_virial_impl.cu b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/simple_constrain/constrain_force_virial_impl.cu
new file mode 100644
index 00000000000..890b6e6fe71
--- /dev/null
+++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/simple_constrain/constrain_force_virial_impl.cu
@@ -0,0 +1,180 @@
+/**
+ * Copyright 2021 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/**
+ * Note:
+ * ConstrainForceCycleVirial. This is an experimental interface that is subject to change and/or deletion.
+ */
+
+#include "backend/kernel_compiler/gpu/cuda_impl/sponge/common_sponge.cuh"
+#include "backend/kernel_compiler/gpu/cuda_impl/sponge/simple_constrain/constrain_force_virial_impl.cuh"
+
+__global__ void constrain_force_cycle_with_virial_update_kernel(int constrain_pair_numbers,
+                                                                const UNSIGNED_INT_VECTOR *uint_crd,
+                                                                const VECTOR *scaler, CONSTRAIN_PAIR *constrain_pair,
+                                                                const VECTOR *pair_dr, VECTOR *test_frc,
+                                                                float *d_atom_virial) {
+  int pair_i = blockDim.x * blockIdx.x + threadIdx.x;
+  if (pair_i < constrain_pair_numbers) {
+    CONSTRAIN_PAIR cp = constrain_pair[pair_i];
+    VECTOR dr0 = pair_dr[pair_i];
+    VECTOR dr = Get_Periodic_Displacement(uint_crd[cp.atom_i_serial], uint_crd[cp.atom_j_serial], scaler[0]);
+    float r_1 = rnorm3df(dr.x, dr.y, dr.z);
+    float frc_abs = (1. - cp.constant_r * r_1) * cp.constrain_k;
+    VECTOR frc_lin = frc_abs * dr0;
+    d_atom_virial[pair_i] -= frc_lin * dr0;
+
+    atomicAdd(&test_frc[cp.atom_j_serial].x, frc_lin.x);
+    atomicAdd(&test_frc[cp.atom_j_serial].y, frc_lin.y);
+    atomicAdd(&test_frc[cp.atom_j_serial].z, frc_lin.z);
+
+    atomicAdd(&test_frc[cp.atom_i_serial].x, -frc_lin.x);
+    atomicAdd(&test_frc[cp.atom_i_serial].y, -frc_lin.y);
+    atomicAdd(&test_frc[cp.atom_i_serial].z, -frc_lin.z);
+  }
+}
+
+__global__ void constrain_force_cycle_update_kernel(const int constrain_pair_numbers,
+                                                    const UNSIGNED_INT_VECTOR *uint_crd, const VECTOR *scaler,
+                                                    const CONSTRAIN_PAIR *constrain_pair, const VECTOR *pair_dr,
+                                                    VECTOR *test_frc) {
+  int pair_i = blockDim.x * blockIdx.x + threadIdx.x;
+  if (pair_i < constrain_pair_numbers) {
+    CONSTRAIN_PAIR cp = constrain_pair[pair_i];
+    float r_1;
+    VECTOR dr;
+    float frc_abs;
+    VECTOR frc_lin;
+
+    dr.x = (static_cast<int>(uint_crd[cp.atom_i_serial].uint_x - uint_crd[cp.atom_j_serial].uint_x)) * scaler[0].x;
+    dr.y = (static_cast<int>(uint_crd[cp.atom_i_serial].uint_y - uint_crd[cp.atom_j_serial].uint_y)) * scaler[0].y;
+    dr.z = (static_cast<int>(uint_crd[cp.atom_i_serial].uint_z - uint_crd[cp.atom_j_serial].uint_z)) * scaler[0].z;
+    r_1 = rnorm3df(dr.x, dr.y, dr.z);
+    frc_abs = (1. - cp.constant_r * r_1) * cp.constrain_k;
+
+    frc_lin.x = frc_abs * pair_dr[pair_i].x;
+    frc_lin.y = frc_abs * pair_dr[pair_i].y;
+    frc_lin.z = frc_abs * pair_dr[pair_i].z;
+
+    atomicAdd(&test_frc[cp.atom_j_serial].x, frc_lin.x);
+    atomicAdd(&test_frc[cp.atom_j_serial].y, frc_lin.y);
+    atomicAdd(&test_frc[cp.atom_j_serial].z, frc_lin.z);
+
+    atomicAdd(&test_frc[cp.atom_i_serial].x, -frc_lin.x);
+    atomicAdd(&test_frc[cp.atom_i_serial].y, -frc_lin.y);
+    atomicAdd(&test_frc[cp.atom_i_serial].z, -frc_lin.z);
+  }
+}
+
+__global__ void refresh_uint_crd_update_kernel(int atom_numbers, const VECTOR *crd,
+                                               const VECTOR *quarter_crd_to_uint_crd_cof, UNSIGNED_INT_VECTOR *uint_crd,
+                                               VECTOR *test_frc, const float *mass_inverse,
+                                               const float half_exp_gamma_plus_half) {
+  int atom_i = blockDim.x * blockIdx.x + threadIdx.x;
+  if (atom_i < atom_numbers) {
+    INT_VECTOR tempi;
+    VECTOR crd_lin = crd[atom_i];
+    VECTOR frc_lin = test_frc[atom_i];
+    float mass_lin = mass_inverse[atom_i];
+
+    crd_lin.x = crd_lin.x + half_exp_gamma_plus_half * frc_lin.x * mass_lin;
+    crd_lin.y = crd_lin.y + half_exp_gamma_plus_half * frc_lin.y * mass_lin;
+    crd_lin.z = crd_lin.z + half_exp_gamma_plus_half * frc_lin.z * mass_lin;
+
+    tempi.int_x = crd_lin.x * quarter_crd_to_uint_crd_cof[0].x;
+    tempi.int_y = crd_lin.y * quarter_crd_to_uint_crd_cof[0].y;
+    tempi.int_z = crd_lin.z * quarter_crd_to_uint_crd_cof[0].z;
+
+    uint_crd[atom_i].uint_x = tempi.int_x << 2;
+    uint_crd[atom_i].uint_y = tempi.int_y << 2;
+    uint_crd[atom_i].uint_z = tempi.int_z << 2;
+  }
+}
+
+void constrain_force_cycle_update(int atom_numbers, int constrain_pair_numbers, const unsigned int *uint_crd_f,
+                                  const float *scaler_f, float *constrain_pair_f, const float *pair_dr_f,
+                                  const int *atom_i_serials, const int *atom_j_serials, const float *constant_rs,
+                                  const float *constrain_ks, float *test_frc_f, cudaStream_t stream) {
+  size_t thread_per_block = 128;
+  size_t block_per_grid = ceilf(static_cast<float>(atom_numbers) / 128);
+  const UNSIGNED_INT_VECTOR *uint_crd = reinterpret_cast<const UNSIGNED_INT_VECTOR *>(uint_crd_f);
+  const VECTOR *scaler = reinterpret_cast<const VECTOR *>(scaler_f);
+  const VECTOR *pair_dr = reinterpret_cast<const VECTOR *>(pair_dr_f);
+
+  VECTOR *test_frc = reinterpret_cast<VECTOR *>(test_frc_f);
+
+  CONSTRAIN_PAIR *constrain_pair = reinterpret_cast<CONSTRAIN_PAIR *>(constrain_pair_f);
+
+  construct_constrain_pair<<<ceilf(static_cast<float>(constrain_pair_numbers) / 128), 128, 0, stream>>>(
+    constrain_pair_numbers, atom_i_serials, atom_j_serials, constant_rs, constrain_ks, constrain_pair);
+
+  constrain_force_cycle_update_kernel<<<block_per_grid, thread_per_block, 0, stream>>>(
+    constrain_pair_numbers, uint_crd, scaler, constrain_pair, pair_dr, test_frc);
+
+  return;
+}
+
+void constrain_force_cycle_with_virial_update(int atom_numbers, int constrain_pair_numbers,
+                                              const unsigned int *uint_crd_f, const float *scaler_f,
+                                              float *constrain_pair_f, const float *pair_dr_f,
+                                              const int *atom_i_serials, const int *atom_j_serials,
+                                              const float *constant_rs, const float *constrain_ks, float *test_frc_f,
+                                              float *d_atom_virial, cudaStream_t stream) {
+  size_t thread_per_block = 128;
+  size_t block_per_grid = ceilf(static_cast<float>(atom_numbers) / 128);
+  const UNSIGNED_INT_VECTOR *uint_crd = reinterpret_cast<const UNSIGNED_INT_VECTOR *>(uint_crd_f);
+  const VECTOR *scaler = reinterpret_cast<const VECTOR *>(scaler_f);
+  const VECTOR *pair_dr = reinterpret_cast<const VECTOR *>(pair_dr_f);
+
+  VECTOR *test_frc = reinterpret_cast<VECTOR *>(test_frc_f);
+
+  CONSTRAIN_PAIR *constrain_pair = reinterpret_cast<CONSTRAIN_PAIR *>(constrain_pair_f);
+
+  construct_constrain_pair<<<ceilf(static_cast<float>(constrain_pair_numbers) / 128), 128, 0, stream>>>(
+    constrain_pair_numbers, atom_i_serials, atom_j_serials, constant_rs, constrain_ks, constrain_pair);
+
+  constrain_force_cycle_with_virial_update_kernel<<<block_per_grid, thread_per_block, 0, stream>>>(
+    constrain_pair_numbers, uint_crd, scaler, constrain_pair, pair_dr, test_frc, d_atom_virial);
+
+  return;
+}
+
+void refresh_uint_crd_update(int atom_numbers, float half_exp_gamma_plus_half, const float *crd_f,
+                             const float *quarter_crd_to_uint_crd_cof_f, float *test_frc_f, const float *mass_inverse,
+                             unsigned int *uint_crd_f, cudaStream_t stream) {
+  size_t thread_per_block = 128;
+  size_t block_per_grid = ceilf(static_cast<float>(atom_numbers) / 128);
+  const VECTOR *crd = reinterpret_cast<const VECTOR *>(crd_f);
+  const VECTOR *quarter_crd_to_uint_crd_cof = reinterpret_cast<const VECTOR *>(quarter_crd_to_uint_crd_cof_f);
+  VECTOR *test_frc = reinterpret_cast<VECTOR *>(test_frc_f);
+  UNSIGNED_INT_VECTOR *uint_crd = reinterpret_cast<UNSIGNED_INT_VECTOR *>(uint_crd_f);
+
+  refresh_uint_crd_update_kernel<<<block_per_grid, thread_per_block, 0, stream>>>(
+    atom_numbers, crd, quarter_crd_to_uint_crd_cof, uint_crd, test_frc, mass_inverse, half_exp_gamma_plus_half);
+  return;
+}
+
+void set_zero_force_with_virial(int atom_numbers, int constrain_pair_numbers, float *test_frc_f, float *d_atom_virial,
+                                cudaStream_t stream) {
+  Reset_List<<<ceilf(static_cast<float>(3 * atom_numbers) / 128), 128, 0, stream>>>(3 * atom_numbers, test_frc_f, 0.);
+  Reset_List<<<ceilf(static_cast<float>(constrain_pair_numbers) / 128), 128, 0, stream>>>(constrain_pair_numbers,
+                                                                                          d_atom_virial, 0.);
+  return;
+}
+
+void set_zero(int numbers, float *x, cudaStream_t stream) {
+  Reset_List<<<ceilf(static_cast<float>(numbers) / 128), 128, 0, stream>>>(numbers, x, 0.);
+  return;
+}
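Reviewer note (not part of the patch): `refresh_uint_crd_update_kernel` above re-quantizes the constrained positions into the unsigned fixed-point representation the neighbor-list kernels consume. My reading of the round trip, as a host-side sketch (helper names are mine): the coefficient is a quarter of the full scale, and the `<< 2` restores it, so that later unsigned subtraction wraps correctly across the periodic box.

```cpp
// Sketch of the coordinate quantization in refresh_uint_crd_update_kernel
// (assumption: quarter_cof is one quarter of the crd-to-uint scale factor).
#include <cstdint>

uint32_t to_uint_crd(float x, float quarter_cof) {
  int32_t tempi = static_cast<int32_t>(x * quarter_cof);  // truncate, as the kernel does
  return static_cast<uint32_t>(tempi) << 2;               // restore full scale
}
```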
diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/simple_constrain/constrain_force_virial_impl.cuh b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/simple_constrain/constrain_force_virial_impl.cuh
new file mode 100644
index 00000000000..a8a238f986e
--- /dev/null
+++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/simple_constrain/constrain_force_virial_impl.cuh
@@ -0,0 +1,48 @@
+/**
+ * Copyright 2021 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/**
+ * Note:
+ * ConstrainForce. This is an experimental interface that is subject to change and/or deletion.
+ */
+
+#ifndef MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_SIMPLE_CONSTRAIN_CONSTRAIN_FORCE_VIRIAL_IMPL_H_
+#define MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_SIMPLE_CONSTRAIN_CONSTRAIN_FORCE_VIRIAL_IMPL_H_
+
+#include <curand_kernel.h>
+#include "runtime/device/gpu/cuda_common.h"
+
+void constrain_force_cycle_update(int atom_numbers, int constrain_pair_numbers, const unsigned int *uint_crd_f,
+                                  const float *scaler_f, float *constrain_pair_f, const float *pair_dr_f,
+                                  const int *atom_i_serials, const int *atom_j_serials, const float *constant_rs,
+                                  const float *constrain_ks, float *test_frc_f, cudaStream_t stream);
+
+void constrain_force_cycle_with_virial_update(int atom_numbers, int constrain_pair_numbers,
+                                              const unsigned int *uint_crd_f, const float *scaler_f,
+                                              float *constrain_pair_f, const float *pair_dr_f,
+                                              const int *atom_i_serials, const int *atom_j_serials,
+                                              const float *constant_rs, const float *constrain_ks, float *test_frc_f,
+                                              float *d_atom_virial, cudaStream_t stream);
+
+void refresh_uint_crd_update(int atom_numbers, float half_exp_gamma_plus_half, const float *crd_f,
+                             const float *quarter_crd_to_uint_crd_cof_f, float *test_frc_f, const float *mass_inverse,
+                             unsigned int *uint_crd_f, cudaStream_t stream);
+
+void set_zero_force_with_virial(int atom_numbers, int constrain_pair_numbers, float *test_frc_f, float *d_atom_virial,
+                                cudaStream_t stream);
+
+void set_zero(int numbers, float *x, cudaStream_t stream);
+
+#endif
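Reviewer note (not part of the patch): a sketch of how the registration in the .cc file below binds input dtypes to the kernel's template parameters. This is an illustrative stand-in, not the MindSpore macro:

```cpp
// Illustrative model of what MS_REG_GPU_KERNEL_THREE(..., float, int,
// unsigned int) selects: the kernel class is instantiated with T = float,
// T1 = int, T2 = unsigned int, so GetDeviceAddress calls in Launch() must
// use the parameter matching each declared KernelAttr slot.
template <typename T, typename T1, typename T2>
struct LJForceWithVirialEnergyBinding {
  using InputFloat = T;   // kNumberTypeFloat32 buffers (charge, scaler, LJ A/B)
  using InputInt = T1;    // kNumberTypeInt32 buffers (LJtype, neighbor lists)
  using InputUint = T2;   // kNumberTypeUInt32 buffers (packed coordinates)
};
using Registered = LJForceWithVirialEnergyBinding<float, int, unsigned int>;
```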
+ */ +#include "backend/kernel_compiler/gpu/sponge/lj/lj_direct_cf_force_with_lj_virial_direct_cf_energy_kernel.h" + +namespace mindspore { +namespace kernel { +MS_REG_GPU_KERNEL_THREE(LJForceWithVirialEnergy, + KernelAttr() + .AddInputAttr(kNumberTypeUInt32) + .AddInputAttr(kNumberTypeInt32) + .AddInputAttr(kNumberTypeFloat32) + .AddInputAttr(kNumberTypeFloat32) + .AddInputAttr(kNumberTypeInt32) + .AddInputAttr(kNumberTypeInt32) + .AddInputAttr(kNumberTypeFloat32) + .AddInputAttr(kNumberTypeFloat32) + .AddOutputAttr(kNumberTypeFloat32) + .AddOutputAttr(kNumberTypeFloat32) + .AddOutputAttr(kNumberTypeFloat32), + LJForceWithVirialEnergyGpuKernel, float, int, unsigned int) +} // namespace kernel +} // namespace mindspore diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/lj/lj_direct_cf_force_with_lj_virial_direct_cf_energy_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/lj/lj_direct_cf_force_with_lj_virial_direct_cf_energy_kernel.h new file mode 100644 index 00000000000..dd2b970747a --- /dev/null +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/lj/lj_direct_cf_force_with_lj_virial_direct_cf_energy_kernel.h @@ -0,0 +1,142 @@ +/** + * Copyright 2021 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/** + *Note: + * LJForceWithVirialEnergy. This is an experimental interface that is subject to change and/or deletion. 
+ */ +#ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_LJ_DIRECT_CF_FORCE_WITH_LJ_VIRIAL_DIRECT_CF_ENERGY_KERNEL_H_ +#define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_LJ_DIRECT_CF_FORCE_WITH_LJ_VIRIAL_DIRECT_CF_ENERGY_KERNEL_H_ +#include +#include +#include +#include +#include "backend/kernel_compiler/gpu/gpu_kernel.h" +#include "backend/kernel_compiler/gpu/gpu_kernel_factory.h" +#include "runtime/device/gpu/cuda_common.h" +#include "backend/kernel_compiler/gpu/cuda_impl/sponge/lj/lj_direct_cf_force_with_lj_virial_direct_cf_energy_impl.cuh" +namespace mindspore { +namespace kernel { +template +class LJForceWithVirialEnergyGpuKernel : public GpuKernel { + public: + LJForceWithVirialEnergyGpuKernel() : ele_uint_crd(1) {} + ~LJForceWithVirialEnergyGpuKernel() override = default; + + bool Init(const CNodePtr &kernel_node) override { + kernel_node_ = kernel_node; + atom_numbers = static_cast(GetAttr(kernel_node, "atom_numbers")); + max_neighbor_numbers = static_cast(GetAttr(kernel_node, "max_neighbor_numbers")); + cutoff = static_cast(GetAttr(kernel_node, "cutoff")); + pme_beta = static_cast(GetAttr(kernel_node, "pme_beta")); + + auto shape_uint_crd = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0); + auto shape_LJtype = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 1); + auto shape_charge = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 2); + auto shape_scaler = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 3); + auto shape_nl_numbers = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 4); + auto shape_nl_serial = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 5); + auto shape_d_LJ_a = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 6); + auto shape_d_LJ_b = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 7); + + for (size_t i = 0; i < shape_uint_crd.size(); i++) ele_uint_crd *= shape_uint_crd[i]; + for (size_t i = 0; i < shape_LJtype.size(); i++) ele_LJtype *= shape_LJtype[i]; + for (size_t i = 0; i < shape_charge.size(); i++) ele_charge *= shape_charge[i]; + for (size_t i = 0; i < shape_scaler.size(); i++) ele_scaler *= shape_scaler[i]; + for (size_t i = 0; i < shape_d_LJ_a.size(); i++) ele_d_LJ_a *= shape_d_LJ_a[i]; + for (size_t i = 0; i < shape_d_LJ_b.size(); i++) ele_d_LJ_b *= shape_d_LJ_b[i]; + + InitSizeLists(); + return true; + } + + const std::vector &GetInputSizeList() const override { return input_size_list_; } + const std::vector &GetOutputSizeList() const override { return output_size_list_; } + const std::vector &GetWorkspaceSizeList() const override { return workspace_size_list_; } + + bool Launch(const std::vector &inputs, const std::vector &workspace, + const std::vector &outputs, void *stream_ptr) override { + auto uint_crd = GetDeviceAddress(inputs, 0); + auto LJtype = GetDeviceAddress(inputs, 1); + auto charge = GetDeviceAddress(inputs, 2); + auto scaler = GetDeviceAddress(inputs, 3); + auto nl_numbers = GetDeviceAddress(inputs, 4); + auto nl_serial = GetDeviceAddress(inputs, 5); + auto d_LJ_a = GetDeviceAddress(inputs, 6); + auto d_LJ_b = GetDeviceAddress(inputs, 7); + + auto uint_crd_with_LJ = GetDeviceAddress(workspace, 0); + auto nl = GetDeviceAddress(workspace, 1); + + auto frc = GetDeviceAddress(outputs, 0); + auto atom_lj_virial = GetDeviceAddress(outputs, 1); + auto atom_energy = GetDeviceAddress(outputs, 2); + LJ_Direct_CF_Force_With_LJ_Virial_Direct_CF_Energy(atom_numbers, cutoff, pme_beta, uint_crd, LJtype, charge, scaler, + uint_crd_with_LJ, nl_numbers, nl_serial, nl, d_LJ_a, d_LJ_b, frc, + atom_lj_virial, atom_energy, 
max_neighbor_numbers, + reinterpret_cast(stream_ptr)); + return true; + } + + protected: + void InitSizeLists() override { + input_size_list_.push_back(ele_uint_crd * sizeof(T2)); + input_size_list_.push_back(ele_LJtype * sizeof(T1)); + input_size_list_.push_back(ele_charge * sizeof(T)); + input_size_list_.push_back(ele_scaler * sizeof(T)); + input_size_list_.push_back(atom_numbers * sizeof(T1)); + input_size_list_.push_back(max_neighbor_numbers * sizeof(T1)); + input_size_list_.push_back(ele_d_LJ_a * sizeof(T)); + input_size_list_.push_back(ele_d_LJ_b * sizeof(T)); + + workspace_size_list_.push_back(atom_numbers * max_neighbor_numbers * sizeof(T1)); + workspace_size_list_.push_back(atom_numbers * sizeof(UINT_VECTOR_LJ_TYPE)); + + output_size_list_.push_back(atom_numbers * 3 * sizeof(T)); + output_size_list_.push_back(atom_numbers * sizeof(T)); + output_size_list_.push_back(atom_numbers * sizeof(T)); + } + + private: + size_t ele_uint_crd = 1; + size_t ele_LJtype = 1; + size_t ele_charge = 1; + size_t ele_scaler = 1; + size_t ele_nl = 1; + size_t ele_d_LJ_a = 1; + size_t ele_d_LJ_b = 1; + + std::vector input_size_list_; + std::vector output_size_list_; + std::vector workspace_size_list_; + int atom_numbers; + int max_neighbor_numbers; + float pme_beta; + float cutoff; + struct UINT_VECTOR_LJ_TYPE { + unsigned int uint_x; + unsigned int uint_y; + unsigned int uint_z; + int LJ_type; + float charge; + }; + struct NEIGHBOR_LIST { + int atom_numbers; + int *atom_serial; + }; +}; +} // namespace kernel +} // namespace mindspore +#endif diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/lj/lj_force_with_pme_direct_force_update_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/lj/lj_force_with_pme_direct_force_update_kernel.cc new file mode 100644 index 00000000000..5d0fb140cfb --- /dev/null +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/lj/lj_force_with_pme_direct_force_update_kernel.cc @@ -0,0 +1,38 @@ +/** + * Copyright 2021 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/** + *Note: + * LJForceWithPMEDirectForceUpdate. This is an experimental interface that is subject to change and/or deletion. 
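+ *
+ * Registration of the "update" variant: same signature as
+ * LJForceWithPMEDirectForce plus one trailing float32 input that carries
+ * the current pme_beta on device; it is consumed only when the attribute
+ * `need_update` is set (see Launch() in the included header).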
+ */ +#include "backend/kernel_compiler/gpu/sponge/lj/lj_force_with_pme_direct_force_update_kernel.h" + +namespace mindspore { +namespace kernel { +MS_REG_GPU_KERNEL_TWO(LJForceWithPMEDirectForceUpdate, + KernelAttr() + .AddInputAttr(kNumberTypeUInt32) + .AddInputAttr(kNumberTypeInt32) + .AddInputAttr(kNumberTypeFloat32) + .AddInputAttr(kNumberTypeFloat32) + .AddInputAttr(kNumberTypeInt32) + .AddInputAttr(kNumberTypeInt32) + .AddInputAttr(kNumberTypeFloat32) + .AddInputAttr(kNumberTypeFloat32) + .AddInputAttr(kNumberTypeFloat32) + .AddOutputAttr(kNumberTypeFloat32), + LJForceWithPMEDirectForceUpdateGpuKernel, float, int) +} // namespace kernel +} // namespace mindspore diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/lj/lj_force_with_pme_direct_force_update_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/lj/lj_force_with_pme_direct_force_update_kernel.h new file mode 100644 index 00000000000..24458d7cc32 --- /dev/null +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/lj/lj_force_with_pme_direct_force_update_kernel.h @@ -0,0 +1,151 @@ +/** + * Copyright 2021 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/** + *Note: + * LJForceWithPMEDirectForceUpdate. This is an experimental interface that is subject to change and/or deletion. 
+ */ +#ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_SPONGE_LJ_LJ_FORCE_WITH_PME_DIRECT_FORCE_KERNEL_H_ +#define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_SPONGE_LJ_LJ_FORCE_WITH_PME_DIRECT_FORCE_KERNEL_H_ +#include +#include +#include +#include +#include "backend/kernel_compiler/gpu/gpu_kernel.h" +#include "backend/kernel_compiler/gpu/gpu_kernel_factory.h" +#include "runtime/device/gpu/cuda_common.h" +#include "backend/kernel_compiler/gpu/cuda_impl/sponge/lj/lj_force_with_pme_direct_force_impl.cuh" +namespace mindspore { +namespace kernel { +template +class LJForceWithPMEDirectForceUpdateGpuKernel : public GpuKernel { + public: + LJForceWithPMEDirectForceUpdateGpuKernel() : ele_uint_crd(1) {} + ~LJForceWithPMEDirectForceUpdateGpuKernel() override = default; + + bool Init(const CNodePtr &kernel_node) override { + kernel_node_ = kernel_node; + atom_numbers = static_cast(GetAttr(kernel_node, "atom_numbers")); + cutoff = static_cast(GetAttr(kernel_node, "cutoff")); + pme_beta = static_cast(GetAttr(kernel_node, "pme_beta")); + need_update = static_cast(GetAttr(kernel_node, "need_update")); + + auto shape_uint_crd = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0); + auto shape_LJtype = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 1); + auto shape_charge = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 2); + auto shape_scaler = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 3); + auto shape_nl_numbers = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 4); + auto shape_nl_serial = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 5); + auto shape_d_LJ_a = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 6); + auto shape_d_LJ_b = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 7); + + for (size_t i = 0; i < shape_uint_crd.size(); i++) ele_uint_crd *= shape_uint_crd[i]; + for (size_t i = 0; i < shape_LJtype.size(); i++) ele_LJtype *= shape_LJtype[i]; + for (size_t i = 0; i < shape_charge.size(); i++) ele_charge *= shape_charge[i]; + for (size_t i = 0; i < shape_scaler.size(); i++) ele_scaler *= shape_scaler[i]; + for (size_t i = 0; i < shape_d_LJ_a.size(); i++) ele_d_LJ_a *= shape_d_LJ_a[i]; + for (size_t i = 0; i < shape_d_LJ_b.size(); i++) ele_d_LJ_b *= shape_d_LJ_b[i]; + + InitSizeLists(); + return true; + } + + const std::vector &GetInputSizeList() const override { return input_size_list_; } + const std::vector &GetOutputSizeList() const override { return output_size_list_; } + const std::vector &GetWorkspaceSizeList() const override { return workspace_size_list_; } + + bool Launch(const std::vector &inputs, const std::vector &workspace, + const std::vector &outputs, void *stream_ptr) override { + auto uint_crd = GetDeviceAddress(inputs, 0); + auto LJtype = GetDeviceAddress(inputs, 1); + auto charge = GetDeviceAddress(inputs, 2); + auto scaler = GetDeviceAddress(inputs, 3); + auto nl_numbers = GetDeviceAddress(inputs, 4); + auto nl_serial = GetDeviceAddress(inputs, 5); + auto d_LJ_a = GetDeviceAddress(inputs, 6); + auto d_LJ_b = GetDeviceAddress(inputs, 7); + auto d_beta = GetDeviceAddress(inputs, 8); + + if (need_update) { + cudaMemcpyAsync(&pme_beta, d_beta, sizeof(float), cudaMemcpyDeviceToHost, + reinterpret_cast(stream_ptr)); + cudaStreamSynchronize(reinterpret_cast(stream_ptr)); + } + + auto uint_crd_with_LJ = GetDeviceAddress(workspace, 0); + auto nl = GetDeviceAddress(workspace, 1); + + auto frc = GetDeviceAddress(outputs, 0); + LJForceWithPMEDirectForce(atom_numbers, cutoff, pme_beta, uint_crd, LJtype, charge, scaler, uint_crd_with_LJ, + nl_numbers, 
nl_serial, nl, d_LJ_a, d_LJ_b, frc, + reinterpret_cast(stream_ptr)); + return true; + } + + protected: + void InitSizeLists() override { + input_size_list_.push_back(ele_uint_crd * sizeof(T1)); + input_size_list_.push_back(ele_LJtype * sizeof(T1)); + input_size_list_.push_back(ele_charge * sizeof(T)); + input_size_list_.push_back(ele_scaler * sizeof(T)); + input_size_list_.push_back(atom_numbers * sizeof(T1)); + input_size_list_.push_back(max_nl_numbers * sizeof(T1)); + input_size_list_.push_back(ele_d_LJ_a * sizeof(T)); + input_size_list_.push_back(ele_d_LJ_b * sizeof(T)); + input_size_list_.push_back(sizeof(T)); + + workspace_size_list_.push_back(atom_numbers * max_nl_numbers * sizeof(T1)); + workspace_size_list_.push_back(atom_numbers * sizeof(UINT_VECTOR_LJ_TYPE)); + + output_size_list_.push_back(atom_numbers * 3 * sizeof(T)); + } + + private: + size_t ele_uint_crd = 1; + size_t ele_LJtype = 1; + size_t ele_charge = 1; + size_t ele_scaler = 1; + size_t ele_nl = 1; + size_t ele_d_LJ_a = 1; + size_t ele_d_LJ_b = 1; + + std::vector input_size_list_; + std::vector output_size_list_; + std::vector workspace_size_list_; + int atom_numbers; + float pme_beta; + float cutoff; + int need_update; + int max_nl_numbers = 800; + struct UINT_VECTOR_LJ_TYPE { + unsigned int uint_x; + unsigned int uint_y; + unsigned int uint_z; + int LJ_type; + float charge; + }; + struct NEIGHBOR_LIST { + int atom_numbers; + int *atom_serial; + }; + struct VECTOR { + float x; + float y; + float z; + }; +}; +} // namespace kernel +} // namespace mindspore +#endif diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/lj/lj_force_with_virial_energy_update_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/lj/lj_force_with_virial_energy_update_kernel.cc new file mode 100644 index 00000000000..75a89e5e9c4 --- /dev/null +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/lj/lj_force_with_virial_energy_update_kernel.cc @@ -0,0 +1,40 @@ +/** + * Copyright 2021 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/** + *Note: + * LJForceWithVirialEnergyUpdate. This is an experimental interface that is subject to change and/or deletion. 
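+ *
+ * "Update" counterpart of LJForceWithVirialEnergy: the same inputs plus a
+ * trailing float32 that carries the current pme_beta on device, and the
+ * same three outputs (force, per-atom LJ virial, per-atom direct Coulomb
+ * energy).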
+ */ +#include "backend/kernel_compiler/gpu/sponge/lj/lj_force_with_virial_energy_update_kernel.h" + +namespace mindspore { +namespace kernel { +MS_REG_GPU_KERNEL_THREE(LJForceWithVirialEnergyUpdate, + KernelAttr() + .AddInputAttr(kNumberTypeUInt32) + .AddInputAttr(kNumberTypeInt32) + .AddInputAttr(kNumberTypeFloat32) + .AddInputAttr(kNumberTypeFloat32) + .AddInputAttr(kNumberTypeInt32) + .AddInputAttr(kNumberTypeInt32) + .AddInputAttr(kNumberTypeFloat32) + .AddInputAttr(kNumberTypeFloat32) + .AddInputAttr(kNumberTypeFloat32) + .AddOutputAttr(kNumberTypeFloat32) + .AddOutputAttr(kNumberTypeFloat32) + .AddOutputAttr(kNumberTypeFloat32), + LJForceWithVirialEnergyUpdateGpuKernel, float, int, unsigned int) +} // namespace kernel +} // namespace mindspore diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/lj/lj_force_with_virial_energy_update_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/lj/lj_force_with_virial_energy_update_kernel.h new file mode 100644 index 00000000000..6e9a12567b5 --- /dev/null +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/lj/lj_force_with_virial_energy_update_kernel.h @@ -0,0 +1,152 @@ +/** + * Copyright 2021 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/** + *Note: + * LJForceWithVirialEnergyUpdate. This is an experimental interface that is subject to change and/or deletion. 
+ */ +#ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_LJ_FORCE_WITH_VIRIAL_ENERGY_UPDATE_KERNEL_H_ +#define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_LJ_FORCE_WITH_VIRIAL_ENERGY_UPDATE_KERNEL_H_ +#include +#include +#include +#include +#include "backend/kernel_compiler/gpu/gpu_kernel.h" +#include "backend/kernel_compiler/gpu/gpu_kernel_factory.h" +#include "runtime/device/gpu/cuda_common.h" +#include "backend/kernel_compiler/gpu/cuda_impl/sponge/lj/lj_direct_cf_force_with_lj_virial_direct_cf_energy_impl.cuh" +namespace mindspore { +namespace kernel { +template +class LJForceWithVirialEnergyUpdateGpuKernel : public GpuKernel { + public: + LJForceWithVirialEnergyUpdateGpuKernel() : ele_uint_crd(1) {} + ~LJForceWithVirialEnergyUpdateGpuKernel() override = default; + + bool Init(const CNodePtr &kernel_node) override { + kernel_node_ = kernel_node; + atom_numbers = static_cast(GetAttr(kernel_node, "atom_numbers")); + max_neighbor_numbers = static_cast(GetAttr(kernel_node, "max_neighbor_numbers")); + cutoff = static_cast(GetAttr(kernel_node, "cutoff")); + pme_beta = static_cast(GetAttr(kernel_node, "pme_beta")); + need_update = static_cast(GetAttr(kernel_node, "need_update")); + + auto shape_uint_crd = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0); + auto shape_LJtype = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 1); + auto shape_charge = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 2); + auto shape_scaler = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 3); + auto shape_nl_numbers = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 4); + auto shape_nl_serial = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 5); + auto shape_d_LJ_a = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 6); + auto shape_d_LJ_b = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 7); + + for (size_t i = 0; i < shape_uint_crd.size(); i++) ele_uint_crd *= shape_uint_crd[i]; + for (size_t i = 0; i < shape_LJtype.size(); i++) ele_LJtype *= shape_LJtype[i]; + for (size_t i = 0; i < shape_charge.size(); i++) ele_charge *= shape_charge[i]; + for (size_t i = 0; i < shape_scaler.size(); i++) ele_scaler *= shape_scaler[i]; + for (size_t i = 0; i < shape_d_LJ_a.size(); i++) ele_d_LJ_a *= shape_d_LJ_a[i]; + for (size_t i = 0; i < shape_d_LJ_b.size(); i++) ele_d_LJ_b *= shape_d_LJ_b[i]; + + InitSizeLists(); + return true; + } + + const std::vector &GetInputSizeList() const override { return input_size_list_; } + const std::vector &GetOutputSizeList() const override { return output_size_list_; } + const std::vector &GetWorkspaceSizeList() const override { return workspace_size_list_; } + + bool Launch(const std::vector &inputs, const std::vector &workspace, + const std::vector &outputs, void *stream_ptr) override { + auto uint_crd = GetDeviceAddress(inputs, 0); + auto LJtype = GetDeviceAddress(inputs, 1); + auto charge = GetDeviceAddress(inputs, 2); + auto scaler = GetDeviceAddress(inputs, 3); + auto nl_numbers = GetDeviceAddress(inputs, 4); + auto nl_serial = GetDeviceAddress(inputs, 5); + auto d_LJ_a = GetDeviceAddress(inputs, 6); + auto d_LJ_b = GetDeviceAddress(inputs, 7); + auto d_beta = GetDeviceAddress(inputs, 8); + + auto uint_crd_with_LJ = GetDeviceAddress(workspace, 0); + auto nl = GetDeviceAddress(workspace, 1); + + auto frc = GetDeviceAddress(outputs, 0); + auto atom_lj_virial = GetDeviceAddress(outputs, 1); + auto atom_energy = GetDeviceAddress(outputs, 2); + + if (need_update) { + cudaMemcpyAsync(&pme_beta, d_beta, sizeof(float), cudaMemcpyDeviceToHost, + reinterpret_cast(stream_ptr)); + 
cudaStreamSynchronize(reinterpret_cast(stream_ptr)); + } + + LJ_Direct_CF_Force_With_LJ_Virial_Direct_CF_Energy(atom_numbers, cutoff, pme_beta, uint_crd, LJtype, charge, scaler, + uint_crd_with_LJ, nl_numbers, nl_serial, nl, d_LJ_a, d_LJ_b, frc, + atom_lj_virial, atom_energy, max_neighbor_numbers, + reinterpret_cast(stream_ptr)); + return true; + } + + protected: + void InitSizeLists() override { + input_size_list_.push_back(ele_uint_crd * sizeof(T2)); + input_size_list_.push_back(ele_LJtype * sizeof(T1)); + input_size_list_.push_back(ele_charge * sizeof(T)); + input_size_list_.push_back(ele_scaler * sizeof(T)); + input_size_list_.push_back(atom_numbers * sizeof(T1)); + input_size_list_.push_back(max_neighbor_numbers * sizeof(T1)); + input_size_list_.push_back(ele_d_LJ_a * sizeof(T)); + input_size_list_.push_back(ele_d_LJ_b * sizeof(T)); + + workspace_size_list_.push_back(atom_numbers * max_neighbor_numbers * sizeof(T1)); + workspace_size_list_.push_back(atom_numbers * sizeof(UINT_VECTOR_LJ_TYPE)); + + output_size_list_.push_back(atom_numbers * 3 * sizeof(T)); + output_size_list_.push_back(atom_numbers * sizeof(T)); + output_size_list_.push_back(atom_numbers * sizeof(T)); + } + + private: + size_t ele_uint_crd = 1; + size_t ele_LJtype = 1; + size_t ele_charge = 1; + size_t ele_scaler = 1; + size_t ele_nl = 1; + size_t ele_d_LJ_a = 1; + size_t ele_d_LJ_b = 1; + + std::vector input_size_list_; + std::vector output_size_list_; + std::vector workspace_size_list_; + int atom_numbers; + int max_neighbor_numbers; + float pme_beta; + float cutoff; + int need_update; + struct UINT_VECTOR_LJ_TYPE { + unsigned int uint_x; + unsigned int uint_y; + unsigned int uint_z; + int LJ_type; + float charge; + }; + struct NEIGHBOR_LIST { + int atom_numbers; + int *atom_serial; + }; +}; +} // namespace kernel +} // namespace mindspore +#endif diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/nvtit/md_iteration_leap_frog_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/nvtit/md_iteration_leap_frog_kernel.h index a5c06f60a61..a492d98fdef 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/nvtit/md_iteration_leap_frog_kernel.h +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/nvtit/md_iteration_leap_frog_kernel.h @@ -13,14 +13,19 @@ * See the License for the specific language governing permissions and * limitations under the License. */ +/** + *Note: + * MDIterationLeapFrog. This is an experimental interface that is subject to change and/or deletion. 
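+ *
+ * The reworked op is a plain leap-frog step over five inputs
+ * (vel, crd, frc, acc, inverse_mass) with attributes atom_numbers and dt.
+ * A hedged sketch of the conventional per-atom update (the exact form is
+ * in the CUDA implementation):
+ *
+ *   acc_i = inverse_mass_i * frc_i
+ *   vel_i = vel_i + dt * acc_i
+ *   crd_i = crd_i + dt * vel_i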
+ */ -#ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_SPONG_NVTIT_MD_ITERATION_LEAP_FROG_KERNEL_H_ -#define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_SPONG_NVTIT_MD_ITERATION_LEAP_FROG_KERNEL_H_ +#ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_MD_ITERATION_LEAP_FROG_KERNEL_H_ +#define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_MD_ITERATION_LEAP_FROG_KERNEL_H_ #include -#include -#include #include +#include +#include + #include "backend/kernel_compiler/gpu/gpu_kernel.h" #include "backend/kernel_compiler/gpu/gpu_kernel_factory.h" #include "runtime/device/gpu/cuda_common.h" @@ -31,25 +36,13 @@ namespace kernel { template class MDIterationLeapFrogGpuKernel : public GpuKernel { public: - MDIterationLeapFrogGpuKernel() : ele_mass_inverse(1) {} + MDIterationLeapFrogGpuKernel() {} ~MDIterationLeapFrogGpuKernel() override = default; bool Init(const CNodePtr &kernel_node) override { kernel_node_ = kernel_node; - float4_numbers = static_cast(GetAttr(kernel_node, "float4_numbers")); atom_numbers = static_cast(GetAttr(kernel_node, "atom_numbers")); - half_dt = static_cast(GetAttr(kernel_node, "half_dt")); dt = static_cast(GetAttr(kernel_node, "dt")); - exp_gamma = static_cast(GetAttr(kernel_node, "exp_gamma")); - is_max_velocity = static_cast(GetAttr(kernel_node, "is_max_velocity")); - max_velocity = static_cast(GetAttr(kernel_node, "max_velocity")); - - auto shape_mass_inverse = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0); - auto shape_qrt_mass = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 1); - - for (size_t i = 0; i < shape_mass_inverse.size(); i++) ele_mass_inverse *= shape_mass_inverse[i]; - for (size_t i = 0; i < shape_qrt_mass.size(); i++) ele_sqrt_mass *= shape_qrt_mass[i]; - InitSizeLists(); return true; } @@ -58,49 +51,36 @@ class MDIterationLeapFrogGpuKernel : public GpuKernel { const std::vector &GetOutputSizeList() const override { return output_size_list_; } const std::vector &GetWorkspaceSizeList() const override { return workspace_size_list_; } - bool Launch(const std::vector &inputs, const std::vector &, + bool Launch(const std::vector &inputs, const std::vector &workspace, const std::vector &outputs, void *stream_ptr) override { - auto d_mass_inverse = GetDeviceAddress(inputs, 0); - auto d_sqrt_mass = GetDeviceAddress(inputs, 1); - - auto vel_f = GetDeviceAddress(outputs, 0); - auto crd_f = GetDeviceAddress(outputs, 1); - auto frc_f = GetDeviceAddress(outputs, 2); - auto acc_f = GetDeviceAddress(outputs, 3); - - MDIterationLeapFrog(float4_numbers, atom_numbers, half_dt, dt, exp_gamma, is_max_velocity, max_velocity, - d_mass_inverse, d_sqrt_mass, vel_f, crd_f, frc_f, acc_f, - reinterpret_cast(stream_ptr)); + auto vel = GetDeviceAddress(inputs, 0); + auto crd = GetDeviceAddress(inputs, 1); + auto frc = GetDeviceAddress(inputs, 2); + auto acc = GetDeviceAddress(inputs, 3); + auto inverse_mass = GetDeviceAddress(inputs, 4); + MDIterationLeapFrog(atom_numbers, vel, crd, frc, acc, inverse_mass, dt, reinterpret_cast(stream_ptr)); return true; } protected: void InitSizeLists() override { - input_size_list_.push_back(ele_mass_inverse * sizeof(T)); - input_size_list_.push_back(ele_sqrt_mass * sizeof(T)); + input_size_list_.push_back(atom_numbers * 3 * sizeof(T)); + input_size_list_.push_back(atom_numbers * 3 * sizeof(T)); + input_size_list_.push_back(atom_numbers * 3 * sizeof(T)); + input_size_list_.push_back(atom_numbers * 3 * sizeof(T)); + input_size_list_.push_back(atom_numbers * sizeof(T)); - output_size_list_.push_back(3 * atom_numbers * sizeof(T)); - 
output_size_list_.push_back(3 * atom_numbers * sizeof(T)); - output_size_list_.push_back(3 * atom_numbers * sizeof(T)); - output_size_list_.push_back(3 * atom_numbers * sizeof(T)); + output_size_list_.push_back(sizeof(T)); } private: - size_t ele_mass_inverse = 1; - size_t ele_sqrt_mass = 1; - std::vector input_size_list_; std::vector output_size_list_; std::vector workspace_size_list_; - int float4_numbers; int atom_numbers; - float half_dt; float dt; - float exp_gamma; - int is_max_velocity; - float max_velocity; }; } // namespace kernel } // namespace mindspore -#endif // MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_SPONG_NVTIT_MD_ITERATION_LEAP_FROG_KERNEL_H_ +#endif diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/pme/pme_energy_update_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/pme/pme_energy_update_kernel.cc new file mode 100644 index 00000000000..59bc1749988 --- /dev/null +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/pme/pme_energy_update_kernel.cc @@ -0,0 +1,42 @@ +/** + * Copyright 2021 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/** + *Note: + * PMEEnergyUpdate. This is an experimental interface that is subject to change and/or deletion. + */ +#include "backend/kernel_compiler/gpu/sponge/pme/pme_energy_update_kernel.h" + +namespace mindspore { +namespace kernel { +MS_REG_GPU_KERNEL_TWO(PMEEnergyUpdate, + KernelAttr() + .AddInputAttr(kNumberTypeUInt32) + .AddInputAttr(kNumberTypeFloat32) + .AddInputAttr(kNumberTypeInt32) + .AddInputAttr(kNumberTypeInt32) + .AddInputAttr(kNumberTypeFloat32) + .AddInputAttr(kNumberTypeInt32) + .AddInputAttr(kNumberTypeInt32) + .AddInputAttr(kNumberTypeInt32) + .AddInputAttr(kNumberTypeFloat32) + .AddInputAttr(kNumberTypeFloat32) + .AddOutputAttr(kNumberTypeFloat32) + .AddOutputAttr(kNumberTypeFloat32) + .AddOutputAttr(kNumberTypeFloat32) + .AddOutputAttr(kNumberTypeFloat32), + PMEEnergyUpdateGpuKernel, float, int) +} // namespace kernel +} // namespace mindspore diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/pme/pme_energy_update_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/pme/pme_energy_update_kernel.h new file mode 100644 index 00000000000..1cb7a8273c7 --- /dev/null +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/pme/pme_energy_update_kernel.h @@ -0,0 +1,313 @@ +/** + * Copyright 2021 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/** + *Note: + * PMEEnergyUpdate. 
This is an experimental interface that is subject to change and/or deletion. + */ +#ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_SPONGE_PME_PME_ENERGY_UPDATE_KERNEL_H_ +#define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_SPONGE_PME_PME_ENERGY_UPDATE_KERNEL_H_ +#include +#include +#include +#include "backend/kernel_compiler/gpu/gpu_kernel.h" +#include "backend/kernel_compiler/gpu/gpu_kernel_factory.h" +#include "runtime/device/gpu/cuda_common.h" +#include "backend/kernel_compiler/gpu/cuda_impl/sponge/pme/pme_energy_update_impl.cuh" + +namespace mindspore { +namespace kernel { +template +class PMEEnergyUpdateGpuKernel : public GpuKernel { + public: + PMEEnergyUpdateGpuKernel() : ele_uint_crd(1) {} + ~PMEEnergyUpdateGpuKernel() override = default; + + bool Init(const CNodePtr &kernel_node) override { + kernel_node_ = kernel_node; + atom_numbers = static_cast(GetAttr(kernel_node, "atom_numbers")); + excluded_numbers = static_cast(GetAttr(kernel_node, "excluded_numbers")); + beta = static_cast(GetAttr(kernel_node, "beta")); + fftx = static_cast(GetAttr(kernel_node, "fftx")); + ffty = static_cast(GetAttr(kernel_node, "ffty")); + fftz = static_cast(GetAttr(kernel_node, "fftz")); + + float box_length_0 = static_cast(GetAttr(kernel_node, "box_length_0")); + float box_length_1 = static_cast(GetAttr(kernel_node, "box_length_1")); + float box_length_2 = static_cast(GetAttr(kernel_node, "box_length_2")); + max_neighbor_numbers = static_cast(GetAttr(kernel_node, "max_neighbor_numbers")); + need_update = static_cast(GetAttr(kernel_node, "need_update")); + + cufftPlan3d(&PME_plan_r2c, fftx, ffty, fftz, CUFFT_R2C); + cufftPlan3d(&PME_plan_c2r, fftx, ffty, fftz, CUFFT_C2R); + _thread_PME.x = 8; + _thread_PME.y = 8; + PME_Nin = ffty * fftz; + PME_Nfft = fftx * ffty * (fftz / 2 + 1); + PME_Nall = fftx * ffty * fftz; + PME_kxyz_cpu.resize(64); + + std::vector h_box_length{box_length_0, box_length_1, box_length_2}; + VECTOR *box_length = reinterpret_cast(h_box_length.data()); + + volume = box_length[0].x * box_length[0].y * box_length[0].z; + int kx, ky, kz, kxrp, kyrp, kzrp, index; + for (kx = 0; kx < 4; kx++) { + for (ky = 0; ky < 4; ky++) { + for (kz = 0; kz < 4; kz++) { + index = kx * 16 + ky * 4 + kz; + PME_kxyz_cpu[index].uint_x = kx; + PME_kxyz_cpu[index].uint_y = ky; + PME_kxyz_cpu[index].uint_z = kz; + } + } + } + + B1.resize(fftx); + B2.resize(ffty); + B3.resize(fftz); + PME_BC0.resize(PME_Nfft); + for (kx = 0; kx < fftx; kx++) { + B1[kx] = getb(kx, fftx, 4); + } + + for (ky = 0; ky < ffty; ky++) { + B2[ky] = getb(ky, ffty, 4); + } + + for (kz = 0; kz < fftz; kz++) { + B3[kz] = getb(kz, fftz, 4); + } + float mprefactor = PI * PI / -beta / beta; + + float msq; + for (kx = 0; kx < fftx; kx++) { + kxrp = kx; + if (kx > fftx / 2) kxrp = fftx - kx; + for (ky = 0; ky < ffty; ky++) { + kyrp = ky; + if (ky > ffty / 2) kyrp = ffty - ky; + for (kz = 0; kz <= fftz / 2; kz++) { + kzrp = kz; + + msq = kxrp * kxrp / box_length[0].x / box_length[0].x + kyrp * kyrp / box_length[0].y / box_length[0].y + + kzrp * kzrp / box_length[0].z / box_length[0].z; + index = kx * ffty * (fftz / 2 + 1) + ky * (fftz / 2 + 1) + kz; + if ((kx + ky + kz) == 0) { + PME_BC0[index] = 0; + } else { + PME_BC0[index] = 1.0 / PI / msq * exp(mprefactor * msq) / volume; + } + + PME_BC0[index] *= B1[kx] * B2[ky] * B3[kz]; + } + } + } + + InitSizeLists(); + return true; + } + + const std::vector &GetInputSizeList() const override { return input_size_list_; } + const std::vector &GetOutputSizeList() const override { return 
output_size_list_; } + const std::vector &GetWorkspaceSizeList() const override { return workspace_size_list_; } + + bool Launch(const std::vector &inputs, const std::vector &workspace, + const std::vector &outputs, void *stream_ptr) override { + auto uint_crd = GetDeviceAddress(inputs, 0); + auto charge = GetDeviceAddress(inputs, 1); + auto nl_numbers = GetDeviceAddress(inputs, 2); + auto nl_serial = GetDeviceAddress(inputs, 3); + auto scaler = GetDeviceAddress(inputs, 4); + auto excluded_list_start = GetDeviceAddress(inputs, 5); + auto excluded_list = GetDeviceAddress(inputs, 6); + auto excluded_atom_numbers = GetDeviceAddress(inputs, 7); + auto neutralizing_factor = GetDeviceAddress(inputs, 8); + auto d_beta = GetDeviceAddress(inputs, 9); + + auto pme_uxyz = GetDeviceAddress(workspace, 0); // workspace + auto pme_frxyz = GetDeviceAddress(workspace, 1); // workspace + auto pme_q = GetDeviceAddress(workspace, 2); // workspace + auto pme_fq = GetDeviceAddress(workspace, 3); // workspace + auto pme_atom_near = GetDeviceAddress(workspace, 4); // workspace + auto pme_bc = GetDeviceAddress(workspace, 5); // workspace + auto pme_kxyz = GetDeviceAddress(workspace, 6); // workspace + auto nl = GetDeviceAddress(workspace, 7); + auto charge_sum = GetDeviceAddress(workspace, 8); + + auto reciprocal_ene = GetDeviceAddress(outputs, 0); + auto self_ene = GetDeviceAddress(outputs, 1); + auto direct_ene = GetDeviceAddress(outputs, 2); + auto correction_ene = GetDeviceAddress(outputs, 3); + + h_beta = beta; + if (need_update) { + cudaMemcpyAsync(&h_beta, d_beta, sizeof(float), cudaMemcpyDeviceToHost, + reinterpret_cast(stream_ptr)); + cudaStreamSynchronize(reinterpret_cast(stream_ptr)); + factor = h_beta / beta; + double factor_inverse = 1.0 / factor; + + for (int i = 0; i < PME_Nfft; i++) { + PME_BC0[i] *= factor_inverse; // update PME_BC0 + } + } + + cufftSetStream(PME_plan_r2c, reinterpret_cast(stream_ptr)); + cufftSetStream(PME_plan_c2r, reinterpret_cast(stream_ptr)); + cudaMemcpyAsync(pme_kxyz, PME_kxyz_cpu.data(), sizeof(UNSIGNED_INT_VECTOR) * 64, cudaMemcpyHostToDevice, + reinterpret_cast(stream_ptr)); + cudaMemcpyAsync(pme_bc, PME_BC0.data(), sizeof(float) * PME_Nfft, cudaMemcpyHostToDevice, + reinterpret_cast(stream_ptr)); + + PMEEnergyUpdate(fftx, ffty, fftz, atom_numbers, h_beta, pme_bc, pme_uxyz, pme_frxyz, pme_q, pme_fq, pme_atom_near, + pme_kxyz, uint_crd, charge, nl_numbers, nl_serial, nl, scaler, excluded_list_start, excluded_list, + excluded_atom_numbers, reciprocal_ene, self_ene, direct_ene, correction_ene, _thread_PME, PME_Nin, + PME_Nfft, PME_Nall, PME_plan_r2c, PME_plan_c2r, neutralizing_factor, charge_sum, + max_neighbor_numbers, reinterpret_cast(stream_ptr)); + return true; + } + + protected: + void InitSizeLists() override { + input_size_list_.push_back(atom_numbers * sizeof(UNSIGNED_INT_VECTOR)); + input_size_list_.push_back(atom_numbers * sizeof(VECTOR)); + input_size_list_.push_back(atom_numbers * sizeof(T1)); + input_size_list_.push_back(max_neighbor_numbers * sizeof(T1)); + input_size_list_.push_back(sizeof(VECTOR)); + + input_size_list_.push_back(atom_numbers * sizeof(T1)); + input_size_list_.push_back(excluded_numbers * sizeof(T1)); + input_size_list_.push_back(atom_numbers * sizeof(T1)); + input_size_list_.push_back(sizeof(T)); + input_size_list_.push_back(sizeof(T)); + + workspace_size_list_.push_back(atom_numbers * sizeof(UNSIGNED_INT_VECTOR)); + workspace_size_list_.push_back(atom_numbers * sizeof(VECTOR)); + workspace_size_list_.push_back(PME_Nall * sizeof(T)); + 
workspace_size_list_.push_back(PME_Nfft * sizeof(cufftComplex)); + workspace_size_list_.push_back(atom_numbers * 64 * sizeof(int)); + workspace_size_list_.push_back(PME_Nfft * sizeof(float)); + workspace_size_list_.push_back(64 * sizeof(UNSIGNED_INT_VECTOR)); + workspace_size_list_.push_back(atom_numbers * max_neighbor_numbers * sizeof(T1)); + workspace_size_list_.push_back(sizeof(T)); + + output_size_list_.push_back(sizeof(T)); + output_size_list_.push_back(sizeof(T)); + output_size_list_.push_back(sizeof(T)); + output_size_list_.push_back(sizeof(T)); + } + + cufftComplex expc(cufftComplex z) { + cufftComplex res; + float t = expf(z.x); + sincosf(z.y, &res.y, &res.x); + res.x *= t; + res.y *= t; + return res; + } + + float M_(float u, int n) { + if (n == 2) { + if (u > 2 || u < 0) return 0; + return 1 - abs(u - 1); + } else { + return u / (n - 1) * M_(u, n - 1) + (n - u) / (n - 1) * M_(u - 1, n - 1); + } + } + + float getb(int k, int NFFT, int B_order) { + cufftComplex tempc, tempc2, res; + float tempf; + tempc2.x = 0; + tempc2.y = 0; + + tempc.x = 0; + if (NFFT == 0) { + MS_LOG(ERROR) << "Divide by zero."; + } else { + tempc.y = 2 * (B_order - 1) * PI * k / NFFT; + } + res = expc(tempc); + + for (int kk = 0; kk < (B_order - 1); kk++) { + tempc.x = 0; + if (NFFT == 0) { + MS_LOG(ERROR) << "Divide by zero."; + break; + } else { + tempc.y = 2 * PI * k / NFFT * kk; + } + tempc = expc(tempc); + tempf = M_(kk + 1, B_order); + tempc2.x += tempf * tempc.x; + tempc2.y += tempf * tempc.y; + } + res = cuCdivf(res, tempc2); + return res.x * res.x + res.y * res.y; + } + + private: + size_t ele_uint_crd = 1; + + std::vector input_size_list_; + std::vector output_size_list_; + std::vector workspace_size_list_; + + std::vector B1; + std::vector B2; + std::vector B3; + std::vector PME_BC0; + + int atom_numbers; + int excluded_numbers; + int max_neighbor_numbers; + int fftx; + int ffty; + int fftz; + float beta; + float factor; + float h_beta; + int PME_Nin; + int PME_Nall; + int PME_Nfft; + float volume; + float PI = 3.1415926; + int need_update; + cufftHandle PME_plan_r2c; + cufftHandle PME_plan_c2r; + + dim3 _thread_PME; + + struct VECTOR { + float x; + float y; + float z; + }; + + struct UNSIGNED_INT_VECTOR { + unsigned int uint_x; + unsigned int uint_y; + unsigned int uint_z; + }; + std::vector PME_kxyz_cpu; + struct NEIGHBOR_LIST { + int atom_numbers; + int *atom_serial; + }; +}; +} // namespace kernel +} // namespace mindspore +#endif diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/pme/pme_excluded_force_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/pme/pme_excluded_force_kernel.h index b7becdeaaa6..0553299ae02 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/pme/pme_excluded_force_kernel.h +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/pme/pme_excluded_force_kernel.h @@ -13,17 +13,20 @@ * See the License for the specific language governing permissions and * limitations under the License. */ +/** + *Note: + * PMEExcludedForce. This is an experimental interface that is subject to change and/or deletion. 
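+ *
+ * Subtracts, for every excluded (e.g. bonded) pair, the Coulomb
+ * contribution that the reciprocal-space PME sum implicitly adds, i.e. the
+ * standard Ewald exclusion correction built on erf(beta*r) (the exact form
+ * is in the CUDA implementation). The exclusion topology arrives as
+ * (excluded_list_start, excluded_list, excluded_atom_numbers), a CSR-style
+ * per-atom slice into one flat index list.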
+ */ #ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_SPONGE_PME_PME_EXCLUDED_FORCE_KERNEL_H_ #define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_SPONGE_PME_PME_EXCLUDED_FORCE_KERNEL_H_ #include #include #include -#include -#include #include "backend/kernel_compiler/gpu/gpu_kernel.h" #include "backend/kernel_compiler/gpu/gpu_kernel_factory.h" #include "runtime/device/gpu/cuda_common.h" #include "backend/kernel_compiler/gpu/cuda_impl/sponge/pme/pme_excluded_force_impl.cuh" + namespace mindspore { namespace kernel { template diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/pme/pme_excluded_force_update_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/pme/pme_excluded_force_update_kernel.cc new file mode 100644 index 00000000000..d58f266ef89 --- /dev/null +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/pme/pme_excluded_force_update_kernel.cc @@ -0,0 +1,36 @@ +/** + * Copyright 2021 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/** + *Note: + * PMEExcludedForceUpdate. This is an experimental interface that is subject to change and/or deletion. + */ +#include "backend/kernel_compiler/gpu/sponge/pme/pme_excluded_force_update_kernel.h" + +namespace mindspore { +namespace kernel { +MS_REG_GPU_KERNEL_TWO(PMEExcludedForceUpdate, + KernelAttr() + .AddInputAttr(kNumberTypeUInt32) + .AddInputAttr(kNumberTypeFloat32) + .AddInputAttr(kNumberTypeFloat32) + .AddInputAttr(kNumberTypeInt32) + .AddInputAttr(kNumberTypeInt32) + .AddInputAttr(kNumberTypeInt32) + .AddInputAttr(kNumberTypeFloat32) + .AddOutputAttr(kNumberTypeFloat32), + PMEExcludedForceUpdateGpuKernel, float, int) +} // namespace kernel +} // namespace mindspore diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/pme/pme_excluded_force_update_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/pme/pme_excluded_force_update_kernel.h new file mode 100644 index 00000000000..f2a78749394 --- /dev/null +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/pme/pme_excluded_force_update_kernel.h @@ -0,0 +1,112 @@ +/** + * Copyright 2021 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/** + *Note: + * PMEExcludedForceUpdate. This is an experimental interface that is subject to change and/or deletion. 
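+ *
+ * Same computation as PMEExcludedForce, with one extra float32 input from
+ * which Launch() overwrites the host-side beta when `need_update` is set
+ * (an async device-to-host copy followed by a stream synchronize, matching
+ * the other *Update kernels in this change).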
+ */ +#ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_SPONGE_PME_PME_EXCLUDED_FORCE_KERNEL_H_ +#define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_SPONGE_PME_PME_EXCLUDED_FORCE_KERNEL_H_ + +#include +#include +#include +#include +#include +#include "runtime/device/gpu/cuda_common.h" +#include "backend/kernel_compiler/gpu/gpu_kernel.h" +#include "backend/kernel_compiler/gpu/gpu_kernel_factory.h" +#include "backend/kernel_compiler/gpu/cuda_impl/sponge/pme/pme_excluded_force_impl.cuh" + +namespace mindspore { +namespace kernel { +template +class PMEExcludedForceUpdateGpuKernel : public GpuKernel { + public: + PMEExcludedForceUpdateGpuKernel() : ele_uint_crd(1) {} + ~PMEExcludedForceUpdateGpuKernel() override = default; + + bool Init(const CNodePtr &kernel_node) override { + kernel_node_ = kernel_node; + atom_numbers = static_cast(GetAttr(kernel_node, "atom_numbers")); + excluded_numbers = static_cast(GetAttr(kernel_node, "excluded_numbers")); + beta = static_cast(GetAttr(kernel_node, "beta")); + need_update = static_cast(GetAttr(kernel_node, "need_update")); + InitSizeLists(); + return true; + } + + const std::vector &GetInputSizeList() const override { return input_size_list_; } + const std::vector &GetOutputSizeList() const override { return output_size_list_; } + const std::vector &GetWorkspaceSizeList() const override { return workspace_size_list_; } + + bool Launch(const std::vector &inputs, const std::vector &workspace, + const std::vector &outputs, void *stream_ptr) override { + auto uint_crd = GetDeviceAddress(inputs, 0); + auto sacler = GetDeviceAddress(inputs, 1); + auto charge = GetDeviceAddress(inputs, 2); + auto excluded_list_start = GetDeviceAddress(inputs, 3); + auto excluded_list = GetDeviceAddress(inputs, 4); + auto excluded_atom_numbers = GetDeviceAddress(inputs, 5); + auto d_beta = GetDeviceAddress(inputs, 6); + if (need_update) { + cudaMemcpyAsync(&beta, d_beta, sizeof(float), cudaMemcpyDeviceToHost, reinterpret_cast(stream_ptr)); + cudaStreamSynchronize(reinterpret_cast(stream_ptr)); + } + + auto force = GetDeviceAddress(outputs, 0); + PMEExcludedForce(atom_numbers, beta, uint_crd, sacler, charge, excluded_list_start, excluded_list, + excluded_atom_numbers, force, reinterpret_cast(stream_ptr)); + return true; + } + + protected: + void InitSizeLists() override { + input_size_list_.push_back(atom_numbers * sizeof(UNSIGNED_INT_VECTOR)); + input_size_list_.push_back(sizeof(VECTOR)); + input_size_list_.push_back(atom_numbers * sizeof(T)); + input_size_list_.push_back(atom_numbers * sizeof(T1)); + input_size_list_.push_back(excluded_numbers * sizeof(T1)); + input_size_list_.push_back(atom_numbers * sizeof(T1)); + input_size_list_.push_back(sizeof(T)); + + output_size_list_.push_back(atom_numbers * 3 * sizeof(T)); + } + + private: + size_t ele_uint_crd = 1; + std::vector input_size_list_; + std::vector output_size_list_; + std::vector workspace_size_list_; + int atom_numbers; + int excluded_numbers; + float beta; + float factor; + int need_update; + struct VECTOR { + float x; + float y; + float z; + }; + + struct UNSIGNED_INT_VECTOR { + unsigned int uint_x; + unsigned int uint_y; + unsigned int uint_z; + }; +}; +} // namespace kernel +} // namespace mindspore +#endif diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/pme/pme_reciprocal_force_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/pme/pme_reciprocal_force_kernel.h index a0927c24a03..e6b51692762 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/pme/pme_reciprocal_force_kernel.h 
+++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/pme/pme_reciprocal_force_kernel.h @@ -13,6 +13,10 @@ * See the License for the specific language governing permissions and * limitations under the License. */ +/** + *Note: + * PMEReciprocalForce. This is an experimental interface that is subject to change and/or deletion. + */ #ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_SPONGE_PME_PME_RECIPROCAL_FORCE_KERNEL_H_ #define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_SPONGE_PME_PME_RECIPROCAL_FORCE_KERNEL_H_ #include @@ -144,9 +148,8 @@ class PMEReciprocalForceGpuKernel : public GpuKernel { protected: void InitSizeLists() override { - input_size_list_.push_back(sizeof(VECTOR)); input_size_list_.push_back(atom_numbers * sizeof(UNSIGNED_INT_VECTOR)); - input_size_list_.push_back(atom_numbers * sizeof(VECTOR)); + input_size_list_.push_back(atom_numbers * sizeof(float)); workspace_size_list_.push_back(atom_numbers * sizeof(UNSIGNED_INT_VECTOR)); workspace_size_list_.push_back(atom_numbers * sizeof(VECTOR)); diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/pme/pme_reciprocal_force_update_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/pme/pme_reciprocal_force_update_kernel.cc new file mode 100644 index 00000000000..0c97c834066 --- /dev/null +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/pme/pme_reciprocal_force_update_kernel.cc @@ -0,0 +1,32 @@ +/** + * Copyright 2021 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/** + *Note: + * PMEReciprocalForceUpdate. This is an experimental interface that is subject to change and/or deletion. + */ +#include "backend/kernel_compiler/gpu/sponge/pme/pme_reciprocal_force_update_kernel.h" + +namespace mindspore { +namespace kernel { +MS_REG_GPU_KERNEL_TWO(PMEReciprocalForceUpdate, + KernelAttr() + .AddInputAttr(kNumberTypeUInt32) + .AddInputAttr(kNumberTypeFloat32) + .AddInputAttr(kNumberTypeFloat32) + .AddOutputAttr(kNumberTypeFloat32), + PMEReciprocalForceUpdateGpuKernel, float, int) +} // namespace kernel +} // namespace mindspore diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/pme/pme_reciprocal_force_update_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/pme/pme_reciprocal_force_update_kernel.h new file mode 100644 index 00000000000..7bc825a6e5b --- /dev/null +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/pme/pme_reciprocal_force_update_kernel.h @@ -0,0 +1,280 @@ +/** + * Copyright 2021 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ +/** + *Note: + * PMEReciprocalForceUpdate. This is an experimental interface that is subject to change and/or deletion. + */ +#ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_SPONGE_PME_PME_RECIPROCAL_FORCE_UPDATE_KERNEL_H_ +#define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_SPONGE_PME_PME_RECIPROCAL_FORCE_UPDATE_KERNEL_H_ +#include +#include +#include +#include "backend/kernel_compiler/gpu/gpu_kernel.h" +#include "backend/kernel_compiler/gpu/gpu_kernel_factory.h" +#include "runtime/device/gpu/cuda_common.h" +#include "backend/kernel_compiler/gpu/cuda_impl/sponge/pme/pme_reciprocal_force_impl.cuh" + +namespace mindspore { +namespace kernel { +template +class PMEReciprocalForceUpdateGpuKernel : public GpuKernel { + public: + PMEReciprocalForceUpdateGpuKernel() : ele_uint_crd(1) {} + ~PMEReciprocalForceUpdateGpuKernel() override = default; + + bool Init(const CNodePtr &kernel_node) override { + kernel_node_ = kernel_node; + atom_numbers = static_cast(GetAttr(kernel_node, "atom_numbers")); + beta = static_cast(GetAttr(kernel_node, "beta")); + fftx = static_cast(GetAttr(kernel_node, "fftx")); + ffty = static_cast(GetAttr(kernel_node, "ffty")); + fftz = static_cast(GetAttr(kernel_node, "fftz")); + + float box_length_0 = static_cast(GetAttr(kernel_node, "box_length_0")); + float box_length_1 = static_cast(GetAttr(kernel_node, "box_length_1")); + float box_length_2 = static_cast(GetAttr(kernel_node, "box_length_2")); + + need_update = static_cast(GetAttr(kernel_node, "need_update")); + PME_Nall = fftx * ffty * fftz; + PME_Nfft = fftx * ffty * (fftz / 2 + 1); + PME_Nin = ffty * fftz; + cufftPlan3d(&PME_plan_r2c, fftx, ffty, fftz, CUFFT_R2C); + cufftPlan3d(&PME_plan_c2r, fftx, ffty, fftz, CUFFT_C2R); + + std::vector h_box_length{box_length_0, box_length_1, box_length_2}; + VECTOR *box_length = reinterpret_cast(h_box_length.data()); + + PME_inverse_box_vector0.x = static_cast(fftx) / box_length[0].x; + PME_inverse_box_vector0.y = static_cast(ffty) / box_length[0].y; + PME_inverse_box_vector0.z = static_cast(fftz) / box_length[0].z; + PME_inverse_box_vector.x = PME_inverse_box_vector0.x; + PME_inverse_box_vector.y = PME_inverse_box_vector0.y; + PME_inverse_box_vector.z = PME_inverse_box_vector0.z; + float volume = box_length[0].x * box_length[0].y * box_length[0].z; + PME_kxyz_cpu.resize(64); + int kx, ky, kz, kxrp, kyrp, kzrp, index; + for (kx = 0; kx < 4; kx++) { + for (ky = 0; ky < 4; ky++) { + for (kz = 0; kz < 4; kz++) { + index = kx * 16 + ky * 4 + kz; + PME_kxyz_cpu[index].uint_x = kx; + PME_kxyz_cpu[index].uint_y = ky; + PME_kxyz_cpu[index].uint_z = kz; + } + } + } + B1.resize(fftx); + B2.resize(ffty); + B3.resize(fftz); + PME_BC0.resize(PME_Nfft); + + for (kx = 0; kx < fftx; kx++) { + B1[kx] = getb(kx, fftx, 4); + } + + for (ky = 0; ky < ffty; ky++) { + B2[ky] = getb(ky, ffty, 4); + } + + for (kz = 0; kz < fftz; kz++) { + B3[kz] = getb(kz, fftz, 4); + } + float mprefactor = PI * PI / -beta / beta; + float msq; + for (kx = 0; kx < fftx; kx++) { + kxrp = kx; + if (kx > fftx / 2) kxrp = fftx - kx; + for (ky = 0; ky < ffty; ky++) { + kyrp = ky; + if (ky > ffty / 2) kyrp = ffty - ky; + for (kz = 0; kz <= fftz / 2; kz++) { + kzrp = kz; + + msq = kxrp * kxrp / box_length[0].x / box_length[0].x + kyrp * kyrp / box_length[0].y / box_length[0].y + + kzrp * kzrp / box_length[0].z / box_length[0].z; + index = kx * ffty * (fftz / 2 + 1) + ky * (fftz / 2 + 1) + kz; + if ((kx + ky + kz) == 
0) { + PME_BC0[index] = 0; + } else { + PME_BC0[index] = 1.0 / PI / msq * exp(mprefactor * msq) / volume; + } + + PME_BC0[index] *= B1[kx] * B2[ky] * B3[kz]; + } + } + } + PME_BC = PME_BC0; + InitSizeLists(); + return true; + } + + const std::vector &GetInputSizeList() const override { return input_size_list_; } + const std::vector &GetOutputSizeList() const override { return output_size_list_; } + const std::vector &GetWorkspaceSizeList() const override { return workspace_size_list_; } + + bool Launch(const std::vector &inputs, const std::vector &workspace, + const std::vector &outputs, void *stream_ptr) override { + auto uint_crd = GetDeviceAddress(inputs, 0); + auto charge = GetDeviceAddress(inputs, 1); + auto d_beta = GetDeviceAddress(inputs, 2); + + auto pme_uxyz = GetDeviceAddress(workspace, 0); // workspace + auto pme_frxyz = GetDeviceAddress(workspace, 1); // workspace + auto pme_q = GetDeviceAddress(workspace, 2); // workspace + auto pme_fq = GetDeviceAddress(workspace, 3); // workspace + auto pme_atom_near = GetDeviceAddress(workspace, 4); // workspace + auto pme_bc = GetDeviceAddress(workspace, 5); // workspace + auto pme_kxyz = GetDeviceAddress(workspace, 6); // workspace + + auto force = GetDeviceAddress(outputs, 0); + h_beta = beta; + if (need_update) { + cudaMemcpyAsync(&h_beta, d_beta, sizeof(float), cudaMemcpyDeviceToHost, + reinterpret_cast(stream_ptr)); + cudaStreamSynchronize(reinterpret_cast(stream_ptr)); + factor = h_beta / beta; + double factor_inverse = 1.0 / factor; + + PME_inverse_box_vector.x = PME_inverse_box_vector0.x * factor_inverse; + PME_inverse_box_vector.y = PME_inverse_box_vector0.y * factor_inverse; + PME_inverse_box_vector.z = PME_inverse_box_vector0.z * factor_inverse; + + for (int i = 0; i < PME_Nfft; i++) { + PME_BC[i] = PME_BC0[i] * factor_inverse; + } + } + + cufftSetStream(PME_plan_r2c, reinterpret_cast(stream_ptr)); + cufftSetStream(PME_plan_c2r, reinterpret_cast(stream_ptr)); + cudaMemcpyAsync(pme_kxyz, PME_kxyz_cpu.data(), sizeof(UNSIGNED_INT_VECTOR) * 64, cudaMemcpyHostToDevice, + reinterpret_cast(stream_ptr)); + cudaMemcpyAsync(pme_bc, PME_BC.data(), sizeof(float) * PME_Nfft, cudaMemcpyHostToDevice, + reinterpret_cast(stream_ptr)); + PMEReciprocalForce(fftx, ffty, fftz, atom_numbers, beta, pme_bc, pme_uxyz, pme_frxyz, pme_q, pme_fq, pme_atom_near, + pme_kxyz, uint_crd, charge, force, PME_Nin, PME_Nall, PME_Nfft, PME_plan_r2c, PME_plan_c2r, + PME_inverse_box_vector, reinterpret_cast(stream_ptr)); + return true; + } + + protected: + void InitSizeLists() override { + input_size_list_.push_back(atom_numbers * sizeof(UNSIGNED_INT_VECTOR)); + input_size_list_.push_back(atom_numbers * sizeof(float)); + input_size_list_.push_back(sizeof(float)); + + workspace_size_list_.push_back(atom_numbers * sizeof(UNSIGNED_INT_VECTOR)); + workspace_size_list_.push_back(atom_numbers * sizeof(VECTOR)); + workspace_size_list_.push_back(PME_Nall * sizeof(T)); + workspace_size_list_.push_back(PME_Nfft * sizeof(cufftComplex)); + workspace_size_list_.push_back(atom_numbers * 64 * sizeof(int)); + workspace_size_list_.push_back(PME_Nfft * sizeof(float)); + workspace_size_list_.push_back(64 * sizeof(UNSIGNED_INT_VECTOR)); + + output_size_list_.push_back(atom_numbers * sizeof(VECTOR)); + } + + cufftComplex expc(cufftComplex z) { + cufftComplex res; + float t = expf(z.x); + sincosf(z.y, &res.y, &res.x); + res.x *= t; + res.y *= t; + return res; + } + + float M_(float u, int n) { + if (n == 2) { + if (u > 2 || u < 0) return 0; + return 1 - abs(u - 1); + } else { + return u / 
(n - 1) * M_(u, n - 1) + (n - u) / (n - 1) * M_(u - 1, n - 1); + } + } + + float getb(int k, int NFFT, int B_order) { + cufftComplex tempc, tempc2, res; + float tempf; + tempc2.x = 0; + tempc2.y = 0; + + tempc.x = 0; + if (NFFT == 0) { + MS_LOG(ERROR) << "Divide by zero."; + } else { + tempc.y = 2 * (B_order - 1) * PI * k / NFFT; + } + res = expc(tempc); + + for (int kk = 0; kk < (B_order - 1); kk++) { + tempc.x = 0; + if (NFFT == 0) { + MS_LOG(ERROR) << "Divide by zero."; + break; + } else { + tempc.y = 2 * PI * k / NFFT * kk; + } + tempc = expc(tempc); + tempf = M_(kk + 1, B_order); + tempc2.x += tempf * tempc.x; + tempc2.y += tempf * tempc.y; + } + res = cuCdivf(res, tempc2); + return res.x * res.x + res.y * res.y; + } + + private: + size_t ele_uint_crd = 1; + + std::vector input_size_list_; + std::vector output_size_list_; + std::vector workspace_size_list_; + int atom_numbers; + int fftx; + int ffty; + int fftz; + float beta; + float h_beta; + float factor; + int PME_Nall; + int PME_Nfft; + int PME_Nin; + float PI = 3.1415926; + int need_update; + std::vector B1; + std::vector B2; + std::vector B3; + std::vector PME_BC0; + std::vector PME_BC; + + cufftHandle PME_plan_r2c; + cufftHandle PME_plan_c2r; + struct VECTOR { + float x; + float y; + float z; + }; + _VECTOR PME_inverse_box_vector0; + _VECTOR PME_inverse_box_vector; + struct UNSIGNED_INT_VECTOR { + unsigned int uint_x; + unsigned int uint_y; + unsigned int uint_z; + }; + std::vector PME_kxyz_cpu; +}; +} // namespace kernel +} // namespace mindspore +#endif diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/simple_constrain/constrain_force_cycle_with_virial_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/simple_constrain/constrain_force_cycle_with_virial_kernel.cc index 4e7d8f69ac4..840703a171e 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/simple_constrain/constrain_force_cycle_with_virial_kernel.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/simple_constrain/constrain_force_cycle_with_virial_kernel.cc @@ -13,12 +13,16 @@ * See the License for the specific language governing permissions and * limitations under the License. */ +/** + *Note: + * ConstrainForceCycleWithVirial. This is an experimental interface that is subject to change and/or deletion. + */ #include "backend/kernel_compiler/gpu/sponge/simple_constrain/constrain_force_cycle_with_virial_kernel.h" namespace mindspore { namespace kernel { -MS_REG_GPU_KERNEL_THREE(constrainforcecyclewithvirial, +MS_REG_GPU_KERNEL_THREE(ConstrainForceCycleWithVirial, KernelAttr() .AddInputAttr(kNumberTypeUInt32) .AddInputAttr(kNumberTypeFloat32) diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/simple_constrain/constrain_force_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/simple_constrain/constrain_force_kernel.cc new file mode 100644 index 00000000000..4ec01c6a495 --- /dev/null +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/simple_constrain/constrain_force_kernel.cc @@ -0,0 +1,41 @@ +/** + * Copyright 2021 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/simple_constrain/constrain_force_cycle_with_virial_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/simple_constrain/constrain_force_cycle_with_virial_kernel.cc
index 4e7d8f69ac4..840703a171e 100644
--- a/mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/simple_constrain/constrain_force_cycle_with_virial_kernel.cc
+++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/simple_constrain/constrain_force_cycle_with_virial_kernel.cc
@@ -13,12 +13,16 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
+/**
+ *Note:
+ * ConstrainForceCycleWithVirial. This is an experimental interface that is subject to change and/or deletion.
+ */
 #include "backend/kernel_compiler/gpu/sponge/simple_constrain/constrain_force_cycle_with_virial_kernel.h"
 
 namespace mindspore {
 namespace kernel {
-MS_REG_GPU_KERNEL_THREE(constrainforcecyclewithvirial,
+MS_REG_GPU_KERNEL_THREE(ConstrainForceCycleWithVirial,
                         KernelAttr()
                           .AddInputAttr(kNumberTypeUInt32)
                           .AddInputAttr(kNumberTypeFloat32)
diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/simple_constrain/constrain_force_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/simple_constrain/constrain_force_kernel.cc
new file mode 100644
index 00000000000..4ec01c6a495
--- /dev/null
+++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/simple_constrain/constrain_force_kernel.cc
@@ -0,0 +1,41 @@
+/**
+ * Copyright 2021 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/**
+ *Note:
+ * ConstrainForce. This is an experimental interface that is subject to change and/or deletion.
+ */
+#include "backend/kernel_compiler/gpu/sponge/simple_constrain/constrain_force_kernel.h"
+
+namespace mindspore {
+namespace kernel {
+MS_REG_GPU_KERNEL_THREE(ConstrainForce,
+                        KernelAttr()
+                          .AddInputAttr(kNumberTypeFloat32)
+                          .AddInputAttr(kNumberTypeFloat32)
+                          .AddInputAttr(kNumberTypeFloat32)
+                          .AddInputAttr(kNumberTypeFloat32)
+                          .AddInputAttr(kNumberTypeFloat32)
+                          .AddInputAttr(kNumberTypeInt32)
+                          .AddInputAttr(kNumberTypeInt32)
+                          .AddInputAttr(kNumberTypeFloat32)
+                          .AddInputAttr(kNumberTypeFloat32)
+                          .AddOutputAttr(kNumberTypeUInt32)
+                          .AddOutputAttr(kNumberTypeFloat32)
+                          .AddOutputAttr(kNumberTypeFloat32),
+                        ConstrainForceGpuKernel, float, int, unsigned int)
+
+}  // namespace kernel
+}  // namespace mindspore
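For orientation: the three trailing types of MS_REG_GPU_KERNEL_THREE bind the template parameters of the kernel class declared in the header below, so this registration instantiates (illustrative expansion):

    ConstrainForceGpuKernel<float, int, unsigned int>  // T, T1, T2

matching the Float32 coordinate/force inputs (T), the Int32 atom-serial inputs (T1), and the UInt32 coordinate output (T2) listed in KernelAttr().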
diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/simple_constrain/constrain_force_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/simple_constrain/constrain_force_kernel.h
new file mode 100644
index 00000000000..89d229b3cb6
--- /dev/null
+++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/simple_constrain/constrain_force_kernel.h
@@ -0,0 +1,162 @@
+/**
+ * Copyright 2021 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/**
+ *Note:
+ * ConstrainForce. This is an experimental interface that is subject to change and/or deletion.
+ */
+
+#ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_SPONG_SIMPLE_CONSTRAIN_CONSTRAIN_FORCE_KERNEL_H_
+#define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_SPONG_SIMPLE_CONSTRAIN_CONSTRAIN_FORCE_KERNEL_H_
+
+#include "backend/kernel_compiler/gpu/cuda_impl/sponge/simple_constrain/constrain_force_virial_impl.cuh"
+
+#include <cuda_runtime_api.h>
+#include <map>
+#include <string>
+#include <vector>
+
+#include "backend/kernel_compiler/gpu/gpu_kernel.h"
+#include "backend/kernel_compiler/gpu/gpu_kernel_factory.h"
+#include "runtime/device/gpu/cuda_common.h"
+
+namespace mindspore {
+namespace kernel {
+template <typename T, typename T1, typename T2>
+class ConstrainForceGpuKernel : public GpuKernel {
+ public:
+  ConstrainForceGpuKernel() : ele_crd(1) {}
+  ~ConstrainForceGpuKernel() override = default;
+
+  bool Init(const CNodePtr &kernel_node) override {
+    kernel_node_ = kernel_node;
+    atom_numbers = static_cast<int>(GetAttr<int64_t>(kernel_node, "atom_numbers"));
+    constrain_pair_numbers = static_cast<int>(GetAttr<int64_t>(kernel_node, "constrain_pair_numbers"));
+    iteration_numbers = static_cast<int>(GetAttr<int64_t>(kernel_node, "iteration_numbers"));
+    half_exp_gamma_plus_half = static_cast<float>(GetAttr<float>(kernel_node, "half_exp_gamma_plus_half"));
+
+    auto shape_crd = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
+    auto shape_quarter_cof = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 1);
+    auto shape_mass_inverse = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 2);
+
+    auto shape_scaler = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 3);
+    auto shape_pair_dr = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 4);
+    auto shape_atom_i_serials = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 5);
+    auto shape_atom_j_serials = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 6);
+    auto shape_constant_rs = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 7);
+    auto shape_constrain_ks = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 8);
+
+    for (size_t i = 0; i < shape_scaler.size(); i++) ele_scaler *= shape_scaler[i];
+    for (size_t i = 0; i < shape_pair_dr.size(); i++) ele_pair_dr *= shape_pair_dr[i];
+    for (size_t i = 0; i < shape_atom_i_serials.size(); i++) ele_atom_i_serials *= shape_atom_i_serials[i];
+    for (size_t i = 0; i < shape_atom_j_serials.size(); i++) ele_atom_j_serials *= shape_atom_j_serials[i];
+    for (size_t i = 0; i < shape_constant_rs.size(); i++) ele_constant_rs *= shape_constant_rs[i];
+    for (size_t i = 0; i < shape_constrain_ks.size(); i++) ele_constrain_ks *= shape_constrain_ks[i];
+
+    for (size_t i = 0; i < shape_crd.size(); i++) ele_crd *= shape_crd[i];
+    for (size_t i = 0; i < shape_quarter_cof.size(); i++) ele_quarter_cof *= shape_quarter_cof[i];
+    for (size_t i = 0; i < shape_mass_inverse.size(); i++) ele_mass_inverse *= shape_mass_inverse[i];
+
+    InitSizeLists();
+    return true;
+  }
+
+  const std::vector<size_t> &GetInputSizeList() const override { return input_size_list_; }
+  const std::vector<size_t> &GetOutputSizeList() const override { return output_size_list_; }
+  const std::vector<size_t> &GetWorkspaceSizeList() const override { return workspace_size_list_; }
+
+  bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
+              const std::vector<AddressPtr> &outputs, void *stream_ptr) override {
+    auto crd = GetDeviceAddress<T>(inputs, 0);
+    auto quarter_cof = GetDeviceAddress<T>(inputs, 1);
+    auto mass_inverse = GetDeviceAddress<T>(inputs, 2);
+
+    auto scaler = GetDeviceAddress<T>(inputs, 3);
+    auto pair_dr = GetDeviceAddress<T>(inputs, 4);
+    auto atom_i_serials = GetDeviceAddress<T1>(inputs, 5);
+    auto atom_j_serials = GetDeviceAddress<T1>(inputs, 6);
+    auto constant_rs = GetDeviceAddress<T>(inputs, 7);
+    auto constrain_ks = GetDeviceAddress<T>(inputs, 8);
+
+    auto constrain_pair = GetDeviceAddress<CONSTRAIN_PAIR>(workspace, 0);
+
+    auto uint_crd = GetDeviceAddress<T2>(outputs, 0);
+
+    auto test_frc_f = GetDeviceAddress<T>(outputs, 1);
+    auto d_atom_virial = GetDeviceAddress<T>(outputs, 2);
+
+    set_zero_force_with_virial(atom_numbers, constrain_pair_numbers, test_frc_f, d_atom_virial,
+                               reinterpret_cast<cudaStream_t>(stream_ptr));
+
+    for (int i = 0; i < iteration_numbers; i++) {
+      refresh_uint_crd_update(atom_numbers, half_exp_gamma_plus_half, crd, quarter_cof, test_frc_f, mass_inverse,
+                              uint_crd, reinterpret_cast<cudaStream_t>(stream_ptr));
+
+      constrain_force_cycle_update(atom_numbers, constrain_pair_numbers, uint_crd, scaler, constrain_pair, pair_dr,
+                                   atom_i_serials, atom_j_serials, constant_rs, constrain_ks, test_frc_f,
+                                   reinterpret_cast<cudaStream_t>(stream_ptr));
+    }
+    return true;
+  }
+
+ protected:
+  void InitSizeLists() override {
+    input_size_list_.push_back(ele_crd * sizeof(T));
+    input_size_list_.push_back(ele_quarter_cof * sizeof(T));
+    input_size_list_.push_back(ele_mass_inverse * sizeof(T));
+
+    input_size_list_.push_back(ele_scaler * sizeof(T));
+    input_size_list_.push_back(ele_pair_dr * sizeof(T));
+    input_size_list_.push_back(ele_atom_i_serials * sizeof(T1));
+    input_size_list_.push_back(ele_atom_j_serials * sizeof(T1));
+    input_size_list_.push_back(ele_constant_rs * sizeof(T));
+    input_size_list_.push_back(ele_constrain_ks * sizeof(T));
+
+    workspace_size_list_.push_back(constrain_pair_numbers * sizeof(CONSTRAIN_PAIR));
+
+    output_size_list_.push_back(3 * atom_numbers * sizeof(T2));
+    output_size_list_.push_back(3 * atom_numbers * sizeof(T));
+    output_size_list_.push_back(constrain_pair_numbers * sizeof(T));
+  }
+
+ private:
+  size_t ele_scaler = 1;
+  size_t ele_pair_dr = 1;
+  size_t ele_atom_i_serials = 1;
+  size_t ele_atom_j_serials = 1;
+  size_t ele_constant_rs = 1;
+  size_t ele_constrain_ks = 1;
+  size_t ele_crd = 1;
+  size_t ele_quarter_cof = 1;
+  size_t ele_mass_inverse = 1;
+
+  std::vector<size_t> input_size_list_;
+  std::vector<size_t> output_size_list_;
+  std::vector<size_t> workspace_size_list_;
+  int atom_numbers;
+  int constrain_pair_numbers;
+  int iteration_numbers;
+  int need_pressure;
+  float half_exp_gamma_plus_half;
+  struct CONSTRAIN_PAIR {
+    int atom_i_serial;
+    int atom_j_serial;
+    float constant_r;
+    float constrain_k;
+  };
+};
+}  // namespace kernel
+}  // namespace mindspore
+#endif
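The Launch body above is the whole constraint solver: forces and the per-pair virial are zeroed once, then each of the iteration_numbers cycles rebuilds the unsigned-int coordinates from crd plus the force accumulated so far and adds the next correction for every constrained pair. A condensed sketch of that driver shape (illustrative, assuming the semantics suggested by the impl .cuh function names):

    set_zero_force_with_virial(...);               // clear frc and per-pair virial once
    for (int i = 0; i < iteration_numbers; i++) {
      refresh_uint_crd_update(...);                // crd + accumulated frc -> uint crd
      constrain_force_cycle_update(...);           // add this cycle's pair corrections
    }

iteration_numbers therefore directly trades constraint accuracy against the number of kernel launches per step.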
diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/simple_constrain/constrain_force_virial_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/simple_constrain/constrain_force_virial_kernel.cc
new file mode 100644
index 00000000000..8a89e01912d
--- /dev/null
+++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/simple_constrain/constrain_force_virial_kernel.cc
@@ -0,0 +1,42 @@
+/**
+ * Copyright 2021 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/**
+ *Note:
+ * ConstrainForceVirial. This is an experimental interface that is subject to change and/or deletion.
+ */
+
+#include "backend/kernel_compiler/gpu/sponge/simple_constrain/constrain_force_virial_kernel.h"
+
+namespace mindspore {
+namespace kernel {
+MS_REG_GPU_KERNEL_THREE(ConstrainForceVirial,
+                        KernelAttr()
+                          .AddInputAttr(kNumberTypeFloat32)
+                          .AddInputAttr(kNumberTypeFloat32)
+                          .AddInputAttr(kNumberTypeFloat32)
+                          .AddInputAttr(kNumberTypeFloat32)
+                          .AddInputAttr(kNumberTypeFloat32)
+                          .AddInputAttr(kNumberTypeInt32)
+                          .AddInputAttr(kNumberTypeInt32)
+                          .AddInputAttr(kNumberTypeFloat32)
+                          .AddInputAttr(kNumberTypeFloat32)
+                          .AddOutputAttr(kNumberTypeUInt32)
+                          .AddOutputAttr(kNumberTypeFloat32)
+                          .AddOutputAttr(kNumberTypeFloat32),
+                        ConstrainForceVirialGpuKernel, float, int, unsigned int)
+
+}  // namespace kernel
+}  // namespace mindspore
diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/simple_constrain/constrain_force_virial_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/simple_constrain/constrain_force_virial_kernel.h
new file mode 100644
index 00000000000..07ad72ebab6
--- /dev/null
+++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/simple_constrain/constrain_force_virial_kernel.h
@@ -0,0 +1,159 @@
+/**
+ * Copyright 2021 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/**
+ *Note:
+ * ConstrainForceVirial. This is an experimental interface that is subject to change and/or deletion.
+ */
+#ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_SPONG_SIMPLE_CONSTRAIN_CONSTRAIN_FORCE_VIRIAL_KERNEL_H_
+#define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_SPONG_SIMPLE_CONSTRAIN_CONSTRAIN_FORCE_VIRIAL_KERNEL_H_
+
+#include "backend/kernel_compiler/gpu/cuda_impl/sponge/simple_constrain/constrain_force_virial_impl.cuh"
+
+#include <cuda_runtime_api.h>
+#include <map>
+#include <string>
+#include <vector>
+
+#include "backend/kernel_compiler/gpu/gpu_kernel.h"
+#include "backend/kernel_compiler/gpu/gpu_kernel_factory.h"
+#include "runtime/device/gpu/cuda_common.h"
+
+namespace mindspore {
+namespace kernel {
+template <typename T, typename T1, typename T2>
+class ConstrainForceVirialGpuKernel : public GpuKernel {
+ public:
+  ConstrainForceVirialGpuKernel() : ele_crd(1) {}
+  ~ConstrainForceVirialGpuKernel() override = default;
+
+  bool Init(const CNodePtr &kernel_node) override {
+    kernel_node_ = kernel_node;
+    atom_numbers = static_cast<int>(GetAttr<int64_t>(kernel_node, "atom_numbers"));
+    constrain_pair_numbers = static_cast<int>(GetAttr<int64_t>(kernel_node, "constrain_pair_numbers"));
+    iteration_numbers = static_cast<int>(GetAttr<int64_t>(kernel_node, "iteration_numbers"));
+    half_exp_gamma_plus_half = static_cast<float>(GetAttr<float>(kernel_node, "half_exp_gamma_plus_half"));
+
+    auto shape_crd = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
+    auto shape_quarter_cof = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 1);
+    auto shape_mass_inverse = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 2);
+
+    auto shape_scaler = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 3);
+    auto shape_pair_dr = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 4);
+    auto shape_atom_i_serials = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 5);
+    auto shape_atom_j_serials = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 6);
+    auto shape_constant_rs = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 7);
+    auto shape_constrain_ks = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 8);
+
+    for (size_t i = 0; i < shape_scaler.size(); i++) ele_scaler *= shape_scaler[i];
+    for (size_t i = 0; i < shape_pair_dr.size(); i++) ele_pair_dr *= shape_pair_dr[i];
+    for (size_t i = 0; i < shape_atom_i_serials.size(); i++) ele_atom_i_serials *= shape_atom_i_serials[i];
+    for (size_t i = 0; i < shape_atom_j_serials.size(); i++) ele_atom_j_serials *= shape_atom_j_serials[i];
+    for (size_t i = 0; i < shape_constant_rs.size(); i++) ele_constant_rs *= shape_constant_rs[i];
+    for (size_t i = 0; i < shape_constrain_ks.size(); i++) ele_constrain_ks *= shape_constrain_ks[i];
+
+    for (size_t i = 0; i < shape_crd.size(); i++) ele_crd *= shape_crd[i];
+    for (size_t i = 0; i < shape_quarter_cof.size(); i++) ele_quarter_cof *= shape_quarter_cof[i];
+    for (size_t i = 0; i < shape_mass_inverse.size(); i++) ele_mass_inverse *= shape_mass_inverse[i];
+
+    InitSizeLists();
+    return true;
+  }
+
+  const std::vector<size_t> &GetInputSizeList() const override { return input_size_list_; }
+  const std::vector<size_t> &GetOutputSizeList() const override { return output_size_list_; }
+  const std::vector<size_t> &GetWorkspaceSizeList() const override { return workspace_size_list_; }
+
+  bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
+              const std::vector<AddressPtr> &outputs, void *stream_ptr) override {
+    auto crd = GetDeviceAddress<T>(inputs, 0);
+    auto quarter_cof = GetDeviceAddress<T>(inputs, 1);
+    auto mass_inverse = GetDeviceAddress<T>(inputs, 2);
+    auto scaler = GetDeviceAddress<T>(inputs, 3);
+    auto pair_dr = GetDeviceAddress<T>(inputs, 4);
+    auto atom_i_serials = GetDeviceAddress<T1>(inputs, 5);
+    auto atom_j_serials = GetDeviceAddress<T1>(inputs, 6);
+    auto constant_rs = GetDeviceAddress<T>(inputs, 7);
+    auto constrain_ks = GetDeviceAddress<T>(inputs, 8);
+    auto constrain_pair = GetDeviceAddress<CONSTRAIN_PAIR>(workspace, 0);
+    auto uint_crd = GetDeviceAddress<T2>(outputs, 0);
+    auto test_frc_f = GetDeviceAddress<T>(outputs, 1);
+    auto d_atom_virial = GetDeviceAddress<T>(outputs, 2);
+
+    set_zero_force_with_virial(atom_numbers, constrain_pair_numbers, test_frc_f, d_atom_virial,
+                               reinterpret_cast<cudaStream_t>(stream_ptr));
+
+    for (int i = 0; i < iteration_numbers; i++) {
+      refresh_uint_crd_update(atom_numbers, half_exp_gamma_plus_half, crd, quarter_cof, test_frc_f, mass_inverse,
+                              uint_crd, reinterpret_cast<cudaStream_t>(stream_ptr));
+
+      constrain_force_cycle_with_virial_update(atom_numbers, constrain_pair_numbers, uint_crd, scaler, constrain_pair,
+                                               pair_dr, atom_i_serials, atom_j_serials, constant_rs, constrain_ks,
+                                               test_frc_f, d_atom_virial, reinterpret_cast<cudaStream_t>(stream_ptr));
+    }
+
+    return true;
+  }
+
+ protected:
+  void InitSizeLists() override {
+    input_size_list_.push_back(ele_crd * sizeof(T));
+    input_size_list_.push_back(ele_quarter_cof * sizeof(T));
+    input_size_list_.push_back(ele_mass_inverse * sizeof(T));
+
+    input_size_list_.push_back(ele_scaler * sizeof(T));
+    input_size_list_.push_back(ele_pair_dr * sizeof(T));
+    input_size_list_.push_back(ele_atom_i_serials * sizeof(T1));
+    input_size_list_.push_back(ele_atom_j_serials * sizeof(T1));
+    input_size_list_.push_back(ele_constant_rs * sizeof(T));
+    input_size_list_.push_back(ele_constrain_ks * sizeof(T));
+
+    workspace_size_list_.push_back(constrain_pair_numbers * sizeof(CONSTRAIN_PAIR));
+
+    output_size_list_.push_back(3 * atom_numbers * sizeof(T2));
+
+    output_size_list_.push_back(3 * atom_numbers * sizeof(T));
+    output_size_list_.push_back(constrain_pair_numbers * sizeof(T));
+  }
+
+ private:
+  size_t ele_scaler = 1;
+  size_t ele_pair_dr = 1;
+  size_t ele_atom_i_serials = 1;
+  size_t ele_atom_j_serials = 1;
+  size_t ele_constant_rs = 1;
+  size_t ele_constrain_ks = 1;
+  size_t ele_crd = 1;
+  size_t ele_quarter_cof = 1;
+  size_t ele_mass_inverse = 1;
+
+  std::vector<size_t> input_size_list_;
+  std::vector<size_t> output_size_list_;
+  std::vector<size_t> workspace_size_list_;
+  int atom_numbers;
+  int constrain_pair_numbers;
+  int iteration_numbers;
+  int need_pressure;
+  float half_exp_gamma_plus_half;
+  struct CONSTRAIN_PAIR {
+    int atom_i_serial;
+    int atom_j_serial;
+    float constant_r;
+    float constrain_k;
+  };
+};
+}  // namespace kernel
+}  // namespace mindspore
+#endif
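All three constrain headers size their only workspace as constrain_pair_numbers * sizeof(CONSTRAIN_PAIR). Since CONSTRAIN_PAIR is four 32-bit fields, that is 16 bytes per constrained pair; a standalone check (illustrative, the struct is copied from the headers):

    struct CONSTRAIN_PAIR {
      int atom_i_serial;
      int atom_j_serial;
      float constant_r;
      float constrain_k;
    };
    static_assert(sizeof(CONSTRAIN_PAIR) == 16, "four 32-bit fields, no padding");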
diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/simple_constrain/constrain_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/simple_constrain/constrain_kernel.cc
new file mode 100644
index 00000000000..cb8b8b1cfcb
--- /dev/null
+++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/simple_constrain/constrain_kernel.cc
@@ -0,0 +1,42 @@
+/**
+ * Copyright 2021 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/**
+ *Note:
+ * Constrain. This is an experimental interface that is subject to change and/or deletion.
+ */
+#include "backend/kernel_compiler/gpu/sponge/simple_constrain/constrain_kernel.h"
+
+namespace mindspore {
+namespace kernel {
+MS_REG_GPU_KERNEL_THREE(Constrain,
+                        KernelAttr()
+                          .AddInputAttr(kNumberTypeFloat32)
+                          .AddInputAttr(kNumberTypeFloat32)
+                          .AddInputAttr(kNumberTypeFloat32)
+                          .AddInputAttr(kNumberTypeFloat32)
+                          .AddInputAttr(kNumberTypeFloat32)
+                          .AddInputAttr(kNumberTypeInt32)
+                          .AddInputAttr(kNumberTypeInt32)
+                          .AddInputAttr(kNumberTypeFloat32)
+                          .AddInputAttr(kNumberTypeFloat32)
+                          .AddInputAttr(kNumberTypeInt32)
+                          .AddOutputAttr(kNumberTypeUInt32)
+                          .AddOutputAttr(kNumberTypeFloat32)
+                          .AddOutputAttr(kNumberTypeFloat32),
+                        ConstrainGpuKernel, float, int, unsigned int)
+
+}  // namespace kernel
+}  // namespace mindspore
diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/simple_constrain/constrain_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/simple_constrain/constrain_kernel.h
new file mode 100644
index 00000000000..b5518f96e2d
--- /dev/null
+++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/simple_constrain/constrain_kernel.h
@@ -0,0 +1,176 @@
+/**
+ * Copyright 2021 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/**
+ *Note:
+ * Constrain. This is an experimental interface that is subject to change and/or deletion.
+ */
+#ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_SPONG_SIMPLE_CONSTRAIN_CONSTRAIN_KERNEL_H_
+#define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_SPONG_SIMPLE_CONSTRAIN_CONSTRAIN_KERNEL_H_
+
+#include "backend/kernel_compiler/gpu/cuda_impl/sponge/simple_constrain/constrain_force_virial_impl.cuh"
+
+#include <cuda_runtime_api.h>
+#include <map>
+#include <string>
+#include <vector>
+
+#include "backend/kernel_compiler/gpu/gpu_kernel.h"
+#include "backend/kernel_compiler/gpu/gpu_kernel_factory.h"
+#include "runtime/device/gpu/cuda_common.h"
+
+namespace mindspore {
+namespace kernel {
+template <typename T, typename T1, typename T2>
+class ConstrainGpuKernel : public GpuKernel {
+ public:
+  ConstrainGpuKernel() : ele_crd(1), step(1) {}
+  ~ConstrainGpuKernel() override = default;
+
+  bool Init(const CNodePtr &kernel_node) override {
+    // get bond_numbers
+    kernel_node_ = kernel_node;
+    atom_numbers = static_cast<int>(GetAttr<int64_t>(kernel_node, "atom_numbers"));
+    constrain_pair_numbers = static_cast<int>(GetAttr<int64_t>(kernel_node, "constrain_pair_numbers"));
+    iteration_numbers = static_cast<int>(GetAttr<int64_t>(kernel_node, "iteration_numbers"));
+    half_exp_gamma_plus_half = static_cast<float>(GetAttr<float>(kernel_node, "half_exp_gamma_plus_half"));
+
+    auto shape_crd = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
+    auto shape_quarter_cof = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 1);
+    auto shape_mass_inverse = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 2);
+
+    auto shape_scaler = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 3);
+    auto shape_pair_dr = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 4);
+    auto shape_atom_i_serials = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 5);
+    auto shape_atom_j_serials = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 6);
+    auto shape_constant_rs = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 7);
+    auto shape_constrain_ks = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 8);
+
+    for (size_t i = 0; i < shape_scaler.size(); i++) ele_scaler *= shape_scaler[i];
+    for (size_t i = 0; i < shape_pair_dr.size(); i++) ele_pair_dr *= shape_pair_dr[i];
+    for (size_t i = 0; i < shape_atom_i_serials.size(); i++) ele_atom_i_serials *= shape_atom_i_serials[i];
+    for (size_t i = 0; i < shape_atom_j_serials.size(); i++) ele_atom_j_serials *= shape_atom_j_serials[i];
+    for (size_t i = 0; i < shape_constant_rs.size(); i++) ele_constant_rs *= shape_constant_rs[i];
+    for (size_t i = 0; i < shape_constrain_ks.size(); i++) ele_constrain_ks *= shape_constrain_ks[i];
+
+    for (size_t i = 0; i < shape_crd.size(); i++) ele_crd *= shape_crd[i];
+    for (size_t i = 0; i < shape_quarter_cof.size(); i++) ele_quarter_cof *= shape_quarter_cof[i];
+    for (size_t i = 0; i < shape_mass_inverse.size(); i++) ele_mass_inverse *= shape_mass_inverse[i];
+
+    InitSizeLists();
+    return true;
+  }
+
+  const std::vector<size_t> &GetInputSizeList() const override { return input_size_list_; }
+  const std::vector<size_t> &GetOutputSizeList() const override { return output_size_list_; }
+  const std::vector<size_t> &GetWorkspaceSizeList() const override { return workspace_size_list_; }
+
+  bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
+              const std::vector<AddressPtr> &outputs, void *stream_ptr) override {
+    auto crd = GetDeviceAddress<T>(inputs, 0);
+    auto quarter_cof = GetDeviceAddress<T>(inputs, 1);
+    auto mass_inverse = GetDeviceAddress<T>(inputs, 2);
+    auto scaler = GetDeviceAddress<T>(inputs, 3);
+    auto pair_dr = GetDeviceAddress<T>(inputs, 4);
+    auto atom_i_serials = GetDeviceAddress<T1>(inputs, 5);
+    auto atom_j_serials = GetDeviceAddress<T1>(inputs, 6);
+    auto constant_rs = GetDeviceAddress<T>(inputs, 7);
+    auto constrain_ks = GetDeviceAddress<T>(inputs, 8);
+    auto d_need_pressure = GetDeviceAddress<T1>(inputs, 9);
+
+    auto constrain_pair = GetDeviceAddress<CONSTRAIN_PAIR>(workspace, 0);
+
+    auto uint_crd = GetDeviceAddress<T2>(outputs, 0);
+
+    auto test_frc_f = GetDeviceAddress<T>(outputs, 1);
+    auto d_atom_virial = GetDeviceAddress<T>(outputs, 2);
+
+    cudaMemcpyAsync(&need_pressure, d_need_pressure, sizeof(int), cudaMemcpyDeviceToHost,
+                    reinterpret_cast<cudaStream_t>(stream_ptr));
+    cudaStreamSynchronize(reinterpret_cast<cudaStream_t>(stream_ptr));
+
+    set_zero_force_with_virial(atom_numbers, constrain_pair_numbers, test_frc_f, d_atom_virial,
+                               reinterpret_cast<cudaStream_t>(stream_ptr));
+
+    for (int i = 0; i < iteration_numbers; i++) {
+      refresh_uint_crd_update(atom_numbers, half_exp_gamma_plus_half, crd, quarter_cof, test_frc_f, mass_inverse,
+                              uint_crd, reinterpret_cast<cudaStream_t>(stream_ptr));
+
+      if (need_pressure) {
+        constrain_force_cycle_with_virial_update(atom_numbers, constrain_pair_numbers, uint_crd, scaler,
+                                                 constrain_pair, pair_dr, atom_i_serials, atom_j_serials, constant_rs,
+                                                 constrain_ks, test_frc_f, d_atom_virial,
+                                                 reinterpret_cast<cudaStream_t>(stream_ptr));
+      } else {
+        constrain_force_cycle_update(atom_numbers, constrain_pair_numbers, uint_crd, scaler, constrain_pair, pair_dr,
+                                     atom_i_serials, atom_j_serials, constant_rs, constrain_ks, test_frc_f,
+                                     reinterpret_cast<cudaStream_t>(stream_ptr));
+      }
+    }
+    step++;
+
+    return true;
+  }
+
+ protected:
+  void InitSizeLists() override {
+    input_size_list_.push_back(ele_crd * sizeof(T));
+    input_size_list_.push_back(ele_quarter_cof * sizeof(T));
+    input_size_list_.push_back(ele_mass_inverse * sizeof(T));
+
+    input_size_list_.push_back(ele_scaler * sizeof(T));
+    input_size_list_.push_back(ele_pair_dr * sizeof(T));
+    input_size_list_.push_back(ele_atom_i_serials * sizeof(T1));
+    input_size_list_.push_back(ele_atom_j_serials * sizeof(T1));
+    input_size_list_.push_back(ele_constant_rs * sizeof(T));
+    input_size_list_.push_back(ele_constrain_ks * sizeof(T));
+
+    workspace_size_list_.push_back(constrain_pair_numbers * sizeof(CONSTRAIN_PAIR));
+
+    output_size_list_.push_back(3 * atom_numbers * sizeof(T2));
+
+    output_size_list_.push_back(3 * atom_numbers * sizeof(T));
+    output_size_list_.push_back(constrain_pair_numbers * sizeof(T));
+  }
+
+ private:
+  size_t ele_scaler = 1;
+  size_t ele_pair_dr = 1;
+  size_t ele_atom_i_serials = 1;
+  size_t ele_atom_j_serials = 1;
+  size_t ele_constant_rs = 1;
+  size_t ele_constrain_ks = 1;
+  size_t ele_crd = 1;
+  size_t ele_quarter_cof = 1;
+  size_t ele_mass_inverse = 1;
+
+  std::vector<size_t> input_size_list_;
+  std::vector<size_t> output_size_list_;
+  std::vector<size_t> workspace_size_list_;
+  int atom_numbers;
+  int constrain_pair_numbers;
+  int iteration_numbers;
+  int need_pressure;
+  float half_exp_gamma_plus_half;
+  int step;
+  struct CONSTRAIN_PAIR {
+    int atom_i_serial;
+    int atom_j_serial;
+    float constant_r;
+    float constrain_k;
+  };
+};
+}  // namespace kernel
+}  // namespace mindspore
+#endif
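Unlike ConstrainForce and ConstrainForceVirial, the Constrain kernel decides per launch whether to accumulate the virial, by reading the need_pressure flag from device memory. The copy is asynchronous, so the explicit cudaStreamSynchronize before the branch is required. The pattern in isolation (a minimal sketch, assuming only the CUDA runtime API; error handling omitted):

    #include <cuda_runtime.h>

    // Read a device-side int flag; the host must block on the stream
    // before it can branch on the value.
    int ReadDeviceFlag(const int *d_flag, cudaStream_t stream) {
      int h_flag = 0;
      cudaMemcpyAsync(&h_flag, d_flag, sizeof(int), cudaMemcpyDeviceToHost, stream);
      cudaStreamSynchronize(stream);
      return h_flag;
    }

This synchronization stalls the stream every step, which is the cost of making the pressure branch a runtime input rather than a compile-time attribute.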
diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/simple_constrain/last_crd_to_dr_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/simple_constrain/last_crd_to_dr_kernel.cc
index 6465a0604ed..9ab41a74177 100644
--- a/mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/simple_constrain/last_crd_to_dr_kernel.cc
+++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/simple_constrain/last_crd_to_dr_kernel.cc
@@ -13,12 +13,15 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-
+/**
+ *Note:
+ * LastCrdToDr. This is an experimental interface that is subject to change and/or deletion.
+ */
 #include "backend/kernel_compiler/gpu/sponge/simple_constrain/last_crd_to_dr_kernel.h"
 
 namespace mindspore {
 namespace kernel {
-MS_REG_GPU_KERNEL_TWO(lastcrdtodr,
+MS_REG_GPU_KERNEL_TWO(LastCrdToDr,
                       KernelAttr()
                         .AddInputAttr(kNumberTypeFloat32)
                         .AddInputAttr(kNumberTypeFloat32)
diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/simple_constrain/refresh_crd_vel_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/simple_constrain/refresh_crd_vel_kernel.cc
index 98c5b18d891..3c39b419a50 100644
--- a/mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/simple_constrain/refresh_crd_vel_kernel.cc
+++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/simple_constrain/refresh_crd_vel_kernel.cc
@@ -13,12 +13,16 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
+/**
+ *Note:
+ * RefreshCrdVel. This is an experimental interface that is subject to change and/or deletion.
+ */
 #include "backend/kernel_compiler/gpu/sponge/simple_constrain/refresh_crd_vel_kernel.h"
 
 namespace mindspore {
 namespace kernel {
-MS_REG_GPU_KERNEL_TWO(refreshcrdvel,
+MS_REG_GPU_KERNEL_TWO(RefreshCrdVel,
                       KernelAttr()
                         .AddInputAttr(kNumberTypeFloat32)
                         .AddInputAttr(kNumberTypeFloat32)
diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/simple_constrain/refresh_uint_crd_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/simple_constrain/refresh_uint_crd_kernel.cc
index 63f42a7f97e..60751e56de1 100644
--- a/mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/simple_constrain/refresh_uint_crd_kernel.cc
+++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/simple_constrain/refresh_uint_crd_kernel.cc
@@ -13,12 +13,15 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-
+/**
+ *Note:
+ * RefreshUintCrd. This is an experimental interface that is subject to change and/or deletion.
+ */
 #include "backend/kernel_compiler/gpu/sponge/simple_constrain/refresh_uint_crd_kernel.h"
 
 namespace mindspore {
 namespace kernel {
-MS_REG_GPU_KERNEL_TWO(refreshuintcrd,
+MS_REG_GPU_KERNEL_TWO(RefreshUintCrd,
                       KernelAttr()
                         .AddInputAttr(kNumberTypeFloat32)
                         .AddInputAttr(kNumberTypeFloat32)