!22706 [BETA] SPONGE ops: LJ, PME, simple constrain

Merge pull request !22706 from jiahongQian/master0831
i-robot 2021-09-02 02:10:46 +00:00 committed by Gitee
commit 7d8ffd6a69
40 changed files with 2769 additions and 240 deletions

View File

@@ -0,0 +1,144 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
*Note:
* LJForce. This is an experimental interface that is subject to change and/or deletion.
*/
#include "backend/kernel_compiler/gpu/cuda_impl/sponge/lj/lj_direct_cf_force_with_lj_virial_direct_cf_energy_impl.cuh"
#include "backend/kernel_compiler/gpu/cuda_impl/sponge/common_sponge.cuh"
__global__ void LJ_Direct_CF_Force_With_LJ_Virial_Direct_CF_Energy_CUDA(
const int atom_numbers, const NEIGHBOR_LIST *nl, const UINT_VECTOR_LJ_TYPE *uint_crd, const VECTOR *boxlength,
const float *LJ_type_A, const float *LJ_type_B, const float cutoff, VECTOR *frc, const float pme_beta,
const float sqrt_pi, float *atom_lj_virial, float *atom_direct_cf_energy) {
int atom_i = blockDim.x * blockIdx.x + threadIdx.x;
if (atom_i < atom_numbers) {
NEIGHBOR_LIST nl_i = nl[atom_i];
int N = nl_i.atom_numbers;
int atom_j;
int int_x;
int int_y;
int int_z;
UINT_VECTOR_LJ_TYPE r1 = uint_crd[atom_i], r2;
VECTOR dr;
float dr_2;
float dr_4;
float dr_8;
float dr_6;
float frc_abs = 0.;
VECTOR frc_lin;
VECTOR frc_record = {0., 0., 0.};
float charge_i = r1.charge;
float charge_j;
float dr_abs;
float dr_1;
float beta_dr;
float frc_cf_abs;
float virial_lin = 0.;
float energy_lin = 0.;
int x, y;
int atom_pair_LJ_type;
for (int j = threadIdx.y; j < N; j = j + blockDim.y) {
atom_j = nl_i.atom_serial[j];
r2 = uint_crd[atom_j];
charge_j = r2.charge;
int_x = r2.uint_x - r1.uint_x;
int_y = r2.uint_y - r1.uint_y;
int_z = r2.uint_z - r1.uint_z;
dr.x = boxlength[0].x * int_x;
dr.y = boxlength[0].y * int_y;
dr.z = boxlength[0].z * int_z;
dr_abs = norm3df(dr.x, dr.y, dr.z);
if (dr_abs < cutoff) {
dr_1 = 1. / dr_abs;
dr_2 = dr_1 * dr_1;
dr_4 = dr_2 * dr_2;
dr_8 = dr_4 * dr_4;
dr_6 = dr_4 * dr_2;
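// Branchless pair-type lookup: y becomes |LJ_type_j - LJ_type_i| and x their sum, so that
// r2.LJ_type ends up as max(type_i, type_j) and x as min(type_i, type_j); atom_pair_LJ_type
// then indexes the upper-triangular LJ_type_A / LJ_type_B coefficient tables.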
y = (r2.LJ_type - r1.LJ_type);
x = y >> 31;
y = (y ^ x) - x;
x = r2.LJ_type + r1.LJ_type;
r2.LJ_type = (x + y) >> 1;
x = (x - y) >> 1;
atom_pair_LJ_type = (r2.LJ_type * (r2.LJ_type + 1) >> 1) + x;
frc_abs = (-LJ_type_A[atom_pair_LJ_type] * dr_6 + LJ_type_B[atom_pair_LJ_type]) * dr_8;
beta_dr = pme_beta * dr_abs;
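// Direct-space Ewald (PME) force factor. With sqrt_pi passed as TWO_DIVIDED_BY_SQRT_PI (2/sqrt(pi))
// by the host wrapper below, frc_cf_abs evaluates -(d/dr)[erfc(beta*r)/r] / r
// = [erfc(beta*r) + (2*beta*r/sqrt(pi)) * exp(-(beta*r)^2)] / r^3, scaled by q_i * q_j.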
frc_cf_abs = beta_dr * sqrt_pi * expf(-beta_dr * beta_dr) + erfcf(beta_dr);
frc_cf_abs = frc_cf_abs * dr_2 * dr_1;
frc_cf_abs = charge_i * charge_j * frc_cf_abs;
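// Per-pair accumulators: the direct-space Coulomb energy q_i * q_j * erfc(beta*r) / r, and the
// LJ virial term -frc_abs * r^2 (frc_abs is still LJ-only here; frc_cf_abs is subtracted afterwards).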
energy_lin = energy_lin + charge_i * charge_j * erfcf(beta_dr) * dr_1;
virial_lin = virial_lin - frc_abs * dr_abs * dr_abs;
frc_abs = frc_abs - frc_cf_abs;
frc_lin.x = frc_abs * dr.x;
frc_lin.y = frc_abs * dr.y;
frc_lin.z = frc_abs * dr.z;
frc_record.x = frc_record.x + frc_lin.x;
frc_record.y = frc_record.y + frc_lin.y;
frc_record.z = frc_record.z + frc_lin.z;
atomicAdd(&frc[atom_j].x, -frc_lin.x);
atomicAdd(&frc[atom_j].y, -frc_lin.y);
atomicAdd(&frc[atom_j].z, -frc_lin.z);
}
}
atomicAdd(&frc[atom_i].x, frc_record.x);
atomicAdd(&frc[atom_i].y, frc_record.y);
atomicAdd(&frc[atom_i].z, frc_record.z);
atomicAdd(&atom_direct_cf_energy[atom_i], energy_lin);
atomicAdd(&atom_lj_virial[atom_i], virial_lin);
}
}
void LJ_Direct_CF_Force_With_LJ_Virial_Direct_CF_Energy(
const int atom_numbers, const float cutoff, const float pme_beta, const unsigned int *uint_crd_f, const int *LJtype,
const float *charge, const float *scaler_f, float *uint_crd_with_LJ, int *nl_atom_numbers, int *nl_atom_serial,
int *nl, const float *d_LJ_A, const float *d_LJ_B, float *frc_f, float *atom_lj_virial, float *atom_energy,
int max_neighbor_numbers, cudaStream_t stream) {
Reset_List<<<ceilf(static_cast<float>(3. * atom_numbers) / 128), 128, 0, stream>>>(3 * atom_numbers, frc_f, 0.);
Reset_List<<<ceilf(static_cast<float>(atom_numbers) / 128), 128, 0, stream>>>(atom_numbers, atom_energy, 0.);
Reset_List<<<ceilf(static_cast<float>(atom_numbers) / 128), 128, 0, stream>>>(atom_numbers, atom_lj_virial, 0.);
VECTOR *frc = reinterpret_cast<VECTOR *>(frc_f);
VECTOR *scaler = const_cast<VECTOR *>(reinterpret_cast<const VECTOR *>(scaler_f));
NEIGHBOR_LIST *nl_a = reinterpret_cast<NEIGHBOR_LIST *>(nl);
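// construct_neighbor_list_kernel fills each NEIGHBOR_LIST entry with its neighbor count and a pointer
// into nl_atom_serial (presumably row i starting at nl_atom_serial + i * max_neighbor_numbers).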
construct_neighbor_list_kernel<<<ceilf(static_cast<float>(atom_numbers) / 128), 128, 0, stream>>>(
atom_numbers, max_neighbor_numbers, nl_atom_numbers, nl_atom_serial, nl_a);
UINT_VECTOR_LJ_TYPE *uint_crd_with_LJ_a = reinterpret_cast<UINT_VECTOR_LJ_TYPE *>(uint_crd_with_LJ);
UNSIGNED_INT_VECTOR *uint_crd =
const_cast<UNSIGNED_INT_VECTOR *>(reinterpret_cast<const UNSIGNED_INT_VECTOR *>(uint_crd_f));
Copy_Crd_To_New_Crd_Start<<<ceilf(static_cast<float>(atom_numbers) / 32), 32, 0, stream>>>(
atom_numbers, uint_crd, uint_crd_with_LJ_a, LJtype, charge);
LJ_Direct_CF_Force_With_LJ_Virial_Direct_CF_Energy_CUDA<<<ceilf(static_cast<float>(atom_numbers) / 8), thread_LJ, 0,
stream>>>(
atom_numbers, nl_a, uint_crd_with_LJ_a, scaler, d_LJ_A, d_LJ_B, cutoff, frc, pme_beta, TWO_DIVIDED_BY_SQRT_PI,
atom_lj_virial, atom_energy);
return;
}
void LJ_Direct_CF_Force_With_LJ_Virial_Direct_CF_Energy(
const int atom_numbers, const float cutoff, const float pme_beta, const unsigned int *uint_crd_f, const int *LJtype,
const float *charge, const float *scaler_f, float *uint_crd_with_LJ, int *nl_atom_numbers, int *nl_atom_serial,
int *nl, const float *d_LJ_A, const float *d_LJ_B, float *frc_f, float *atom_lj_virial, float *atom_energy,
int max_neighbor_numbers, cudaStream_t stream);

View File

@@ -0,0 +1,33 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
*Note:
* LJForce. This is an experimental interface that is subject to change and/or deletion.
*/
#ifndef MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_LJ_DIRECT_CF_FORCE_WITH_LJ_VIRIAL_DIRECT_CF_ENERGY_IMPL_H_
#define MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_LJ_DIRECT_CF_FORCE_WITH_LJ_VIRIAL_DIRECT_CF_ENERGY_IMPL_H_
#include <curand_kernel.h>
#include "runtime/device/gpu/cuda_common.h"
void LJ_Direct_CF_Force_With_LJ_Virial_Direct_CF_Energy(
const int atom_numbers, const float cutoff, const float pme_beta, const unsigned int *uint_crd_f, const int *LJtype,
const float *charge, const float *scaler_f, float *uint_crd_with_LJ, int *nl_atom_numbers, int *nl_atom_serial,
int *nl, const float *d_LJ_A, const float *d_LJ_B, float *frc_f, float *atom_lj_virial, float *atom_energy,
int max_neighbor_numbers, cudaStream_t stream);
#endif

View File

@@ -13,14 +13,16 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
*Note:
* MDIterationLeapFrog. This is an experimental interface that is subject to change and/or deletion.
*/
#include "backend/kernel_compiler/gpu/cuda_impl/sponge/nvtit/md_iteration_leap_frog_impl.cuh"
#include "backend/kernel_compiler/gpu/cuda_impl/sponge/common_sponge.cuh"
__global__ void MD_Iteration_Leap_Frog_With_LiuJian(const int atom_numbers, const float half_dt, const float dt,
const float exp_gamma, const float *inverse_mass,
const float *sqrt_mass_inverse, VECTOR *vel, VECTOR *crd,
VECTOR *frc, VECTOR *acc, VECTOR *random_frc) {
__global__ void MD_Iteration_Leap_Frog(const int atom_numbers, VECTOR *vel, VECTOR *crd, VECTOR *frc, VECTOR *acc,
const float *inverse_mass, const float dt) {
int i = blockDim.x * blockIdx.x + threadIdx.x;
if (i < atom_numbers) {
acc[i].x = inverse_mass[i] * frc[i].x;
@@ -31,17 +33,9 @@ __global__ void MD_Iteration_Leap_Frog_With_LiuJian(const int atom_numbers, cons
vel[i].y = vel[i].y + dt * acc[i].y;
vel[i].z = vel[i].z + dt * acc[i].z;
crd[i].x = crd[i].x + half_dt * vel[i].x;
crd[i].y = crd[i].y + half_dt * vel[i].y;
crd[i].z = crd[i].z + half_dt * vel[i].z;
vel[i].x = exp_gamma * vel[i].x + sqrt_mass_inverse[i] * random_frc[i].x;
vel[i].y = exp_gamma * vel[i].y + sqrt_mass_inverse[i] * random_frc[i].y;
vel[i].z = exp_gamma * vel[i].z + sqrt_mass_inverse[i] * random_frc[i].z;
crd[i].x = crd[i].x + half_dt * vel[i].x;
crd[i].y = crd[i].y + half_dt * vel[i].y;
crd[i].z = crd[i].z + half_dt * vel[i].z;
crd[i].x = crd[i].x + dt * vel[i].x;
crd[i].y = crd[i].y + dt * vel[i].y;
crd[i].z = crd[i].z + dt * vel[i].z;
frc[i].x = 0.;
frc[i].y = 0.;
@@ -49,88 +43,12 @@ __global__ void MD_Iteration_Leap_Frog_With_LiuJian(const int atom_numbers, cons
}
}
__global__ void MD_Iteration_Leap_Frog_With_LiuJian_With_Max_Velocity(const int atom_numbers, const float half_dt,
const float dt, const float exp_gamma,
const float *inverse_mass,
const float *sqrt_mass_inverse, VECTOR *vel,
VECTOR *crd, VECTOR *frc, VECTOR *acc,
VECTOR *random_frc, const float max_vel) {
int i = blockDim.x * blockIdx.x + threadIdx.x;
float abs_vel;
if (i < atom_numbers) {
acc[i].x = inverse_mass[i] * frc[i].x;
acc[i].y = inverse_mass[i] * frc[i].y;
acc[i].z = inverse_mass[i] * frc[i].z;
vel[i].x = vel[i].x + dt * acc[i].x;
vel[i].y = vel[i].y + dt * acc[i].y;
vel[i].z = vel[i].z + dt * acc[i].z;
abs_vel = norm3df(vel[i].x, vel[i].y, vel[i].z);
if (abs_vel < max_vel) {
} else {
abs_vel = max_vel / abs_vel;
vel[i].x = abs_vel * vel[i].x;
vel[i].y = abs_vel * vel[i].y;
vel[i].z = abs_vel * vel[i].z;
}
crd[i].x = crd[i].x + half_dt * vel[i].x;
crd[i].y = crd[i].y + half_dt * vel[i].y;
crd[i].z = crd[i].z + half_dt * vel[i].z;
vel[i].x = exp_gamma * vel[i].x + sqrt_mass_inverse[i] * random_frc[i].x;
vel[i].y = exp_gamma * vel[i].y + sqrt_mass_inverse[i] * random_frc[i].y;
vel[i].z = exp_gamma * vel[i].z + sqrt_mass_inverse[i] * random_frc[i].z;
crd[i].x = crd[i].x + half_dt * vel[i].x;
crd[i].y = crd[i].y + half_dt * vel[i].y;
crd[i].z = crd[i].z + half_dt * vel[i].z;
frc[i].x = 0.;
frc[i].y = 0.;
frc[i].z = 0.;
}
void MDIterationLeapFrog(const int atom_numbers, float *vel, float *crd, float *frc, float *acc,
const float *inverse_mass, const float dt, cudaStream_t stream) {
VECTOR *d_vel = reinterpret_cast<VECTOR *>(vel);
VECTOR *d_crd = reinterpret_cast<VECTOR *>(crd);
VECTOR *d_frc = reinterpret_cast<VECTOR *>(frc);
VECTOR *d_acc = reinterpret_cast<VECTOR *>(acc);
MD_Iteration_Leap_Frog<<<ceilf(static_cast<float>(atom_numbers) / 128), 128, 0, stream>>>(
atom_numbers, d_vel, d_crd, d_frc, d_acc, inverse_mass, dt);
}
void MDIterationLeapFrog(const int float4_numbers, const int atom_numbers, const float half_dt, const float dt,
const float exp_gamma, const int is_max_velocity, const float max_velocity,
const float *d_mass_inverse, const float *d_sqrt_mass, float *vel_f, float *crd_f,
float *frc_f, float *acc_f, cudaStream_t stream) {
Reset_List<<<ceilf(static_cast<float>(3. * atom_numbers) / 128), 128>>>(3 * atom_numbers, acc_f, 0.);
VECTOR *frc = const_cast<VECTOR *>(reinterpret_cast<const VECTOR *>(frc_f));
VECTOR *vel = const_cast<VECTOR *>(reinterpret_cast<const VECTOR *>(vel_f));
VECTOR *acc = const_cast<VECTOR *>(reinterpret_cast<const VECTOR *>(acc_f));
VECTOR *crd = const_cast<VECTOR *>(reinterpret_cast<const VECTOR *>(crd_f));
curandStatePhilox4_32_10_t *rand_state;
VECTOR *random_force;
Cuda_Malloc_Safely(reinterpret_cast<void **>(&random_force), sizeof(float4) * float4_numbers);
Cuda_Malloc_Safely(reinterpret_cast<void **>(&rand_state), sizeof(curandStatePhilox4_32_10_t) * float4_numbers);
Setup_Rand_Normal_Kernel<<<ceilf(static_cast<float>(float4_numbers) / 32.), 32>>>(float4_numbers, rand_state, 1);
Rand_Normal<<<ceilf(static_cast<float>(float4_numbers) / 32.), 32, 0, stream>>>(
float4_numbers, rand_state, reinterpret_cast<float4 *>(random_force));
if (!is_max_velocity) {
MD_Iteration_Leap_Frog_With_LiuJian<<<ceilf(static_cast<float>(atom_numbers) / 32), 32, 0, stream>>>(
atom_numbers, half_dt, dt, exp_gamma, d_mass_inverse, d_sqrt_mass, vel, crd, frc, acc, random_force);
} else {
MD_Iteration_Leap_Frog_With_LiuJian_With_Max_Velocity<<<ceilf(static_cast<float>(atom_numbers) / 32), 32, 0,
stream>>>(atom_numbers, half_dt, dt, exp_gamma,
d_mass_inverse, d_sqrt_mass, vel, crd, frc, acc,
random_force, max_velocity);
cudaStreamSynchronize(stream);
cudaFree(random_force);
cudaFree(rand_state);
return;
}
}
void MDIterationLeapFrog(const int float4_numbers, const int atom_numbers, const float half_dt, const float dt,
const float exp_gamma, const int is_max_velocity, const float max_velocity,
const float *d_mass_inverse, const float *d_sqrt_mass, float *vel_f, float *crd_f,
float *frc_f, float *acc_f, cudaStream_t stream);

View File

@@ -13,15 +13,18 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
*Note:
* MDIterationLeapFrog. This is an experimental interface that is subject to change and/or deletion.
*/
#ifndef MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_NVTIT_MD_ITERATION_LEAP_FROG_IMPL_H
#define MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_NVTIT_MD_ITERATION_LEAP_FROG_IMPL_H
#include <curand_kernel.h>
#include "runtime/device/gpu/cuda_common.h"
void MDIterationLeapFrog(const int float4_numbers, const int atom_numbers, const float half_dt, const float dt,
const float exp_gamma, const int is_max_velocity, const float max_velocity,
const float *d_mass_inverse, const float *d_sqrt_mass, float *vel_f, float *crd_f,
float *frc_f, float *acc_f, cudaStream_t stream);
void MDIterationLeapFrog(const int atom_numbers, float *vel, float *crd, float *frc, float *acc,
const float *inverse_mass, const float dt, cudaStream_t stream);
#endif // MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_NVTIT_MD_ITERATION_LEAP_FROG_IMPL_H
#endif // MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_MD_ITERATION_LEAP_FROG_LIUJIAN_GPU_IMPL_H_

View File

@@ -13,6 +13,10 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
*Note:
* PME_Common. This is an experimental interface that is subject to change and/or deletion.
*/
#ifndef MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_PME_PME_COMMON_H_
#define MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_PME_PME_COMMON_H_
@@ -25,7 +29,7 @@ __constant__ float PME_dMa[4] = {0.5, -1.5, 1.5, -0.5};
__constant__ float PME_dMb[4] = {0, 1, -2, 1};
__constant__ float PME_dMc[4] = {0, 0.5, 0, -0.5};
#define PI 3.1415926
const float periodic_factor_inverse = 2.32830643e-10;
const float periodic_factor_inverse = 2.3283064365387e-10;
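// 2.3283064365387e-10 is approximately 2^-32 (1/4294967296); multiplied by fftx/ffty/fftz it maps
// 32-bit unsigned coordinates onto the PME mesh.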
static dim3 thread_PME;
const float cutoff = 10.0;
@@ -98,6 +102,10 @@ static float getb(int k, int NFFT, int B_order) {
return res.x * res.x + res.y * res.y;
}
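// device_add folds factor[0] * (charge_sum[0])^2 into ene[0]; PMEEnergyUpdate (pme_energy_update_impl.cu)
// uses it to add the neutralizing-background correction for a non-neutral cell to the self energy.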
__global__ static void device_add(float *ene, float *factor, float *charge_sum) {
ene[0] += factor[0] * charge_sum[0] * charge_sum[0];
}
__global__ static void PME_Atom_Near(const UNSIGNED_INT_VECTOR *uint_crd, int *PME_atom_near, const int PME_Nin,
const float periodic_factor_inverse_x, const float periodic_factor_inverse_y,
const float periodic_factor_inverse_z, const int atom_numbers, const int fftx,
@@ -130,10 +138,13 @@ __global__ static void PME_Atom_Near(const UNSIGNED_INT_VECTOR *uint_crd, int *P
UNSIGNED_INT_VECTOR temp_kxyz = PME_kxyz[k];
kx = tempux - temp_kxyz.uint_x;
if (kx < 0) kx += fftx;
if (kx > fftx) kx -= fftx;
ky = tempuy - temp_kxyz.uint_y;
if (ky < 0) ky += ffty;
if (ky > ffty) ky -= ffty;
kz = tempuz - temp_kxyz.uint_z;
if (kz < 0) kz += fftz;
if (kz > fftz) kz -= fftz;
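// Flatten the wrapped (kx, ky, kz) cell into a linear mesh index; PME_Nin is assumed to be ffty * fftz.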
temp_near[k] = kx * PME_Nin + ky * fftz + kz;
}
}
@@ -227,4 +238,120 @@ __global__ static void PME_Direct_Energy(const int atom_numbers, const NEIGHBOR_
}
}
__global__ static void PME_Direct_Atom_Energy(const int atom_numbers, const NEIGHBOR_LIST *nl,
const UNSIGNED_INT_VECTOR *uint_crd, const VECTOR *boxlength,
const float *charge, const float beta, const float cutoff_square,
float *direct_ene) {
int atom_i = blockDim.x * blockIdx.x + threadIdx.x;
if (atom_i < atom_numbers) {
NEIGHBOR_LIST nl_i = nl[atom_i];
int N = nl_i.atom_numbers;
int atom_j;
int int_x;
int int_y;
int int_z;
UNSIGNED_INT_VECTOR r1 = uint_crd[atom_i], r2;
VECTOR dr;
float dr2;
float dr_abs;
// float dr_inverse;
float ene_temp;
float charge_i = charge[atom_i];
float ene_lin = 0.;
for (int j = threadIdx.y; j < N; j = j + blockDim.y) {
atom_j = nl_i.atom_serial[j];
r2 = uint_crd[atom_j];
int_x = r2.uint_x - r1.uint_x;
int_y = r2.uint_y - r1.uint_y;
int_z = r2.uint_z - r1.uint_z;
dr.x = boxlength[0].x * int_x;
dr.y = boxlength[0].y * int_y;
dr.z = boxlength[0].z * int_z;
dr2 = dr.x * dr.x + dr.y * dr.y + dr.z * dr.z;
if (dr2 < cutoff_square) {
dr_abs = norm3df(dr.x, dr.y, dr.z);
ene_temp = charge_i * charge[atom_j] * erfcf(beta * dr_abs) / dr_abs;
ene_lin = ene_lin + ene_temp;
}
}
atomicAdd(&direct_ene[atom_i], ene_lin);
}
}
__global__ static void PME_Energy_Product(const int element_number, const float *list1, const float *list2,
float *sum) {
if (threadIdx.x == 0) {
sum[0] = 0.;
}
__syncthreads();
float lin = 0.0;
for (int i = threadIdx.x; i < element_number; i = i + blockDim.x) {
lin = lin + list1[i] * list2[i];
}
atomicAdd(sum, lin);
}
__global__ static void PME_BCFQ(cufftComplex *PME_FQ, float *PME_BC, int PME_Nfft) {
int index = blockDim.x * blockIdx.x + threadIdx.x;
if (index < PME_Nfft) {
float tempf = PME_BC[index];
cufftComplex tempc = PME_FQ[index];
PME_FQ[index].x = tempc.x * tempf;
PME_FQ[index].y = tempc.y * tempf;
}
}
__global__ static void PME_Excluded_Energy_Correction(const int atom_numbers, const UNSIGNED_INT_VECTOR *uint_crd,
const VECTOR *sacler, const float *charge, const float pme_beta,
const float sqrt_pi, const int *excluded_list_start,
const int *excluded_list, const int *excluded_atom_numbers,
float *ene) {
int atom_i = blockDim.x * blockIdx.x + threadIdx.x;
if (atom_i < atom_numbers) {
int excluded_number = excluded_atom_numbers[atom_i];
if (excluded_number > 0) {
int list_start = excluded_list_start[atom_i];
// int atom_min = excluded_list[list_start];
int list_end = list_start + excluded_number;
int atom_j;
int int_x;
int int_y;
int int_z;
float charge_i = charge[atom_i];
float charge_j;
float dr_abs;
float beta_dr;
UNSIGNED_INT_VECTOR r1 = uint_crd[atom_i], r2;
VECTOR dr;
float dr2;
float ene_lin = 0.;
for (int i = list_start; i < list_end; i = i + 1) {
atom_j = excluded_list[i];
r2 = uint_crd[atom_j];
charge_j = charge[atom_j];
int_x = r2.uint_x - r1.uint_x;
int_y = r2.uint_y - r1.uint_y;
int_z = r2.uint_z - r1.uint_z;
dr.x = sacler[0].x * int_x;
dr.y = sacler[0].y * int_y;
dr.z = sacler[0].z * int_z;
dr2 = dr.x * dr.x + dr.y * dr.y + dr.z * dr.z;
dr_abs = sqrtf(dr2);
beta_dr = pme_beta * dr_abs;
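// Excluded-pair correction: subtract erf(beta*r)/r, the contribution that the reciprocal-space sum
// implicitly adds for pairs that should not interact, so the excluded pair's Coulomb term cancels.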
ene_lin -= charge_i * charge_j * erff(beta_dr) / dr_abs;
}
atomicAdd(ene, ene_lin);
}
}
}
#endif

View File

@@ -13,22 +13,14 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
*Note:
* PMEEnergy. This is an experimental interface that is subject to change and/or deletion.
*/
#include "backend/kernel_compiler/gpu/cuda_impl/sponge/pme/pme_energy_impl.cuh"
#include "backend/kernel_compiler/gpu/cuda_impl/sponge/pme/pme_common.cuh"
#include "backend/kernel_compiler/gpu/cuda_impl/sponge/common_sponge.cuh"
__global__ void PME_Energy_Product(const int element_number, const float *list1, const float *list2, float *sum) {
if (threadIdx.x == 0) {
sum[0] = 0.;
}
__syncthreads();
float lin = 0.0;
for (int i = threadIdx.x; i < element_number; i = i + blockDim.x) {
lin = lin + list1[i] * list2[i];
}
atomicAdd(sum, lin);
}
__global__ void PME_Energy_Reciprocal(const int element_number, const cufftComplex *FQ, const float *BC, float *sum) {
if (threadIdx.x == 0) {
sum[0] = 0.;
@@ -43,56 +35,6 @@ __global__ void PME_Energy_Reciprocal(const int element_number, const cufftCompl
atomicAdd(sum, lin);
}
__global__ void PME_Excluded_Energy_Correction(const int atom_numbers, const UNSIGNED_INT_VECTOR *uint_crd,
const VECTOR *sacler, const float *charge, const float pme_beta,
const float sqrt_pi, const int *excluded_list_start,
const int *excluded_list, const int *excluded_atom_numbers, float *ene) {
int atom_i = blockDim.x * blockIdx.x + threadIdx.x;
if (atom_i < atom_numbers) {
int excluded_number = excluded_atom_numbers[atom_i];
if (excluded_number > 0) {
int list_start = excluded_list_start[atom_i];
// int atom_min = excluded_list[list_start];
int list_end = list_start + excluded_number;
int atom_j;
int int_x;
int int_y;
int int_z;
float charge_i = charge[atom_i];
float charge_j;
float dr_abs;
float beta_dr;
UNSIGNED_INT_VECTOR r1 = uint_crd[atom_i], r2;
VECTOR dr;
float dr2;
float ene_lin = 0.;
for (int i = list_start; i < list_end; i = i + 1) {
atom_j = excluded_list[i];
r2 = uint_crd[atom_j];
charge_j = charge[atom_j];
int_x = r2.uint_x - r1.uint_x;
int_y = r2.uint_y - r1.uint_y;
int_z = r2.uint_z - r1.uint_z;
dr.x = sacler[0].x * int_x;
dr.y = sacler[0].y * int_y;
dr.z = sacler[0].z * int_z;
dr2 = dr.x * dr.x + dr.y * dr.y + dr.z * dr.z;
dr_abs = sqrtf(dr2);
beta_dr = pme_beta * dr_abs;
ene_lin -= charge_i * charge_j * erff(beta_dr) / dr_abs;
}
atomicAdd(ene, ene_lin);
}
}
}
void PMEEnergy(int fftx, int ffty, int fftz, int atom_numbers, float beta, float *PME_BC, int *pme_uxyz,
float *pme_frxyz, float *PME_Q, float *pme_fq, int *PME_atom_near, int *pme_kxyz, const int *uint_crd_f,
const float *charge, int *nl_atom_numbers, int *nl_atom_serial, int *nl, const float *scaler_f,

View File

@@ -0,0 +1,90 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
*Note:
* PMEEnergyUpdate. This is an experimental interface that is subject to change and/or deletion.
*/
#include "backend/kernel_compiler/gpu/cuda_impl/sponge/pme/pme_energy_update_impl.cuh"
#include "backend/kernel_compiler/gpu/cuda_impl/sponge/pme/pme_common.cuh"
#include "backend/kernel_compiler/gpu/cuda_impl/sponge/common_sponge.cuh"
__global__ void PME_Energy_Reciprocal_update(const int element_number, const cufftComplex *FQ, const float *BC,
float *sum) {
if (threadIdx.x == 0) {
sum[0] = 0.;
}
__syncthreads();
float lin = 0.0;
cufftComplex FQ_i;
for (int i = threadIdx.x; i < element_number; i = i + blockDim.x) {
FQ_i = FQ[i];
lin = lin + (FQ_i.x * FQ_i.x + FQ_i.y * FQ_i.y) * BC[i];
}
atomicAdd(sum, lin);
}
void PMEEnergyUpdate(int fftx, int ffty, int fftz, int atom_numbers, float beta, float *PME_BC, int *pme_uxyz,
float *pme_frxyz, float *PME_Q, float *pme_fq, int *PME_atom_near, int *pme_kxyz,
const int *uint_crd_f, const float *charge, int *nl_atom_numbers, int *nl_atom_serial, int *nl,
const float *scaler_f, const int *excluded_list_start, const int *excluded_list,
const int *excluded_atom_numbers, float *d_reciprocal_ene, float *d_self_ene, float *d_direct_ene,
float *d_correction_ene, dim3 thread_PME, int PME_Nin, int PME_Nfft, int PME_Nall,
const cufftHandle &PME_plan_r2c, const cufftHandle &PME_plan_c2r, float *neutralizing_factor,
float *charge_sum, int max_neighbor_numbers, cudaStream_t stream) {
UNSIGNED_INT_VECTOR *uint_crd =
const_cast<UNSIGNED_INT_VECTOR *>(reinterpret_cast<const UNSIGNED_INT_VECTOR *>(uint_crd_f));
VECTOR *scaler = const_cast<VECTOR *>(reinterpret_cast<const VECTOR *>(scaler_f));
// int max_neighbor_numbers = 800;
NEIGHBOR_LIST *nl_a = reinterpret_cast<NEIGHBOR_LIST *>(nl);
construct_neighbor_list_kernel<<<ceilf(static_cast<float>(atom_numbers) / 128), 128, 0, stream>>>(
atom_numbers, max_neighbor_numbers, nl_atom_numbers, nl_atom_serial, nl_a);
UNSIGNED_INT_VECTOR *PME_uxyz = reinterpret_cast<UNSIGNED_INT_VECTOR *>(pme_uxyz);
UNSIGNED_INT_VECTOR *PME_kxyz = reinterpret_cast<UNSIGNED_INT_VECTOR *>(pme_kxyz);
VECTOR *PME_frxyz = reinterpret_cast<VECTOR *>(pme_frxyz);
cufftComplex *PME_FQ = reinterpret_cast<cufftComplex *>(pme_fq);
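// Filling PME_uxyz with 1 << 30 presumably acts as a sentinel so that PME_Atom_Near recomputes every
// atom's mesh neighborhood on this call.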
Reset_List<<<3 * atom_numbers / 32 + 1, 32, 0, stream>>>(3 * atom_numbers, reinterpret_cast<int *>(PME_uxyz),
1 << 30);
PME_Atom_Near<<<atom_numbers / 32 + 1, 32, 0, stream>>>(
uint_crd, PME_atom_near, PME_Nin, periodic_factor_inverse * fftx, periodic_factor_inverse * ffty,
periodic_factor_inverse * fftz, atom_numbers, fftx, ffty, fftz, PME_kxyz, PME_uxyz, PME_frxyz);
Reset_List<<<PME_Nall / 1024 + 1, 1024, 0, stream>>>(PME_Nall, PME_Q, 0);
PME_Q_Spread<<<atom_numbers / thread_PME.x + 1, thread_PME, 0, stream>>>(PME_atom_near, charge, PME_frxyz, PME_Q,
PME_kxyz, atom_numbers);
cufftExecR2C(PME_plan_r2c, reinterpret_cast<float *>(PME_Q), reinterpret_cast<cufftComplex *>(PME_FQ));
PME_Energy_Reciprocal_update<<<1, 1024, 0, stream>>>(PME_Nfft, PME_FQ, PME_BC, d_reciprocal_ene);
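// Self energy: d_self_ene = -(beta / sqrt(pi)) * sum_i q_i^2, then corrected below by the
// neutralizing-background term neutralizing_factor * (sum_i q_i)^2 via device_add.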
PME_Energy_Product<<<1, 1024, 0, stream>>>(atom_numbers, charge, charge, d_self_ene);
Scale_List<<<1, 1, 0, stream>>>(1, d_self_ene, -beta / sqrtf(PI));
Sum_Of_List<<<1, 1024, 0, stream>>>(atom_numbers, charge, charge_sum);
device_add<<<1, 1, 0, stream>>>(d_self_ene, neutralizing_factor, charge_sum);
Reset_List<<<1, 1, 0, stream>>>(1, d_direct_ene, 0.0);
PME_Direct_Energy<<<atom_numbers / thread_PME.x + 1, thread_PME, 0, stream>>>(
atom_numbers, nl_a, uint_crd, scaler, charge, beta, cutoff * cutoff, d_direct_ene);
Reset_List<<<1, 1, 0, stream>>>(1, d_correction_ene, 0.0);
PME_Excluded_Energy_Correction<<<atom_numbers / 32 + 1, 32, 0, stream>>>(
atom_numbers, uint_crd, scaler, charge, beta, sqrtf(PI), excluded_list_start, excluded_list, excluded_atom_numbers,
d_correction_ene);
return;
}

View File

@@ -0,0 +1,31 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_PME_PME_ENERGY_UPDATE_IMPL_H_
#define MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_PME_PME_ENERGY_UPDATE_IMPL_H_
#include <cufft.h>
#include "runtime/device/gpu/cuda_common.h"
void PMEEnergyUpdate(int fftx, int ffty, int fftz, int atom_numbers, float beta, float *PME_BC, int *pme_uxyz,
float *pme_frxyz, float *PME_Q, float *pme_fq, int *PME_atom_near, int *pme_kxyz,
const int *uint_crd_f, const float *charge, int *nl_atom_numbers, int *nl_atom_serial, int *nl,
const float *scaler_f, const int *excluded_list_start, const int *excluded_list,
const int *excluded_atom_numbers, float *d_reciprocal_ene, float *d_self_ene, float *d_direct_ene,
float *d_correction_ene, dim3 thread_PME, int PME_Nin, int PME_Nfft, int PME_Nall,
const cufftHandle &PME_plan_r2c, const cufftHandle &PME_plan_c2r, float *neutralizing_factor,
float *charge_sum, int max_neighbor_numbers, cudaStream_t stream);
#endif

View File

@@ -13,19 +13,13 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
*Note:
* PMEReciprocalForce. This is an experimental interface that is subject to change and/or deletion.
*/
#include "backend/kernel_compiler/gpu/cuda_impl/sponge/pme/pme_reciprocal_force_impl.cuh"
#include "backend/kernel_compiler/gpu/cuda_impl/sponge/pme/pme_common.cuh"
__global__ void PME_BCFQ(cufftComplex *PME_FQ, float *PME_BC, int PME_Nfft) {
int index = blockDim.x * blockIdx.x + threadIdx.x;
if (index < PME_Nfft) {
float tempf = PME_BC[index];
cufftComplex tempc = PME_FQ[index];
PME_FQ[index].x = tempc.x * tempf;
PME_FQ[index].y = tempc.y * tempf;
}
}
__global__ void PME_Final(int *PME_atom_near, const float *charge, const float *PME_Q, VECTOR *force,
const VECTOR *PME_frxyz, const UNSIGNED_INT_VECTOR *PME_kxyz,
const _VECTOR PME_inverse_box_vector, const int atom_numbers) {
@@ -92,9 +86,11 @@ void PMEReciprocalForce(int fftx, int ffty, int fftz, int atom_numbers, float be
// initial end
Reset_List<<<ceilf(static_cast<float>(3. * atom_numbers) / 128), 128, 0, stream>>>(
3 * atom_numbers, reinterpret_cast<float *>(frc), 0.);
PME_Atom_Near<<<atom_numbers / 32 + 1, 32, 0, stream>>>(
uint_crd, PME_atom_near, PME_Nin, periodic_factor_inverse * fftx, periodic_factor_inverse * ffty,
periodic_factor_inverse * fftz, atom_numbers, fftx, ffty, fftz, PME_kxyz, PME_uxyz, PME_frxyz);
Reset_List<<<PME_Nall / 1024 + 1, 1024, 0, stream>>>(PME_Nall, PME_Q, 0);
PME_Q_Spread<<<atom_numbers / thread_PME.x + 1, thread_PME, 0, stream>>>(PME_atom_near, charge, PME_frxyz, PME_Q,

View File

@@ -13,9 +13,13 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
*Note:
* Constrain_Force_Cycle. This is an experimental interface that is subject to change and/or deletion.
*/
#include "backend/kernel_compiler/gpu/cuda_impl/sponge/simple_constrain/constrain_force_cycle_impl.cuh"
#include "backend/kernel_compiler/gpu/cuda_impl/sponge/common_sponge.cuh"
#include "backend/kernel_compiler/gpu/cuda_impl/sponge/simple_constrain/constrain_force_cycle_impl.cuh"
__global__ void Constrain_Force_Cycle_Kernel(const int constrain_pair_numbers, const UNSIGNED_INT_VECTOR *uint_crd,
const VECTOR *scaler, const CONSTRAIN_PAIR *constrain_pair,
@@ -64,10 +68,10 @@ void Constrain_Force_Cycle(int atom_numbers, int constrain_pair_numbers, const u
CONSTRAIN_PAIR *constrain_pair = reinterpret_cast<CONSTRAIN_PAIR *>(constrain_pair_f);
construct_constrain_pair<<<ceilf(static_cast<float>(constrain_pair_numbers) / 128), 128, 0, stream>>>(
constrain_pair_numbers, atom_i_serials, atom_j_serials, constant_rs, constrain_ks, constrain_pair);
constrain_pair_numbers, atom_i_serials, atom_j_serials, constant_rs, constrain_ks, constrain_pair);
Constrain_Force_Cycle_Kernel<<<block_per_grid, thread_per_block, 0, stream>>>(
constrain_pair_numbers, uint_crd, scaler, constrain_pair, pair_dr, test_frc);
constrain_pair_numbers, uint_crd, scaler, constrain_pair, pair_dr, test_frc);
return;
}

View File

@@ -13,6 +13,10 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
*Note:
* Constrain_Force_Cycle. This is an experimental interface that is subject to change and/or deletion.
*/
#ifndef MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_SIMPLE_CONSTRAIN_CONSTRAIN_FORCE_CYCLE_IMPL_H_
#define MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_SIMPLE_CONSTRAIN_CONSTRAIN_FORCE_CYCLE_IMPL_H_
@@ -24,4 +28,5 @@ void Constrain_Force_Cycle(int atom_numbers, int constrain_pair_numbers, const u
const float *scaler_f, float *constrain_pair_f, const float *pair_dr_f,
const int *atom_i_serials, const int *atom_j_serials, const float *constant_rs,
const float *constrain_ks, float *test_frc_f, cudaStream_t stream);
#endif // MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_SIMPLE_CONSTRAIN_CONSTRAIN_FORCE_CYCLE_WITH_VIRIAL_IMPL_H_
#endif

View File

@@ -13,9 +13,13 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
*Note:
* ConstrainForceCycleVirial. This is an experimental interface that is subject to change and/or deletion.
*/
#include "backend/kernel_compiler/gpu/cuda_impl/sponge/simple_constrain/constrain_force_cycle_with_virial_impl.cuh"
#include "backend/kernel_compiler/gpu/cuda_impl/sponge/common_sponge.cuh"
#include "backend/kernel_compiler/gpu/cuda_impl/sponge/simple_constrain/constrain_force_cycle_with_virial_impl.cuh"
__global__ void Constrain_Force_Cycle_With_Virial(int constrain_pair_numbers, const UNSIGNED_INT_VECTOR *uint_crd,
const VECTOR *scaler, CONSTRAIN_PAIR *constrain_pair,
@@ -46,7 +50,8 @@ void Constrain_Force_Cycle_With_Virial(int atom_numbers, int constrain_pair_numb
const float *constrain_ks, float *test_frc_f, float *d_atom_virial,
cudaStream_t stream) {
Reset_List<<<ceilf(static_cast<float>(3 * atom_numbers) / 128), 128, 0, stream>>>(3 * atom_numbers, test_frc_f, 0.);
Reset_List<<<ceilf(static_cast<float>(atom_numbers) / 128), 128, 0, stream>>>(atom_numbers, d_atom_virial, 0.);
Reset_List<<<ceilf(static_cast<float>(constrain_pair_numbers) / 128), 128, 0, stream>>>(constrain_pair_numbers,
d_atom_virial, 0.);
size_t thread_per_block = 128;
size_t block_per_grid = ceilf(static_cast<float>(atom_numbers) / 128);
const UNSIGNED_INT_VECTOR *uint_crd = reinterpret_cast<const UNSIGNED_INT_VECTOR *>(uint_crd_f);
@@ -58,10 +63,10 @@ void Constrain_Force_Cycle_With_Virial(int atom_numbers, int constrain_pair_numb
CONSTRAIN_PAIR *constrain_pair = reinterpret_cast<CONSTRAIN_PAIR *>(constrain_pair_f);
construct_constrain_pair<<<ceilf(static_cast<float>(constrain_pair_numbers) / 128), 128, 0, stream>>>(
constrain_pair_numbers, atom_i_serials, atom_j_serials, constant_rs, constrain_ks, constrain_pair);
constrain_pair_numbers, atom_i_serials, atom_j_serials, constant_rs, constrain_ks, constrain_pair);
Constrain_Force_Cycle_With_Virial<<<block_per_grid, thread_per_block, 0, stream>>>(
constrain_pair_numbers, uint_crd, scaler, constrain_pair, pair_dr, test_frc, d_atom_virial);
constrain_pair_numbers, uint_crd, scaler, constrain_pair, pair_dr, test_frc, d_atom_virial);
return;
}

View File

@@ -13,6 +13,10 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
*Note:
* ConstrainForceCycleVirial. This is an experimental interface that is subject to change and/or deletion.
*/
#ifndef MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_SIMPLE_CONSTRAIN_CONSTRAIN_FORCE_CYCLE_WITH_VIRIAL_IMPL_H_
#define MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_SIMPLE_CONSTRAIN_CONSTRAIN_FORCE_CYCLE_WITH_VIRIAL_IMPL_H_
@@ -25,4 +29,4 @@ void Constrain_Force_Cycle_With_Virial(int atom_numbers, int constrain_pair_numb
const int *atom_i_serials, const int *atom_j_serials, const float *constant_rs,
const float *constrain_ks, float *test_frc_f, float *d_atom_virial,
cudaStream_t stream);
#endif // MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_SIMPLE_CONSTRAIN_CONSTRAIN_FORCE_CYCLE_WITH_VIRIAL_IMPL_H_
#endif

View File

@@ -0,0 +1,180 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
*Note:
* ConstrainForceCycleVirial. This is an experimental interface that is subject to change and/or deletion.
*/
#include "backend/kernel_compiler/gpu/cuda_impl/sponge/common_sponge.cuh"
#include "backend/kernel_compiler/gpu/cuda_impl/sponge/simple_constrain/constrain_force_virial_impl.cuh"
__global__ void constrain_force_cycle_with_virial_update_kernel(int constrain_pair_numbers,
const UNSIGNED_INT_VECTOR *uint_crd,
const VECTOR *scaler, CONSTRAIN_PAIR *constrain_pair,
const VECTOR *pair_dr, VECTOR *test_frc,
float *d_atom_virial) {
int pair_i = blockDim.x * blockIdx.x + threadIdx.x;
if (pair_i < constrain_pair_numbers) {
CONSTRAIN_PAIR cp = constrain_pair[pair_i];
VECTOR dr0 = pair_dr[pair_i];
VECTOR dr = Get_Periodic_Displacement(uint_crd[cp.atom_i_serial], uint_crd[cp.atom_j_serial], scaler[0]);
float r_1 = rnorm3df(dr.x, dr.y, dr.z);
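// Constraint force: frc_lin = (1 - r0/r) * k * dr0, where dr0 is the stored reference pair vector;
// the per-pair virial contribution is -(frc_lin . dr0), assuming VECTOR * VECTOR is the dot product
// defined in common_sponge.cuh.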
float frc_abs = (1. - cp.constant_r * r_1) * cp.constrain_k;
VECTOR frc_lin = frc_abs * dr0;
d_atom_virial[pair_i] -= frc_lin * dr0;
atomicAdd(&test_frc[cp.atom_j_serial].x, frc_lin.x);
atomicAdd(&test_frc[cp.atom_j_serial].y, frc_lin.y);
atomicAdd(&test_frc[cp.atom_j_serial].z, frc_lin.z);
atomicAdd(&test_frc[cp.atom_i_serial].x, -frc_lin.x);
atomicAdd(&test_frc[cp.atom_i_serial].y, -frc_lin.y);
atomicAdd(&test_frc[cp.atom_i_serial].z, -frc_lin.z);
}
}
__global__ void constrain_force_cycle_update_kernel(const int constrain_pair_numbers,
const UNSIGNED_INT_VECTOR *uint_crd, const VECTOR *scaler,
const CONSTRAIN_PAIR *constrain_pair, const VECTOR *pair_dr,
VECTOR *test_frc) {
int pair_i = blockDim.x * blockIdx.x + threadIdx.x;
if (pair_i < constrain_pair_numbers) {
CONSTRAIN_PAIR cp = constrain_pair[pair_i];
float r_1;
VECTOR dr;
float frc_abs;
VECTOR frc_lin;
dr.x = (static_cast<int>(uint_crd[cp.atom_i_serial].uint_x - uint_crd[cp.atom_j_serial].uint_x)) * scaler[0].x;
dr.y = (static_cast<int>(uint_crd[cp.atom_i_serial].uint_y - uint_crd[cp.atom_j_serial].uint_y)) * scaler[0].y;
dr.z = (static_cast<int>(uint_crd[cp.atom_i_serial].uint_z - uint_crd[cp.atom_j_serial].uint_z)) * scaler[0].z;
r_1 = rnorm3df(dr.x, dr.y, dr.z);
frc_abs = (1. - cp.constant_r * r_1) * cp.constrain_k;
frc_lin.x = frc_abs * pair_dr[pair_i].x;
frc_lin.y = frc_abs * pair_dr[pair_i].y;
frc_lin.z = frc_abs * pair_dr[pair_i].z;
atomicAdd(&test_frc[cp.atom_j_serial].x, frc_lin.x);
atomicAdd(&test_frc[cp.atom_j_serial].y, frc_lin.y);
atomicAdd(&test_frc[cp.atom_j_serial].z, frc_lin.z);
atomicAdd(&test_frc[cp.atom_i_serial].x, -frc_lin.x);
atomicAdd(&test_frc[cp.atom_i_serial].y, -frc_lin.y);
atomicAdd(&test_frc[cp.atom_i_serial].z, -frc_lin.z);
}
}
__global__ void refresh_uint_crd_update_kernel(int atom_numbers, const VECTOR *crd,
const VECTOR *quarter_crd_to_uint_crd_cof, UNSIGNED_INT_VECTOR *uint_crd,
VECTOR *test_frc, const float *mass_inverse,
const float half_exp_gamma_plus_half) {
int atom_i = blockDim.x * blockIdx.x + threadIdx.x;
if (atom_i < atom_numbers) {
INT_VECTOR tempi;
VECTOR crd_lin = crd[atom_i];
VECTOR frc_lin = test_frc[atom_i];
float mass_lin = mass_inverse[atom_i];
crd_lin.x = crd_lin.x + half_exp_gamma_plus_half * frc_lin.x * mass_lin;
crd_lin.y = crd_lin.y + half_exp_gamma_plus_half * frc_lin.y * mass_lin;
crd_lin.z = crd_lin.z + half_exp_gamma_plus_half * frc_lin.z * mass_lin;
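// Re-encode the corrected coordinate as 32-bit unsigned ints: quarter_crd_to_uint_crd_cof appears to be
// one quarter of the full 2^32 / box_length scaling, and the << 2 restores the full-range encoding
// (matching the 2^-32 periodic_factor_inverse in pme_common.cuh) while keeping the intermediate in int range.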
tempi.int_x = crd_lin.x * quarter_crd_to_uint_crd_cof[0].x;
tempi.int_y = crd_lin.y * quarter_crd_to_uint_crd_cof[0].y;
tempi.int_z = crd_lin.z * quarter_crd_to_uint_crd_cof[0].z;
uint_crd[atom_i].uint_x = tempi.int_x << 2;
uint_crd[atom_i].uint_y = tempi.int_y << 2;
uint_crd[atom_i].uint_z = tempi.int_z << 2;
}
}
void constrain_force_cycle_update(int atom_numbers, int constrain_pair_numbers, const unsigned int *uint_crd_f,
const float *scaler_f, float *constrain_pair_f, const float *pair_dr_f,
const int *atom_i_serials, const int *atom_j_serials, const float *constant_rs,
const float *constrain_ks, float *test_frc_f, cudaStream_t stream) {
size_t thread_per_block = 128;
size_t block_per_grid = ceilf(static_cast<float>(atom_numbers) / 128);
const UNSIGNED_INT_VECTOR *uint_crd = reinterpret_cast<const UNSIGNED_INT_VECTOR *>(uint_crd_f);
const VECTOR *scaler = reinterpret_cast<const VECTOR *>(scaler_f);
const VECTOR *pair_dr = reinterpret_cast<const VECTOR *>(pair_dr_f);
VECTOR *test_frc = reinterpret_cast<VECTOR *>(test_frc_f);
CONSTRAIN_PAIR *constrain_pair = reinterpret_cast<CONSTRAIN_PAIR *>(constrain_pair_f);
construct_constrain_pair<<<ceilf(static_cast<float>(constrain_pair_numbers) / 128), 128, 0, stream>>>(
constrain_pair_numbers, atom_i_serials, atom_j_serials, constant_rs, constrain_ks, constrain_pair);
constrain_force_cycle_update_kernel<<<block_per_grid, thread_per_block, 0, stream>>>(
constrain_pair_numbers, uint_crd, scaler, constrain_pair, pair_dr, test_frc);
return;
}
void constrain_force_cycle_with_virial_update(int atom_numbers, int constrain_pair_numbers,
const unsigned int *uint_crd_f, const float *scaler_f,
float *constrain_pair_f, const float *pair_dr_f,
const int *atom_i_serials, const int *atom_j_serials,
const float *constant_rs, const float *constrain_ks, float *test_frc_f,
float *d_atom_virial, cudaStream_t stream) {
size_t thread_per_block = 128;
size_t block_per_grid = ceilf(static_cast<float>(atom_numbers) / 128);
const UNSIGNED_INT_VECTOR *uint_crd = reinterpret_cast<const UNSIGNED_INT_VECTOR *>(uint_crd_f);
const VECTOR *scaler = reinterpret_cast<const VECTOR *>(scaler_f);
const VECTOR *pair_dr = reinterpret_cast<const VECTOR *>(pair_dr_f);
VECTOR *test_frc = reinterpret_cast<VECTOR *>(test_frc_f);
CONSTRAIN_PAIR *constrain_pair = reinterpret_cast<CONSTRAIN_PAIR *>(constrain_pair_f);
construct_constrain_pair<<<ceilf(static_cast<float>(constrain_pair_numbers) / 128), 128, 0, stream>>>(
constrain_pair_numbers, atom_i_serials, atom_j_serials, constant_rs, constrain_ks, constrain_pair);
constrain_force_cycle_with_virial_update_kernel<<<block_per_grid, thread_per_block, 0, stream>>>(
constrain_pair_numbers, uint_crd, scaler, constrain_pair, pair_dr, test_frc, d_atom_virial);
return;
}
void refresh_uint_crd_update(int atom_numbers, float half_exp_gamma_plus_half, const float *crd_f,
const float *quarter_crd_to_uint_crd_cof_f, float *test_frc_f, const float *mass_inverse,
unsigned int *uint_crd_f, cudaStream_t stream) {
size_t thread_per_block = 128;
size_t block_per_grid = ceilf(static_cast<float>(atom_numbers) / 128);
const VECTOR *crd = reinterpret_cast<const VECTOR *>(crd_f);
const VECTOR *quarter_crd_to_uint_crd_cof = reinterpret_cast<const VECTOR *>(quarter_crd_to_uint_crd_cof_f);
VECTOR *test_frc = reinterpret_cast<VECTOR *>(test_frc_f);
UNSIGNED_INT_VECTOR *uint_crd = reinterpret_cast<UNSIGNED_INT_VECTOR *>(uint_crd_f);
refresh_uint_crd_update_kernel<<<block_per_grid, thread_per_block, 0, stream>>>(
atom_numbers, crd, quarter_crd_to_uint_crd_cof, uint_crd, test_frc, mass_inverse, half_exp_gamma_plus_half);
return;
}
void set_zero_force_with_virial(int atom_numbers, int constrain_pair_numbers, float *test_frc_f, float *d_atom_virial,
cudaStream_t stream) {
Reset_List<<<ceilf(static_cast<float>(3 * atom_numbers) / 128), 128, 0, stream>>>(3 * atom_numbers, test_frc_f, 0.);
Reset_List<<<ceilf(static_cast<float>(constrain_pair_numbers) / 128), 128, 0, stream>>>(constrain_pair_numbers,
d_atom_virial, 0.);
return;
}
void set_zero(int numbers, float *x, cudaStream_t stream) {
Reset_List<<<ceilf(static_cast<float>(numbers) / 128), 128, 0, stream>>>(numbers, x, 0.);
return;
}

View File

@@ -0,0 +1,48 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
*Note:
* ConstrainForce. This is an experimental interface that is subject to change and/or deletion.
*/
#ifndef MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_SIMPLE_CONSTRAIN_CONSTRAIN_FORCE_VIRIAL_IMPL_H_
#define MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_SIMPLE_CONSTRAIN_CONSTRAIN_FORCE_VIRIAL_IMPL_H_
#include <curand_kernel.h>
#include "runtime/device/gpu/cuda_common.h"
void constrain_force_cycle_update(int atom_numbers, int constrain_pair_numbers, const unsigned int *uint_crd_f,
const float *scaler_f, float *constrain_pair_f, const float *pair_dr_f,
const int *atom_i_serials, const int *atom_j_serials, const float *constant_rs,
const float *constrain_ks, float *test_frc_f, cudaStream_t stream);
void constrain_force_cycle_with_virial_update(int atom_numbers, int constrain_pair_numbers,
const unsigned int *uint_crd_f, const float *scaler_f,
float *constrain_pair_f, const float *pair_dr_f,
const int *atom_i_serials, const int *atom_j_serials,
const float *constant_rs, const float *constrain_ks, float *test_frc_f,
float *d_atom_virial, cudaStream_t stream);
void refresh_uint_crd_update(int atom_numbers, float half_exp_gamma_plus_half, const float *crd_f,
const float *quarter_crd_to_uint_crd_cof_f, float *test_frc_f, const float *mass_inverse,
unsigned int *uint_crd_f, cudaStream_t stream);
void set_zero_force_with_virial(int atom_numbers, int constrain_pair_numbers, float *test_frc_f, float *d_atom_virial,
cudaStream_t stream);
void set_zero(int numbers, float *x, cudaStream_t stream);
#endif

View File

@@ -0,0 +1,39 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
*Note:
* LJForceWithVirialEnergy. This is an experimental interface that is subject to change and/or deletion.
*/
#include "backend/kernel_compiler/gpu/sponge/lj/lj_direct_cf_force_with_lj_virial_direct_cf_energy_kernel.h"
namespace mindspore {
namespace kernel {
MS_REG_GPU_KERNEL_THREE(LJForceWithVirialEnergy,
KernelAttr()
.AddInputAttr(kNumberTypeUInt32)
.AddInputAttr(kNumberTypeInt32)
.AddInputAttr(kNumberTypeFloat32)
.AddInputAttr(kNumberTypeFloat32)
.AddInputAttr(kNumberTypeInt32)
.AddInputAttr(kNumberTypeInt32)
.AddInputAttr(kNumberTypeFloat32)
.AddInputAttr(kNumberTypeFloat32)
.AddOutputAttr(kNumberTypeFloat32)
.AddOutputAttr(kNumberTypeFloat32)
.AddOutputAttr(kNumberTypeFloat32),
LJForceWithVirialEnergyGpuKernel, float, int, unsigned int)
} // namespace kernel
} // namespace mindspore

View File

@@ -0,0 +1,142 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
*Note:
* LJForceWithVirialEnergy. This is an experimental interface that is subject to change and/or deletion.
*/
#ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_LJ_DIRECT_CF_FORCE_WITH_LJ_VIRIAL_DIRECT_CF_ENERGY_KERNEL_H_
#define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_LJ_DIRECT_CF_FORCE_WITH_LJ_VIRIAL_DIRECT_CF_ENERGY_KERNEL_H_
#include <cuda_runtime_api.h>
#include <vector>
#include <string>
#include <map>
#include "backend/kernel_compiler/gpu/gpu_kernel.h"
#include "backend/kernel_compiler/gpu/gpu_kernel_factory.h"
#include "runtime/device/gpu/cuda_common.h"
#include "backend/kernel_compiler/gpu/cuda_impl/sponge/lj/lj_direct_cf_force_with_lj_virial_direct_cf_energy_impl.cuh"
namespace mindspore {
namespace kernel {
template <typename T, typename T1, typename T2>
class LJForceWithVirialEnergyGpuKernel : public GpuKernel {
public:
LJForceWithVirialEnergyGpuKernel() : ele_uint_crd(1) {}
~LJForceWithVirialEnergyGpuKernel() override = default;
bool Init(const CNodePtr &kernel_node) override {
kernel_node_ = kernel_node;
atom_numbers = static_cast<int>(GetAttr<int64_t>(kernel_node, "atom_numbers"));
max_neighbor_numbers = static_cast<int>(GetAttr<int64_t>(kernel_node, "max_neighbor_numbers"));
cutoff = static_cast<float>(GetAttr<float_t>(kernel_node, "cutoff"));
pme_beta = static_cast<float>(GetAttr<float_t>(kernel_node, "pme_beta"));
auto shape_uint_crd = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
auto shape_LJtype = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 1);
auto shape_charge = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 2);
auto shape_scaler = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 3);
auto shape_nl_numbers = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 4);
auto shape_nl_serial = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 5);
auto shape_d_LJ_a = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 6);
auto shape_d_LJ_b = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 7);
for (size_t i = 0; i < shape_uint_crd.size(); i++) ele_uint_crd *= shape_uint_crd[i];
for (size_t i = 0; i < shape_LJtype.size(); i++) ele_LJtype *= shape_LJtype[i];
for (size_t i = 0; i < shape_charge.size(); i++) ele_charge *= shape_charge[i];
for (size_t i = 0; i < shape_scaler.size(); i++) ele_scaler *= shape_scaler[i];
for (size_t i = 0; i < shape_d_LJ_a.size(); i++) ele_d_LJ_a *= shape_d_LJ_a[i];
for (size_t i = 0; i < shape_d_LJ_b.size(); i++) ele_d_LJ_b *= shape_d_LJ_b[i];
InitSizeLists();
return true;
}
const std::vector<size_t> &GetInputSizeList() const override { return input_size_list_; }
const std::vector<size_t> &GetOutputSizeList() const override { return output_size_list_; }
const std::vector<size_t> &GetWorkspaceSizeList() const override { return workspace_size_list_; }
bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
const std::vector<AddressPtr> &outputs, void *stream_ptr) override {
auto uint_crd = GetDeviceAddress<T2>(inputs, 0);
auto LJtype = GetDeviceAddress<T1>(inputs, 1);
auto charge = GetDeviceAddress<T>(inputs, 2);
auto scaler = GetDeviceAddress<T>(inputs, 3);
auto nl_numbers = GetDeviceAddress<T1>(inputs, 4);
auto nl_serial = GetDeviceAddress<T1>(inputs, 5);
auto d_LJ_a = GetDeviceAddress<T>(inputs, 6);
auto d_LJ_b = GetDeviceAddress<T>(inputs, 7);
auto uint_crd_with_LJ = GetDeviceAddress<T>(workspace, 0);
auto nl = GetDeviceAddress<T1>(workspace, 1);
auto frc = GetDeviceAddress<T>(outputs, 0);
auto atom_lj_virial = GetDeviceAddress<T>(outputs, 1);
auto atom_energy = GetDeviceAddress<T>(outputs, 2);
LJ_Direct_CF_Force_With_LJ_Virial_Direct_CF_Energy(atom_numbers, cutoff, pme_beta, uint_crd, LJtype, charge, scaler,
uint_crd_with_LJ, nl_numbers, nl_serial, nl, d_LJ_a, d_LJ_b, frc,
atom_lj_virial, atom_energy, max_neighbor_numbers,
reinterpret_cast<cudaStream_t>(stream_ptr));
return true;
}
protected:
void InitSizeLists() override {
input_size_list_.push_back(ele_uint_crd * sizeof(T2));
input_size_list_.push_back(ele_LJtype * sizeof(T1));
input_size_list_.push_back(ele_charge * sizeof(T));
input_size_list_.push_back(ele_scaler * sizeof(T));
input_size_list_.push_back(atom_numbers * sizeof(T1));
input_size_list_.push_back(max_neighbor_numbers * sizeof(T1));
input_size_list_.push_back(ele_d_LJ_a * sizeof(T));
input_size_list_.push_back(ele_d_LJ_b * sizeof(T));
workspace_size_list_.push_back(atom_numbers * max_neighbor_numbers * sizeof(T1));
workspace_size_list_.push_back(atom_numbers * sizeof(UINT_VECTOR_LJ_TYPE));
output_size_list_.push_back(atom_numbers * 3 * sizeof(T));
output_size_list_.push_back(atom_numbers * sizeof(T));
output_size_list_.push_back(atom_numbers * sizeof(T));
}
private:
size_t ele_uint_crd = 1;
size_t ele_LJtype = 1;
size_t ele_charge = 1;
size_t ele_scaler = 1;
size_t ele_nl = 1;
size_t ele_d_LJ_a = 1;
size_t ele_d_LJ_b = 1;
std::vector<size_t> input_size_list_;
std::vector<size_t> output_size_list_;
std::vector<size_t> workspace_size_list_;
int atom_numbers;
int max_neighbor_numbers;
float pme_beta;
float cutoff;
struct UINT_VECTOR_LJ_TYPE {
unsigned int uint_x;
unsigned int uint_y;
unsigned int uint_z;
int LJ_type;
float charge;
};
struct NEIGHBOR_LIST {
int atom_numbers;
int *atom_serial;
};
};
} // namespace kernel
} // namespace mindspore
#endif

View File

@@ -0,0 +1,38 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
*Note:
* LJForceWithPMEDirectForceUpdate. This is an experimental interface that is subject to change and/or deletion.
*/
#include "backend/kernel_compiler/gpu/sponge/lj/lj_force_with_pme_direct_force_update_kernel.h"
namespace mindspore {
namespace kernel {
MS_REG_GPU_KERNEL_TWO(LJForceWithPMEDirectForceUpdate,
KernelAttr()
.AddInputAttr(kNumberTypeUInt32)
.AddInputAttr(kNumberTypeInt32)
.AddInputAttr(kNumberTypeFloat32)
.AddInputAttr(kNumberTypeFloat32)
.AddInputAttr(kNumberTypeInt32)
.AddInputAttr(kNumberTypeInt32)
.AddInputAttr(kNumberTypeFloat32)
.AddInputAttr(kNumberTypeFloat32)
.AddInputAttr(kNumberTypeFloat32)
.AddOutputAttr(kNumberTypeFloat32),
LJForceWithPMEDirectForceUpdateGpuKernel, float, int)
} // namespace kernel
} // namespace mindspore

View File

@@ -0,0 +1,151 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
*Note:
* LJForceWithPMEDirectForceUpdate. This is an experimental interface that is subject to change and/or deletion.
*/
#ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_SPONGE_LJ_LJ_FORCE_WITH_PME_DIRECT_FORCE_KERNEL_H_
#define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_SPONGE_LJ_LJ_FORCE_WITH_PME_DIRECT_FORCE_KERNEL_H_
#include <cuda_runtime_api.h>
#include <vector>
#include <string>
#include <map>
#include "backend/kernel_compiler/gpu/gpu_kernel.h"
#include "backend/kernel_compiler/gpu/gpu_kernel_factory.h"
#include "runtime/device/gpu/cuda_common.h"
#include "backend/kernel_compiler/gpu/cuda_impl/sponge/lj/lj_force_with_pme_direct_force_impl.cuh"
namespace mindspore {
namespace kernel {
template <typename T, typename T1>
class LJForceWithPMEDirectForceUpdateGpuKernel : public GpuKernel {
public:
LJForceWithPMEDirectForceUpdateGpuKernel() : ele_uint_crd(1) {}
~LJForceWithPMEDirectForceUpdateGpuKernel() override = default;
bool Init(const CNodePtr &kernel_node) override {
kernel_node_ = kernel_node;
atom_numbers = static_cast<int>(GetAttr<int64_t>(kernel_node, "atom_numbers"));
cutoff = static_cast<float>(GetAttr<float_t>(kernel_node, "cutoff"));
pme_beta = static_cast<float>(GetAttr<float_t>(kernel_node, "pme_beta"));
need_update = static_cast<int>(GetAttr<int64_t>(kernel_node, "need_update"));
auto shape_uint_crd = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
auto shape_LJtype = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 1);
auto shape_charge = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 2);
auto shape_scaler = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 3);
auto shape_nl_numbers = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 4);
auto shape_nl_serial = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 5);
auto shape_d_LJ_a = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 6);
auto shape_d_LJ_b = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 7);
for (size_t i = 0; i < shape_uint_crd.size(); i++) ele_uint_crd *= shape_uint_crd[i];
for (size_t i = 0; i < shape_LJtype.size(); i++) ele_LJtype *= shape_LJtype[i];
for (size_t i = 0; i < shape_charge.size(); i++) ele_charge *= shape_charge[i];
for (size_t i = 0; i < shape_scaler.size(); i++) ele_scaler *= shape_scaler[i];
for (size_t i = 0; i < shape_d_LJ_a.size(); i++) ele_d_LJ_a *= shape_d_LJ_a[i];
for (size_t i = 0; i < shape_d_LJ_b.size(); i++) ele_d_LJ_b *= shape_d_LJ_b[i];
InitSizeLists();
return true;
}
const std::vector<size_t> &GetInputSizeList() const override { return input_size_list_; }
const std::vector<size_t> &GetOutputSizeList() const override { return output_size_list_; }
const std::vector<size_t> &GetWorkspaceSizeList() const override { return workspace_size_list_; }
bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
const std::vector<AddressPtr> &outputs, void *stream_ptr) override {
auto uint_crd = GetDeviceAddress<T1>(inputs, 0);
auto LJtype = GetDeviceAddress<T1>(inputs, 1);
auto charge = GetDeviceAddress<T>(inputs, 2);
auto scaler = GetDeviceAddress<T>(inputs, 3);
auto nl_numbers = GetDeviceAddress<T1>(inputs, 4);
auto nl_serial = GetDeviceAddress<T1>(inputs, 5);
auto d_LJ_a = GetDeviceAddress<T>(inputs, 6);
auto d_LJ_b = GetDeviceAddress<T>(inputs, 7);
auto d_beta = GetDeviceAddress<T>(inputs, 8);
if (need_update) {
cudaMemcpyAsync(&pme_beta, d_beta, sizeof(float), cudaMemcpyDeviceToHost,
reinterpret_cast<cudaStream_t>(stream_ptr));
cudaStreamSynchronize(reinterpret_cast<cudaStream_t>(stream_ptr));
}
auto uint_crd_with_LJ = GetDeviceAddress<T>(workspace, 0);
auto nl = GetDeviceAddress<T1>(workspace, 1);
auto frc = GetDeviceAddress<T>(outputs, 0);
LJForceWithPMEDirectForce(atom_numbers, cutoff, pme_beta, uint_crd, LJtype, charge, scaler, uint_crd_with_LJ,
nl_numbers, nl_serial, nl, d_LJ_a, d_LJ_b, frc,
reinterpret_cast<cudaStream_t>(stream_ptr));
return true;
}
protected:
void InitSizeLists() override {
input_size_list_.push_back(ele_uint_crd * sizeof(T1));
input_size_list_.push_back(ele_LJtype * sizeof(T1));
input_size_list_.push_back(ele_charge * sizeof(T));
input_size_list_.push_back(ele_scaler * sizeof(T));
input_size_list_.push_back(atom_numbers * sizeof(T1));
input_size_list_.push_back(max_nl_numbers * sizeof(T1));
input_size_list_.push_back(ele_d_LJ_a * sizeof(T));
input_size_list_.push_back(ele_d_LJ_b * sizeof(T));
input_size_list_.push_back(sizeof(T));
workspace_size_list_.push_back(atom_numbers * max_nl_numbers * sizeof(T1));
workspace_size_list_.push_back(atom_numbers * sizeof(UINT_VECTOR_LJ_TYPE));
output_size_list_.push_back(atom_numbers * 3 * sizeof(T));
}
private:
size_t ele_uint_crd = 1;
size_t ele_LJtype = 1;
size_t ele_charge = 1;
size_t ele_scaler = 1;
size_t ele_nl = 1;
size_t ele_d_LJ_a = 1;
size_t ele_d_LJ_b = 1;
std::vector<size_t> input_size_list_;
std::vector<size_t> output_size_list_;
std::vector<size_t> workspace_size_list_;
int atom_numbers;
float pme_beta;
float cutoff;
int need_update;
int max_nl_numbers = 800;
struct UINT_VECTOR_LJ_TYPE {
unsigned int uint_x;
unsigned int uint_y;
unsigned int uint_z;
int LJ_type;
float charge;
};
struct NEIGHBOR_LIST {
int atom_numbers;
int *atom_serial;
};
struct VECTOR {
float x;
float y;
float z;
};
};
} // namespace kernel
} // namespace mindspore
#endif
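
The need_update branch in Launch above copies a single device-resident scalar back to the host before launching the kernel. A minimal standalone sketch of that readback pattern (hypothetical helper name, error handling omitted):

#include <cuda_runtime_api.h>

// Sketch: read one float from device memory on a given stream and wait for the
// copy to complete so the value can be used as a host-side launch parameter.
inline float ReadDeviceScalar(const float *d_value, cudaStream_t stream) {
  float h_value = 0.f;
  cudaMemcpyAsync(&h_value, d_value, sizeof(float), cudaMemcpyDeviceToHost, stream);
  cudaStreamSynchronize(stream);
  return h_value;
}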

View File

@ -0,0 +1,40 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
*Note:
* LJForceWithVirialEnergyUpdate. This is an experimental interface that is subject to change and/or deletion.
*/
#include "backend/kernel_compiler/gpu/sponge/lj/lj_force_with_virial_energy_update_kernel.h"
namespace mindspore {
namespace kernel {
MS_REG_GPU_KERNEL_THREE(LJForceWithVirialEnergyUpdate,
KernelAttr()
.AddInputAttr(kNumberTypeUInt32)
.AddInputAttr(kNumberTypeInt32)
.AddInputAttr(kNumberTypeFloat32)
.AddInputAttr(kNumberTypeFloat32)
.AddInputAttr(kNumberTypeInt32)
.AddInputAttr(kNumberTypeInt32)
.AddInputAttr(kNumberTypeFloat32)
.AddInputAttr(kNumberTypeFloat32)
.AddInputAttr(kNumberTypeFloat32)
.AddOutputAttr(kNumberTypeFloat32)
.AddOutputAttr(kNumberTypeFloat32)
.AddOutputAttr(kNumberTypeFloat32),
LJForceWithVirialEnergyUpdateGpuKernel, float, int, unsigned int)
} // namespace kernel
} // namespace mindspore

View File

@ -0,0 +1,152 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
*Note:
* LJForceWithVirialEnergyUpdate. This is an experimental interface that is subject to change and/or deletion.
*/
#ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_LJ_FORCE_WITH_VIRIAL_ENERGY_UPDATE_KERNEL_H_
#define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_LJ_FORCE_WITH_VIRIAL_ENERGY_UPDATE_KERNEL_H_
#include <cuda_runtime_api.h>
#include <vector>
#include <string>
#include <map>
#include "backend/kernel_compiler/gpu/gpu_kernel.h"
#include "backend/kernel_compiler/gpu/gpu_kernel_factory.h"
#include "runtime/device/gpu/cuda_common.h"
#include "backend/kernel_compiler/gpu/cuda_impl/sponge/lj/lj_direct_cf_force_with_lj_virial_direct_cf_energy_impl.cuh"
namespace mindspore {
namespace kernel {
template <typename T, typename T1, typename T2>
class LJForceWithVirialEnergyUpdateGpuKernel : public GpuKernel {
public:
LJForceWithVirialEnergyUpdateGpuKernel() : ele_uint_crd(1) {}
~LJForceWithVirialEnergyUpdateGpuKernel() override = default;
bool Init(const CNodePtr &kernel_node) override {
kernel_node_ = kernel_node;
atom_numbers = static_cast<int>(GetAttr<int64_t>(kernel_node, "atom_numbers"));
max_neighbor_numbers = static_cast<int>(GetAttr<int64_t>(kernel_node, "max_neighbor_numbers"));
cutoff = static_cast<float>(GetAttr<float_t>(kernel_node, "cutoff"));
pme_beta = static_cast<float>(GetAttr<float_t>(kernel_node, "pme_beta"));
need_update = static_cast<int>(GetAttr<int64_t>(kernel_node, "need_update"));
auto shape_uint_crd = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
auto shape_LJtype = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 1);
auto shape_charge = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 2);
auto shape_scaler = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 3);
auto shape_nl_numbers = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 4);
auto shape_nl_serial = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 5);
auto shape_d_LJ_a = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 6);
auto shape_d_LJ_b = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 7);
for (size_t i = 0; i < shape_uint_crd.size(); i++) ele_uint_crd *= shape_uint_crd[i];
for (size_t i = 0; i < shape_LJtype.size(); i++) ele_LJtype *= shape_LJtype[i];
for (size_t i = 0; i < shape_charge.size(); i++) ele_charge *= shape_charge[i];
for (size_t i = 0; i < shape_scaler.size(); i++) ele_scaler *= shape_scaler[i];
for (size_t i = 0; i < shape_d_LJ_a.size(); i++) ele_d_LJ_a *= shape_d_LJ_a[i];
for (size_t i = 0; i < shape_d_LJ_b.size(); i++) ele_d_LJ_b *= shape_d_LJ_b[i];
InitSizeLists();
return true;
}
const std::vector<size_t> &GetInputSizeList() const override { return input_size_list_; }
const std::vector<size_t> &GetOutputSizeList() const override { return output_size_list_; }
const std::vector<size_t> &GetWorkspaceSizeList() const override { return workspace_size_list_; }
bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
const std::vector<AddressPtr> &outputs, void *stream_ptr) override {
auto uint_crd = GetDeviceAddress<T2>(inputs, 0);
auto LJtype = GetDeviceAddress<T1>(inputs, 1);
auto charge = GetDeviceAddress<T>(inputs, 2);
auto scaler = GetDeviceAddress<T>(inputs, 3);
auto nl_numbers = GetDeviceAddress<T1>(inputs, 4);
auto nl_serial = GetDeviceAddress<T1>(inputs, 5);
auto d_LJ_a = GetDeviceAddress<T>(inputs, 6);
auto d_LJ_b = GetDeviceAddress<T>(inputs, 7);
auto d_beta = GetDeviceAddress<T>(inputs, 8);
auto uint_crd_with_LJ = GetDeviceAddress<T>(workspace, 0);
auto nl = GetDeviceAddress<T1>(workspace, 1);
auto frc = GetDeviceAddress<T>(outputs, 0);
auto atom_lj_virial = GetDeviceAddress<T>(outputs, 1);
auto atom_energy = GetDeviceAddress<T>(outputs, 2);
if (need_update) {
cudaMemcpyAsync(&pme_beta, d_beta, sizeof(float), cudaMemcpyDeviceToHost,
reinterpret_cast<cudaStream_t>(stream_ptr));
cudaStreamSynchronize(reinterpret_cast<cudaStream_t>(stream_ptr));
}
LJ_Direct_CF_Force_With_LJ_Virial_Direct_CF_Energy(atom_numbers, cutoff, pme_beta, uint_crd, LJtype, charge, scaler,
uint_crd_with_LJ, nl_numbers, nl_serial, nl, d_LJ_a, d_LJ_b, frc,
atom_lj_virial, atom_energy, max_neighbor_numbers,
reinterpret_cast<cudaStream_t>(stream_ptr));
return true;
}
protected:
void InitSizeLists() override {
input_size_list_.push_back(ele_uint_crd * sizeof(T2));
input_size_list_.push_back(ele_LJtype * sizeof(T1));
input_size_list_.push_back(ele_charge * sizeof(T));
input_size_list_.push_back(ele_scaler * sizeof(T));
input_size_list_.push_back(atom_numbers * sizeof(T1));
input_size_list_.push_back(max_neighbor_numbers * sizeof(T1));
input_size_list_.push_back(ele_d_LJ_a * sizeof(T));
input_size_list_.push_back(ele_d_LJ_b * sizeof(T));
workspace_size_list_.push_back(atom_numbers * max_neighbor_numbers * sizeof(T1));
workspace_size_list_.push_back(atom_numbers * sizeof(UINT_VECTOR_LJ_TYPE));
output_size_list_.push_back(atom_numbers * 3 * sizeof(T));
output_size_list_.push_back(atom_numbers * sizeof(T));
output_size_list_.push_back(atom_numbers * sizeof(T));
}
private:
size_t ele_uint_crd = 1;
size_t ele_LJtype = 1;
size_t ele_charge = 1;
size_t ele_scaler = 1;
size_t ele_nl = 1;
size_t ele_d_LJ_a = 1;
size_t ele_d_LJ_b = 1;
std::vector<size_t> input_size_list_;
std::vector<size_t> output_size_list_;
std::vector<size_t> workspace_size_list_;
int atom_numbers;
int max_neighbor_numbers;
float pme_beta;
float cutoff;
int need_update;
struct UINT_VECTOR_LJ_TYPE {
unsigned int uint_x;
unsigned int uint_y;
unsigned int uint_z;
int LJ_type;
float charge;
};
struct NEIGHBOR_LIST {
int atom_numbers;
int *atom_serial;
};
};
} // namespace kernel
} // namespace mindspore
#endif

View File

@ -13,14 +13,19 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
*Note:
* MDIterationLeapFrog. This is an experimental interface that is subject to change and/or deletion.
*/
#ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_SPONG_NVTIT_MD_ITERATION_LEAP_FROG_KERNEL_H_
#define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_SPONG_NVTIT_MD_ITERATION_LEAP_FROG_KERNEL_H_
#ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_MD_ITERATION_LEAP_FROG_KERNEL_H_
#define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_MD_ITERATION_LEAP_FROG_KERNEL_H_
#include <cuda_runtime_api.h>
#include <vector>
#include <string>
#include <map>
#include <string>
#include <vector>
#include "backend/kernel_compiler/gpu/gpu_kernel.h"
#include "backend/kernel_compiler/gpu/gpu_kernel_factory.h"
#include "runtime/device/gpu/cuda_common.h"
@ -31,25 +36,13 @@ namespace kernel {
template <typename T>
class MDIterationLeapFrogGpuKernel : public GpuKernel {
public:
MDIterationLeapFrogGpuKernel() : ele_mass_inverse(1) {}
MDIterationLeapFrogGpuKernel() {}
~MDIterationLeapFrogGpuKernel() override = default;
bool Init(const CNodePtr &kernel_node) override {
kernel_node_ = kernel_node;
float4_numbers = static_cast<int>(GetAttr<int64_t>(kernel_node, "float4_numbers"));
atom_numbers = static_cast<int>(GetAttr<int64_t>(kernel_node, "atom_numbers"));
half_dt = static_cast<float>(GetAttr<float>(kernel_node, "half_dt"));
dt = static_cast<float>(GetAttr<float>(kernel_node, "dt"));
exp_gamma = static_cast<float>(GetAttr<float>(kernel_node, "exp_gamma"));
is_max_velocity = static_cast<int>(GetAttr<int64_t>(kernel_node, "is_max_velocity"));
max_velocity = static_cast<float>(GetAttr<float>(kernel_node, "max_velocity"));
auto shape_mass_inverse = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
auto shape_qrt_mass = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 1);
for (size_t i = 0; i < shape_mass_inverse.size(); i++) ele_mass_inverse *= shape_mass_inverse[i];
for (size_t i = 0; i < shape_qrt_mass.size(); i++) ele_sqrt_mass *= shape_qrt_mass[i];
InitSizeLists();
return true;
}
@ -58,49 +51,36 @@ class MDIterationLeapFrogGpuKernel : public GpuKernel {
const std::vector<size_t> &GetOutputSizeList() const override { return output_size_list_; }
const std::vector<size_t> &GetWorkspaceSizeList() const override { return workspace_size_list_; }
bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &,
bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
const std::vector<AddressPtr> &outputs, void *stream_ptr) override {
auto d_mass_inverse = GetDeviceAddress<const T>(inputs, 0);
auto d_sqrt_mass = GetDeviceAddress<const T>(inputs, 1);
auto vel_f = GetDeviceAddress<T>(outputs, 0);
auto crd_f = GetDeviceAddress<T>(outputs, 1);
auto frc_f = GetDeviceAddress<T>(outputs, 2);
auto acc_f = GetDeviceAddress<T>(outputs, 3);
MDIterationLeapFrog(float4_numbers, atom_numbers, half_dt, dt, exp_gamma, is_max_velocity, max_velocity,
d_mass_inverse, d_sqrt_mass, vel_f, crd_f, frc_f, acc_f,
reinterpret_cast<cudaStream_t>(stream_ptr));
auto vel = GetDeviceAddress<float>(inputs, 0);
auto crd = GetDeviceAddress<float>(inputs, 1);
auto frc = GetDeviceAddress<float>(inputs, 2);
auto acc = GetDeviceAddress<float>(inputs, 3);
auto inverse_mass = GetDeviceAddress<float>(inputs, 4);
MDIterationLeapFrog(atom_numbers, vel, crd, frc, acc, inverse_mass, dt, reinterpret_cast<cudaStream_t>(stream_ptr));
return true;
}
protected:
void InitSizeLists() override {
input_size_list_.push_back(ele_mass_inverse * sizeof(T));
input_size_list_.push_back(ele_sqrt_mass * sizeof(T));
input_size_list_.push_back(atom_numbers * 3 * sizeof(T));
input_size_list_.push_back(atom_numbers * 3 * sizeof(T));
input_size_list_.push_back(atom_numbers * 3 * sizeof(T));
input_size_list_.push_back(atom_numbers * 3 * sizeof(T));
input_size_list_.push_back(atom_numbers * sizeof(T));
output_size_list_.push_back(3 * atom_numbers * sizeof(T));
output_size_list_.push_back(3 * atom_numbers * sizeof(T));
output_size_list_.push_back(3 * atom_numbers * sizeof(T));
output_size_list_.push_back(3 * atom_numbers * sizeof(T));
output_size_list_.push_back(sizeof(T));
}
private:
size_t ele_mass_inverse = 1;
size_t ele_sqrt_mass = 1;
std::vector<size_t> input_size_list_;
std::vector<size_t> output_size_list_;
std::vector<size_t> workspace_size_list_;
int float4_numbers;
int atom_numbers;
float half_dt;
float dt;
float exp_gamma;
int is_max_velocity;
float max_velocity;
};
} // namespace kernel
} // namespace mindspore
#endif // MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_SPONG_NVTIT_MD_ITERATION_LEAP_FROG_KERNEL_H_
#endif
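
The reworked Launch above forwards vel, crd, frc, acc, inverse_mass and dt straight to MDIterationLeapFrog. Assuming the conventional leapfrog update, the per-atom arithmetic reduces to the following host-side sketch (illustrative only; the CUDA kernel may clamp velocities or reset forces differently):

// Sketch of one leapfrog step over flat xyz arrays of length 3 * atom_numbers.
void LeapFrogStepSketch(int atom_numbers, float *vel, float *crd, const float *frc,
                        float *acc, const float *inverse_mass, float dt) {
  for (int i = 0; i < atom_numbers; ++i) {
    for (int d = 0; d < 3; ++d) {
      const int j = 3 * i + d;
      acc[j] = inverse_mass[i] * frc[j];  // a = F / m
      vel[j] += dt * acc[j];              // kick
      crd[j] += dt * vel[j];              // drift
    }
  }
}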

View File

@ -0,0 +1,42 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
*Note:
* PMEEnergyUpdate. This is an experimental interface that is subject to change and/or deletion.
*/
#include "backend/kernel_compiler/gpu/sponge/pme/pme_energy_update_kernel.h"
namespace mindspore {
namespace kernel {
MS_REG_GPU_KERNEL_TWO(PMEEnergyUpdate,
KernelAttr()
.AddInputAttr(kNumberTypeUInt32)
.AddInputAttr(kNumberTypeFloat32)
.AddInputAttr(kNumberTypeInt32)
.AddInputAttr(kNumberTypeInt32)
.AddInputAttr(kNumberTypeFloat32)
.AddInputAttr(kNumberTypeInt32)
.AddInputAttr(kNumberTypeInt32)
.AddInputAttr(kNumberTypeInt32)
.AddInputAttr(kNumberTypeFloat32)
.AddInputAttr(kNumberTypeFloat32)
.AddOutputAttr(kNumberTypeFloat32)
.AddOutputAttr(kNumberTypeFloat32)
.AddOutputAttr(kNumberTypeFloat32)
.AddOutputAttr(kNumberTypeFloat32),
PMEEnergyUpdateGpuKernel, float, int)
} // namespace kernel
} // namespace mindspore

View File

@ -0,0 +1,313 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
*Note:
* PMEEnergyUpdate. This is an experimental interface that is subject to change and/or deletion.
*/
#ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_SPONGE_PME_PME_ENERGY_UPDATE_KERNEL_H_
#define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_SPONGE_PME_PME_ENERGY_UPDATE_KERNEL_H_
#include <cuda_runtime_api.h>
#include <cufft.h>
#include <vector>
#include "backend/kernel_compiler/gpu/gpu_kernel.h"
#include "backend/kernel_compiler/gpu/gpu_kernel_factory.h"
#include "runtime/device/gpu/cuda_common.h"
#include "backend/kernel_compiler/gpu/cuda_impl/sponge/pme/pme_energy_update_impl.cuh"
namespace mindspore {
namespace kernel {
template <typename T, typename T1>
class PMEEnergyUpdateGpuKernel : public GpuKernel {
public:
PMEEnergyUpdateGpuKernel() : ele_uint_crd(1) {}
~PMEEnergyUpdateGpuKernel() override = default;
bool Init(const CNodePtr &kernel_node) override {
kernel_node_ = kernel_node;
atom_numbers = static_cast<int>(GetAttr<int64_t>(kernel_node, "atom_numbers"));
excluded_numbers = static_cast<int>(GetAttr<int64_t>(kernel_node, "excluded_numbers"));
beta = static_cast<float>(GetAttr<float_t>(kernel_node, "beta"));
fftx = static_cast<int>(GetAttr<int64_t>(kernel_node, "fftx"));
ffty = static_cast<int>(GetAttr<int64_t>(kernel_node, "ffty"));
fftz = static_cast<int>(GetAttr<int64_t>(kernel_node, "fftz"));
float box_length_0 = static_cast<float>(GetAttr<float_t>(kernel_node, "box_length_0"));
float box_length_1 = static_cast<float>(GetAttr<float_t>(kernel_node, "box_length_1"));
float box_length_2 = static_cast<float>(GetAttr<float_t>(kernel_node, "box_length_2"));
max_neighbor_numbers = static_cast<int>(GetAttr<int64_t>(kernel_node, "max_neighbor_numbers"));
need_update = static_cast<int>(GetAttr<int64_t>(kernel_node, "need_update"));
cufftPlan3d(&PME_plan_r2c, fftx, ffty, fftz, CUFFT_R2C);
cufftPlan3d(&PME_plan_c2r, fftx, ffty, fftz, CUFFT_C2R);
_thread_PME.x = 8;
_thread_PME.y = 8;
PME_Nin = ffty * fftz;
PME_Nfft = fftx * ffty * (fftz / 2 + 1);
PME_Nall = fftx * ffty * fftz;
PME_kxyz_cpu.resize(64);
std::vector<float> h_box_length{box_length_0, box_length_1, box_length_2};
VECTOR *box_length = reinterpret_cast<VECTOR *>(h_box_length.data());
volume = box_length[0].x * box_length[0].y * box_length[0].z;
int kx, ky, kz, kxrp, kyrp, kzrp, index;
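// Enumerate the 4 x 4 x 4 = 64 neighboring grid offsets used by the order-4 B-spline charge spreading.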
for (kx = 0; kx < 4; kx++) {
for (ky = 0; ky < 4; ky++) {
for (kz = 0; kz < 4; kz++) {
index = kx * 16 + ky * 4 + kz;
PME_kxyz_cpu[index].uint_x = kx;
PME_kxyz_cpu[index].uint_y = ky;
PME_kxyz_cpu[index].uint_z = kz;
}
}
}
B1.resize(fftx);
B2.resize(ffty);
B3.resize(fftz);
PME_BC0.resize(PME_Nfft);
for (kx = 0; kx < fftx; kx++) {
B1[kx] = getb(kx, fftx, 4);
}
for (ky = 0; ky < ffty; ky++) {
B2[ky] = getb(ky, ffty, 4);
}
for (kz = 0; kz < fftz; kz++) {
B3[kz] = getb(kz, fftz, 4);
}
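// Precompute the reciprocal-space influence function exp(-pi^2 |m|^2 / beta^2) / (pi |m|^2 V) for m != 0,
// modulated by the B-spline moduli B1 * B2 * B3.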
float mprefactor = PI * PI / -beta / beta;
float msq;
for (kx = 0; kx < fftx; kx++) {
kxrp = kx;
if (kx > fftx / 2) kxrp = fftx - kx;
for (ky = 0; ky < ffty; ky++) {
kyrp = ky;
if (ky > ffty / 2) kyrp = ffty - ky;
for (kz = 0; kz <= fftz / 2; kz++) {
kzrp = kz;
msq = kxrp * kxrp / box_length[0].x / box_length[0].x + kyrp * kyrp / box_length[0].y / box_length[0].y +
kzrp * kzrp / box_length[0].z / box_length[0].z;
index = kx * ffty * (fftz / 2 + 1) + ky * (fftz / 2 + 1) + kz;
if ((kx + ky + kz) == 0) {
PME_BC0[index] = 0;
} else {
PME_BC0[index] = 1.0 / PI / msq * exp(mprefactor * msq) / volume;
}
PME_BC0[index] *= B1[kx] * B2[ky] * B3[kz];
}
}
}
InitSizeLists();
return true;
}
const std::vector<size_t> &GetInputSizeList() const override { return input_size_list_; }
const std::vector<size_t> &GetOutputSizeList() const override { return output_size_list_; }
const std::vector<size_t> &GetWorkspaceSizeList() const override { return workspace_size_list_; }
bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
const std::vector<AddressPtr> &outputs, void *stream_ptr) override {
auto uint_crd = GetDeviceAddress<T1>(inputs, 0);
auto charge = GetDeviceAddress<T>(inputs, 1);
auto nl_numbers = GetDeviceAddress<T1>(inputs, 2);
auto nl_serial = GetDeviceAddress<T1>(inputs, 3);
auto scaler = GetDeviceAddress<T>(inputs, 4);
auto excluded_list_start = GetDeviceAddress<int>(inputs, 5);
auto excluded_list = GetDeviceAddress<int>(inputs, 6);
auto excluded_atom_numbers = GetDeviceAddress<int>(inputs, 7);
auto neutralizing_factor = GetDeviceAddress<T>(inputs, 8);
auto d_beta = GetDeviceAddress<T>(inputs, 9);
auto pme_uxyz = GetDeviceAddress<int>(workspace, 0); // workspace
auto pme_frxyz = GetDeviceAddress<float>(workspace, 1); // workspace
auto pme_q = GetDeviceAddress<T>(workspace, 2); // workspace
auto pme_fq = GetDeviceAddress<float>(workspace, 3); // workspace
auto pme_atom_near = GetDeviceAddress<int>(workspace, 4); // workspace
auto pme_bc = GetDeviceAddress<float>(workspace, 5); // workspace
auto pme_kxyz = GetDeviceAddress<int>(workspace, 6); // workspace
auto nl = GetDeviceAddress<T1>(workspace, 7);
auto charge_sum = GetDeviceAddress<float>(workspace, 8);
auto reciprocal_ene = GetDeviceAddress<T>(outputs, 0);
auto self_ene = GetDeviceAddress<T>(outputs, 1);
auto direct_ene = GetDeviceAddress<T>(outputs, 2);
auto correction_ene = GetDeviceAddress<T>(outputs, 3);
h_beta = beta;
if (need_update) {
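// beta was recomputed on the device: read it back and rescale the cached BC table by beta_old / beta_new.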
cudaMemcpyAsync(&h_beta, d_beta, sizeof(float), cudaMemcpyDeviceToHost,
reinterpret_cast<cudaStream_t>(stream_ptr));
cudaStreamSynchronize(reinterpret_cast<cudaStream_t>(stream_ptr));
factor = h_beta / beta;
double factor_inverse = 1.0 / factor;
for (int i = 0; i < PME_Nfft; i++) {
PME_BC0[i] *= factor_inverse; // update PME_BC0
}
}
cufftSetStream(PME_plan_r2c, reinterpret_cast<cudaStream_t>(stream_ptr));
cufftSetStream(PME_plan_c2r, reinterpret_cast<cudaStream_t>(stream_ptr));
cudaMemcpyAsync(pme_kxyz, PME_kxyz_cpu.data(), sizeof(UNSIGNED_INT_VECTOR) * 64, cudaMemcpyHostToDevice,
reinterpret_cast<cudaStream_t>(stream_ptr));
cudaMemcpyAsync(pme_bc, PME_BC0.data(), sizeof(float) * PME_Nfft, cudaMemcpyHostToDevice,
reinterpret_cast<cudaStream_t>(stream_ptr));
PMEEnergyUpdate(fftx, ffty, fftz, atom_numbers, h_beta, pme_bc, pme_uxyz, pme_frxyz, pme_q, pme_fq, pme_atom_near,
pme_kxyz, uint_crd, charge, nl_numbers, nl_serial, nl, scaler, excluded_list_start, excluded_list,
excluded_atom_numbers, reciprocal_ene, self_ene, direct_ene, correction_ene, _thread_PME, PME_Nin,
PME_Nfft, PME_Nall, PME_plan_r2c, PME_plan_c2r, neutralizing_factor, charge_sum,
max_neighbor_numbers, reinterpret_cast<cudaStream_t>(stream_ptr));
return true;
}
protected:
void InitSizeLists() override {
input_size_list_.push_back(atom_numbers * sizeof(UNSIGNED_INT_VECTOR));
input_size_list_.push_back(atom_numbers * sizeof(VECTOR));
input_size_list_.push_back(atom_numbers * sizeof(T1));
input_size_list_.push_back(max_neighbor_numbers * sizeof(T1));
input_size_list_.push_back(sizeof(VECTOR));
input_size_list_.push_back(atom_numbers * sizeof(T1));
input_size_list_.push_back(excluded_numbers * sizeof(T1));
input_size_list_.push_back(atom_numbers * sizeof(T1));
input_size_list_.push_back(sizeof(T));
input_size_list_.push_back(sizeof(T));
workspace_size_list_.push_back(atom_numbers * sizeof(UNSIGNED_INT_VECTOR));
workspace_size_list_.push_back(atom_numbers * sizeof(VECTOR));
workspace_size_list_.push_back(PME_Nall * sizeof(T));
workspace_size_list_.push_back(PME_Nfft * sizeof(cufftComplex));
workspace_size_list_.push_back(atom_numbers * 64 * sizeof(int));
workspace_size_list_.push_back(PME_Nfft * sizeof(float));
workspace_size_list_.push_back(64 * sizeof(UNSIGNED_INT_VECTOR));
workspace_size_list_.push_back(atom_numbers * max_neighbor_numbers * sizeof(T1));
workspace_size_list_.push_back(sizeof(T));
output_size_list_.push_back(sizeof(T));
output_size_list_.push_back(sizeof(T));
output_size_list_.push_back(sizeof(T));
output_size_list_.push_back(sizeof(T));
}
cufftComplex expc(cufftComplex z) {
cufftComplex res;
float t = expf(z.x);
sincosf(z.y, &res.y, &res.x);
res.x *= t;
res.y *= t;
return res;
}
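// Order-n cardinal B-spline, evaluated with the standard two-term recursion; nonzero only on (0, n).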
float M_(float u, int n) {
if (n == 2) {
if (u > 2 || u < 0) return 0;
return 1 - abs(u - 1);
} else {
return u / (n - 1) * M_(u, n - 1) + (n - u) / (n - 1) * M_(u - 1, n - 1);
}
}
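// Squared modulus |b(k)|^2 of the exponential Euler spline factor used by smooth PME for grid index k.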
float getb(int k, int NFFT, int B_order) {
cufftComplex tempc, tempc2, res;
float tempf;
tempc2.x = 0;
tempc2.y = 0;
tempc.x = 0;
if (NFFT == 0) {
MS_LOG(ERROR) << "Divide by zero.";
} else {
tempc.y = 2 * (B_order - 1) * PI * k / NFFT;
}
res = expc(tempc);
for (int kk = 0; kk < (B_order - 1); kk++) {
tempc.x = 0;
if (NFFT == 0) {
MS_LOG(ERROR) << "Divide by zero.";
break;
} else {
tempc.y = 2 * PI * k / NFFT * kk;
}
tempc = expc(tempc);
tempf = M_(kk + 1, B_order);
tempc2.x += tempf * tempc.x;
tempc2.y += tempf * tempc.y;
}
res = cuCdivf(res, tempc2);
return res.x * res.x + res.y * res.y;
}
private:
size_t ele_uint_crd = 1;
std::vector<size_t> input_size_list_;
std::vector<size_t> output_size_list_;
std::vector<size_t> workspace_size_list_;
std::vector<float> B1;
std::vector<float> B2;
std::vector<float> B3;
std::vector<float> PME_BC0;
int atom_numbers;
int excluded_numbers;
int max_neighbor_numbers;
int fftx;
int ffty;
int fftz;
float beta;
float factor;
float h_beta;
int PME_Nin;
int PME_Nall;
int PME_Nfft;
float volume;
float PI = 3.1415926;
int need_update;
cufftHandle PME_plan_r2c;
cufftHandle PME_plan_c2r;
dim3 _thread_PME;
struct VECTOR {
float x;
float y;
float z;
};
struct UNSIGNED_INT_VECTOR {
unsigned int uint_x;
unsigned int uint_y;
unsigned int uint_z;
};
std::vector<UNSIGNED_INT_VECTOR> PME_kxyz_cpu;
struct NEIGHBOR_LIST {
int atom_numbers;
int *atom_serial;
};
};
} // namespace kernel
} // namespace mindspore
#endif
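
The recursive M_ above is the order-n cardinal B-spline; getb folds its values into the |b(k)|^2 moduli that enter PME_BC0, and in smooth PME the same spline also supplies the charge-spreading weights. A small self-contained check (illustrative only, relying on the standard B-spline partition-of-unity property) that the four order-4 weights at any fractional grid offset sum to one:

#include <cmath>
#include <cstdio>

// Same recursion as M_ above, written with an explicit float abs.
static float BSpline(float u, int n) {
  if (n == 2) return (u < 0.f || u > 2.f) ? 0.f : 1.f - std::fabs(u - 1.f);
  return u / (n - 1) * BSpline(u, n - 1) + (n - u) / (n - 1) * BSpline(u - 1.f, n - 1);
}

int main() {
  const float u = 0.37f;  // arbitrary fractional offset in [0, 1)
  float sum = 0.f;
  for (int k = 0; k < 4; ++k) sum += BSpline(u + k, 4);
  std::printf("sum of order-4 weights = %f\n", sum);  // prints ~1.000000
  return 0;
}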

View File

@ -13,17 +13,20 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
*Note:
* PMEExcludedForce. This is an experimental interface that is subject to change and/or deletion.
*/
#ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_SPONGE_PME_PME_EXCLUDED_FORCE_KERNEL_H_
#define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_SPONGE_PME_PME_EXCLUDED_FORCE_KERNEL_H_
#include <cuda_runtime_api.h>
#include <cufft.h>
#include <vector>
#include <string>
#include <map>
#include "backend/kernel_compiler/gpu/gpu_kernel.h"
#include "backend/kernel_compiler/gpu/gpu_kernel_factory.h"
#include "runtime/device/gpu/cuda_common.h"
#include "backend/kernel_compiler/gpu/cuda_impl/sponge/pme/pme_excluded_force_impl.cuh"
namespace mindspore {
namespace kernel {
template <typename T, typename T1>

View File

@ -0,0 +1,36 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
*Note:
* PMEExcludedForceUpdate. This is an experimental interface that is subject to change and/or deletion.
*/
#include "backend/kernel_compiler/gpu/sponge/pme/pme_excluded_force_update_kernel.h"
namespace mindspore {
namespace kernel {
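// Input order matches PMEExcludedForceUpdateGpuKernel::Launch below: uint_crd, scaler, charge,
// excluded_list_start, excluded_list, excluded_atom_numbers, beta; the single output is the correction force.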
MS_REG_GPU_KERNEL_TWO(PMEExcludedForceUpdate,
KernelAttr()
.AddInputAttr(kNumberTypeUInt32)
.AddInputAttr(kNumberTypeFloat32)
.AddInputAttr(kNumberTypeFloat32)
.AddInputAttr(kNumberTypeInt32)
.AddInputAttr(kNumberTypeInt32)
.AddInputAttr(kNumberTypeInt32)
.AddInputAttr(kNumberTypeFloat32)
.AddOutputAttr(kNumberTypeFloat32),
PMEExcludedForceUpdateGpuKernel, float, int)
} // namespace kernel
} // namespace mindspore

View File

@ -0,0 +1,112 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
*Note:
* PMEExcludedForceUpdate. This is an experimental interface that is subject to change and/or deletion.
*/
#ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_SPONGE_PME_PME_EXCLUDED_FORCE_UPDATE_KERNEL_H_
#define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_SPONGE_PME_PME_EXCLUDED_FORCE_UPDATE_KERNEL_H_
#include <cuda_runtime_api.h>
#include <cufft.h>
#include <map>
#include <string>
#include <vector>
#include "runtime/device/gpu/cuda_common.h"
#include "backend/kernel_compiler/gpu/gpu_kernel.h"
#include "backend/kernel_compiler/gpu/gpu_kernel_factory.h"
#include "backend/kernel_compiler/gpu/cuda_impl/sponge/pme/pme_excluded_force_impl.cuh"
namespace mindspore {
namespace kernel {
template <typename T, typename T1>
class PMEExcludedForceUpdateGpuKernel : public GpuKernel {
public:
PMEExcludedForceUpdateGpuKernel() : ele_uint_crd(1) {}
~PMEExcludedForceUpdateGpuKernel() override = default;
bool Init(const CNodePtr &kernel_node) override {
kernel_node_ = kernel_node;
atom_numbers = static_cast<int>(GetAttr<int64_t>(kernel_node, "atom_numbers"));
excluded_numbers = static_cast<int>(GetAttr<int64_t>(kernel_node, "excluded_numbers"));
beta = static_cast<float>(GetAttr<float_t>(kernel_node, "beta"));
need_update = static_cast<int>(GetAttr<int64_t>(kernel_node, "need_update"));
InitSizeLists();
return true;
}
const std::vector<size_t> &GetInputSizeList() const override { return input_size_list_; }
const std::vector<size_t> &GetOutputSizeList() const override { return output_size_list_; }
const std::vector<size_t> &GetWorkspaceSizeList() const override { return workspace_size_list_; }
bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
const std::vector<AddressPtr> &outputs, void *stream_ptr) override {
auto uint_crd = GetDeviceAddress<int>(inputs, 0);
auto scaler = GetDeviceAddress<T>(inputs, 1);
auto charge = GetDeviceAddress<T>(inputs, 2);
auto excluded_list_start = GetDeviceAddress<int>(inputs, 3);
auto excluded_list = GetDeviceAddress<int>(inputs, 4);
auto excluded_atom_numbers = GetDeviceAddress<int>(inputs, 5);
auto d_beta = GetDeviceAddress<T>(inputs, 6);
if (need_update) {
cudaMemcpyAsync(&beta, d_beta, sizeof(float), cudaMemcpyDeviceToHost, reinterpret_cast<cudaStream_t>(stream_ptr));
cudaStreamSynchronize(reinterpret_cast<cudaStream_t>(stream_ptr));
}
auto force = GetDeviceAddress<T>(outputs, 0);
PMEExcludedForce(atom_numbers, beta, uint_crd, scaler, charge, excluded_list_start, excluded_list,
excluded_atom_numbers, force, reinterpret_cast<cudaStream_t>(stream_ptr));
return true;
}
protected:
void InitSizeLists() override {
input_size_list_.push_back(atom_numbers * sizeof(UNSIGNED_INT_VECTOR));
input_size_list_.push_back(sizeof(VECTOR));
input_size_list_.push_back(atom_numbers * sizeof(T));
input_size_list_.push_back(atom_numbers * sizeof(T1));
input_size_list_.push_back(excluded_numbers * sizeof(T1));
input_size_list_.push_back(atom_numbers * sizeof(T1));
input_size_list_.push_back(sizeof(T));
output_size_list_.push_back(atom_numbers * 3 * sizeof(T));
}
private:
size_t ele_uint_crd = 1;
std::vector<size_t> input_size_list_;
std::vector<size_t> output_size_list_;
std::vector<size_t> workspace_size_list_;
int atom_numbers;
int excluded_numbers;
float beta;
float factor;
int need_update;
struct VECTOR {
float x;
float y;
float z;
};
struct UNSIGNED_INT_VECTOR {
unsigned int uint_x;
unsigned int uint_y;
unsigned int uint_z;
};
};
} // namespace kernel
} // namespace mindspore
#endif

View File

@ -13,6 +13,10 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
*Note:
* PMEReciprocalForce. This is an experimental interface that is subject to change and/or deletion.
*/
#ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_SPONGE_PME_PME_RECIPROCAL_FORCE_KERNEL_H_
#define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_SPONGE_PME_PME_RECIPROCAL_FORCE_KERNEL_H_
#include <cuda_runtime_api.h>
@ -144,9 +148,8 @@ class PMEReciprocalForceGpuKernel : public GpuKernel {
protected:
void InitSizeLists() override {
input_size_list_.push_back(sizeof(VECTOR));
input_size_list_.push_back(atom_numbers * sizeof(UNSIGNED_INT_VECTOR));
input_size_list_.push_back(atom_numbers * sizeof(VECTOR));
input_size_list_.push_back(atom_numbers * sizeof(float));
workspace_size_list_.push_back(atom_numbers * sizeof(UNSIGNED_INT_VECTOR));
workspace_size_list_.push_back(atom_numbers * sizeof(VECTOR));

View File

@ -0,0 +1,32 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
*Note:
* PMEReciprocalForceUpdate. This is an experimental interface that is subject to change and/or deletion.
*/
#include "backend/kernel_compiler/gpu/sponge/pme/pme_reciprocal_force_update_kernel.h"
namespace mindspore {
namespace kernel {
MS_REG_GPU_KERNEL_TWO(PMEReciprocalForceUpdate,
KernelAttr()
.AddInputAttr(kNumberTypeUInt32)
.AddInputAttr(kNumberTypeFloat32)
.AddInputAttr(kNumberTypeFloat32)
.AddOutputAttr(kNumberTypeFloat32),
PMEReciprocalForceUpdateGpuKernel, float, int)
} // namespace kernel
} // namespace mindspore

View File

@ -0,0 +1,280 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
*Note:
* PMEReciprocalForceUpdate. This is an experimental interface that is subject to change and/or deletion.
*/
#ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_SPONGE_PME_PME_RECIPROCAL_FORCE_UPDATE_KERNEL_H_
#define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_SPONGE_PME_PME_RECIPROCAL_FORCE_UPDATE_KERNEL_H_
#include <cuda_runtime_api.h>
#include <cufft.h>
#include <vector>
#include "backend/kernel_compiler/gpu/gpu_kernel.h"
#include "backend/kernel_compiler/gpu/gpu_kernel_factory.h"
#include "runtime/device/gpu/cuda_common.h"
#include "backend/kernel_compiler/gpu/cuda_impl/sponge/pme/pme_reciprocal_force_impl.cuh"
namespace mindspore {
namespace kernel {
template <typename T, typename T1>
class PMEReciprocalForceUpdateGpuKernel : public GpuKernel {
public:
PMEReciprocalForceUpdateGpuKernel() : ele_uint_crd(1) {}
~PMEReciprocalForceUpdateGpuKernel() override = default;
bool Init(const CNodePtr &kernel_node) override {
kernel_node_ = kernel_node;
atom_numbers = static_cast<int>(GetAttr<int64_t>(kernel_node, "atom_numbers"));
beta = static_cast<float>(GetAttr<float_t>(kernel_node, "beta"));
fftx = static_cast<int>(GetAttr<int64_t>(kernel_node, "fftx"));
ffty = static_cast<int>(GetAttr<int64_t>(kernel_node, "ffty"));
fftz = static_cast<int>(GetAttr<int64_t>(kernel_node, "fftz"));
float box_length_0 = static_cast<float>(GetAttr<float_t>(kernel_node, "box_length_0"));
float box_length_1 = static_cast<float>(GetAttr<float_t>(kernel_node, "box_length_1"));
float box_length_2 = static_cast<float>(GetAttr<float_t>(kernel_node, "box_length_2"));
need_update = static_cast<int>(GetAttr<int64_t>(kernel_node, "need_update"));
PME_Nall = fftx * ffty * fftz;
PME_Nfft = fftx * ffty * (fftz / 2 + 1);
PME_Nin = ffty * fftz;
cufftPlan3d(&PME_plan_r2c, fftx, ffty, fftz, CUFFT_R2C);
cufftPlan3d(&PME_plan_c2r, fftx, ffty, fftz, CUFFT_C2R);
std::vector<float> h_box_length{box_length_0, box_length_1, box_length_2};
VECTOR *box_length = reinterpret_cast<VECTOR *>(h_box_length.data());
PME_inverse_box_vector0.x = static_cast<float>(fftx) / box_length[0].x;
PME_inverse_box_vector0.y = static_cast<float>(ffty) / box_length[0].y;
PME_inverse_box_vector0.z = static_cast<float>(fftz) / box_length[0].z;
PME_inverse_box_vector.x = PME_inverse_box_vector0.x;
PME_inverse_box_vector.y = PME_inverse_box_vector0.y;
PME_inverse_box_vector.z = PME_inverse_box_vector0.z;
float volume = box_length[0].x * box_length[0].y * box_length[0].z;
PME_kxyz_cpu.resize(64);
int kx, ky, kz, kxrp, kyrp, kzrp, index;
for (kx = 0; kx < 4; kx++) {
for (ky = 0; ky < 4; ky++) {
for (kz = 0; kz < 4; kz++) {
index = kx * 16 + ky * 4 + kz;
PME_kxyz_cpu[index].uint_x = kx;
PME_kxyz_cpu[index].uint_y = ky;
PME_kxyz_cpu[index].uint_z = kz;
}
}
}
B1.resize(fftx);
B2.resize(ffty);
B3.resize(fftz);
PME_BC0.resize(PME_Nfft);
for (kx = 0; kx < fftx; kx++) {
B1[kx] = getb(kx, fftx, 4);
}
for (ky = 0; ky < ffty; ky++) {
B2[ky] = getb(ky, ffty, 4);
}
for (kz = 0; kz < fftz; kz++) {
B3[kz] = getb(kz, fftz, 4);
}
float mprefactor = PI * PI / -beta / beta;
float msq;
for (kx = 0; kx < fftx; kx++) {
kxrp = kx;
if (kx > fftx / 2) kxrp = fftx - kx;
for (ky = 0; ky < ffty; ky++) {
kyrp = ky;
if (ky > ffty / 2) kyrp = ffty - ky;
for (kz = 0; kz <= fftz / 2; kz++) {
kzrp = kz;
msq = kxrp * kxrp / box_length[0].x / box_length[0].x + kyrp * kyrp / box_length[0].y / box_length[0].y +
kzrp * kzrp / box_length[0].z / box_length[0].z;
index = kx * ffty * (fftz / 2 + 1) + ky * (fftz / 2 + 1) + kz;
if ((kx + ky + kz) == 0) {
PME_BC0[index] = 0;
} else {
PME_BC0[index] = 1.0 / PI / msq * exp(mprefactor * msq) / volume;
}
PME_BC0[index] *= B1[kx] * B2[ky] * B3[kz];
}
}
}
PME_BC = PME_BC0;
InitSizeLists();
return true;
}
const std::vector<size_t> &GetInputSizeList() const override { return input_size_list_; }
const std::vector<size_t> &GetOutputSizeList() const override { return output_size_list_; }
const std::vector<size_t> &GetWorkspaceSizeList() const override { return workspace_size_list_; }
bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
const std::vector<AddressPtr> &outputs, void *stream_ptr) override {
auto uint_crd = GetDeviceAddress<const T1>(inputs, 0);
auto charge = GetDeviceAddress<T>(inputs, 1);
auto d_beta = GetDeviceAddress<T>(inputs, 2);
auto pme_uxyz = GetDeviceAddress<int>(workspace, 0); // workspace
auto pme_frxyz = GetDeviceAddress<float>(workspace, 1); // workspace
auto pme_q = GetDeviceAddress<T>(workspace, 2); // workspace
auto pme_fq = GetDeviceAddress<float>(workspace, 3); // workspace
auto pme_atom_near = GetDeviceAddress<int>(workspace, 4); // workspace
auto pme_bc = GetDeviceAddress<float>(workspace, 5); // workspace
auto pme_kxyz = GetDeviceAddress<int>(workspace, 6); // workspace
auto force = GetDeviceAddress<T>(outputs, 0);
h_beta = beta;
if (need_update) {
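// beta was recomputed on the device: rescale the cached inverse box vector and BC table by beta_old / beta_new.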
cudaMemcpyAsync(&h_beta, d_beta, sizeof(float), cudaMemcpyDeviceToHost,
reinterpret_cast<cudaStream_t>(stream_ptr));
cudaStreamSynchronize(reinterpret_cast<cudaStream_t>(stream_ptr));
factor = h_beta / beta;
double factor_inverse = 1.0 / factor;
PME_inverse_box_vector.x = PME_inverse_box_vector0.x * factor_inverse;
PME_inverse_box_vector.y = PME_inverse_box_vector0.y * factor_inverse;
PME_inverse_box_vector.z = PME_inverse_box_vector0.z * factor_inverse;
for (int i = 0; i < PME_Nfft; i++) {
PME_BC[i] = PME_BC0[i] * factor_inverse;
}
}
cufftSetStream(PME_plan_r2c, reinterpret_cast<cudaStream_t>(stream_ptr));
cufftSetStream(PME_plan_c2r, reinterpret_cast<cudaStream_t>(stream_ptr));
cudaMemcpyAsync(pme_kxyz, PME_kxyz_cpu.data(), sizeof(UNSIGNED_INT_VECTOR) * 64, cudaMemcpyHostToDevice,
reinterpret_cast<cudaStream_t>(stream_ptr));
cudaMemcpyAsync(pme_bc, PME_BC.data(), sizeof(float) * PME_Nfft, cudaMemcpyHostToDevice,
reinterpret_cast<cudaStream_t>(stream_ptr));
PMEReciprocalForce(fftx, ffty, fftz, atom_numbers, beta, pme_bc, pme_uxyz, pme_frxyz, pme_q, pme_fq, pme_atom_near,
pme_kxyz, uint_crd, charge, force, PME_Nin, PME_Nall, PME_Nfft, PME_plan_r2c, PME_plan_c2r,
PME_inverse_box_vector, reinterpret_cast<cudaStream_t>(stream_ptr));
return true;
}
protected:
void InitSizeLists() override {
input_size_list_.push_back(atom_numbers * sizeof(UNSIGNED_INT_VECTOR));
input_size_list_.push_back(atom_numbers * sizeof(float));
input_size_list_.push_back(sizeof(float));
workspace_size_list_.push_back(atom_numbers * sizeof(UNSIGNED_INT_VECTOR));
workspace_size_list_.push_back(atom_numbers * sizeof(VECTOR));
workspace_size_list_.push_back(PME_Nall * sizeof(T));
workspace_size_list_.push_back(PME_Nfft * sizeof(cufftComplex));
workspace_size_list_.push_back(atom_numbers * 64 * sizeof(int));
workspace_size_list_.push_back(PME_Nfft * sizeof(float));
workspace_size_list_.push_back(64 * sizeof(UNSIGNED_INT_VECTOR));
output_size_list_.push_back(atom_numbers * sizeof(VECTOR));
}
cufftComplex expc(cufftComplex z) {
cufftComplex res;
float t = expf(z.x);
sincosf(z.y, &res.y, &res.x);
res.x *= t;
res.y *= t;
return res;
}
float M_(float u, int n) {
if (n == 2) {
if (u > 2 || u < 0) return 0;
return 1 - abs(u - 1);
} else {
return u / (n - 1) * M_(u, n - 1) + (n - u) / (n - 1) * M_(u - 1, n - 1);
}
}
float getb(int k, int NFFT, int B_order) {
cufftComplex tempc, tempc2, res;
float tempf;
tempc2.x = 0;
tempc2.y = 0;
tempc.x = 0;
if (NFFT == 0) {
MS_LOG(ERROR) << "Divide by zero.";
} else {
tempc.y = 2 * (B_order - 1) * PI * k / NFFT;
}
res = expc(tempc);
for (int kk = 0; kk < (B_order - 1); kk++) {
tempc.x = 0;
if (NFFT == 0) {
MS_LOG(ERROR) << "Divide by zero.";
break;
} else {
tempc.y = 2 * PI * k / NFFT * kk;
}
tempc = expc(tempc);
tempf = M_(kk + 1, B_order);
tempc2.x += tempf * tempc.x;
tempc2.y += tempf * tempc.y;
}
res = cuCdivf(res, tempc2);
return res.x * res.x + res.y * res.y;
}
private:
size_t ele_uint_crd = 1;
std::vector<size_t> input_size_list_;
std::vector<size_t> output_size_list_;
std::vector<size_t> workspace_size_list_;
int atom_numbers;
int fftx;
int ffty;
int fftz;
float beta;
float h_beta;
float factor;
int PME_Nall;
int PME_Nfft;
int PME_Nin;
float PI = 3.1415926;
int need_update;
std::vector<float> B1;
std::vector<float> B2;
std::vector<float> B3;
std::vector<float> PME_BC0;
std::vector<float> PME_BC;
cufftHandle PME_plan_r2c;
cufftHandle PME_plan_c2r;
struct VECTOR {
float x;
float y;
float z;
};
_VECTOR PME_inverse_box_vector0;
_VECTOR PME_inverse_box_vector;
struct UNSIGNED_INT_VECTOR {
unsigned int uint_x;
unsigned int uint_y;
unsigned int uint_z;
};
std::vector<UNSIGNED_INT_VECTOR> PME_kxyz_cpu;
};
} // namespace kernel
} // namespace mindspore
#endif
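
PME_Nall and PME_Nfft above are the real-space and half-complex grid sizes used by the R2C/C2R cuFFT plans. A short sketch of how they relate (illustrative helper, not part of the kernel):

// cuFFT stores only the non-redundant half of the last dimension for a
// real-to-complex transform, hence the fftz / 2 + 1 factor.
struct PmeGridSizes {
  int n_real;     // fftx * ffty * fftz          (PME_Nall)
  int n_complex;  // fftx * ffty * (fftz/2 + 1)  (PME_Nfft)
};

inline PmeGridSizes MakePmeGridSizes(int fftx, int ffty, int fftz) {
  return {fftx * ffty * fftz, fftx * ffty * (fftz / 2 + 1)};
}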

View File

@ -13,12 +13,16 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
*Note:
* ConstrainForceCycleWithVirial. This is an experimental interface that is subject to change and/or deletion.
*/
#include "backend/kernel_compiler/gpu/sponge/simple_constrain/constrain_force_cycle_with_virial_kernel.h"
namespace mindspore {
namespace kernel {
MS_REG_GPU_KERNEL_THREE(constrainforcecyclewithvirial,
MS_REG_GPU_KERNEL_THREE(ConstrainForceCycleWithVirial,
KernelAttr()
.AddInputAttr(kNumberTypeUInt32)
.AddInputAttr(kNumberTypeFloat32)

View File

@ -0,0 +1,41 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
*Note:
* ConstrainForce. This is an experimental interface that is subject to change and/or deletion.
*/
#include "backend/kernel_compiler/gpu/sponge/simple_constrain/constrain_force_kernel.h"
namespace mindspore {
namespace kernel {
MS_REG_GPU_KERNEL_THREE(ConstrainForce,
KernelAttr()
.AddInputAttr(kNumberTypeFloat32)
.AddInputAttr(kNumberTypeFloat32)
.AddInputAttr(kNumberTypeFloat32)
.AddInputAttr(kNumberTypeFloat32)
.AddInputAttr(kNumberTypeFloat32)
.AddInputAttr(kNumberTypeInt32)
.AddInputAttr(kNumberTypeInt32)
.AddInputAttr(kNumberTypeFloat32)
.AddInputAttr(kNumberTypeFloat32)
.AddOutputAttr(kNumberTypeUInt32)
.AddOutputAttr(kNumberTypeFloat32)
.AddOutputAttr(kNumberTypeFloat32),
ConstrainForceGpuKernel, float, int, unsigned int)
} // namespace kernel
} // namespace mindspore

View File

@ -0,0 +1,162 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
*Note:
* ConstrainForce. This is an experimental interface that is subject to change and/or deletion.
*/
#ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_SPONG_SIMPLE_CONSTRAIN_CONSTRAIN_FORCE_KERNEL_H_
#define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_SPONG_SIMPLE_CONSTRAIN_CONSTRAIN_FORCE_KERNEL_H_
#include "backend/kernel_compiler/gpu/cuda_impl/sponge/simple_constrain/constrain_force_virial_impl.cuh"
#include <cuda_runtime_api.h>
#include <map>
#include <string>
#include <vector>
#include "backend/kernel_compiler/gpu/gpu_kernel.h"
#include "backend/kernel_compiler/gpu/gpu_kernel_factory.h"
#include "runtime/device/gpu/cuda_common.h"
namespace mindspore {
namespace kernel {
template <typename T, typename T1, typename T2>
class ConstrainForceGpuKernel : public GpuKernel {
public:
ConstrainForceGpuKernel() : ele_crd(1) {}
~ConstrainForceGpuKernel() override = default;
bool Init(const CNodePtr &kernel_node) override {
kernel_node_ = kernel_node;
atom_numbers = static_cast<int>(GetAttr<int64_t>(kernel_node, "atom_numbers"));
constrain_pair_numbers = static_cast<int>(GetAttr<int64_t>(kernel_node, "constrain_pair_numbers"));
iteration_numbers = static_cast<int>(GetAttr<int64_t>(kernel_node, "iteration_numbers"));
half_exp_gamma_plus_half = static_cast<float>(GetAttr<float>(kernel_node, "half_exp_gamma_plus_half"));
auto shape_crd = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
auto shape_quarter_cof = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 1);
auto shape_mass_inverse = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 2);
auto shape_scaler = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 3);
auto shape_pair_dr = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 4);
auto shape_atom_i_serials = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 5);
auto shape_atom_j_serials = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 6);
auto shape_constant_rs = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 7);
auto shape_constrain_ks = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 8);
for (size_t i = 0; i < shape_scaler.size(); i++) ele_scaler *= shape_scaler[i];
for (size_t i = 0; i < shape_pair_dr.size(); i++) ele_pair_dr *= shape_pair_dr[i];
for (size_t i = 0; i < shape_atom_i_serials.size(); i++) ele_atom_i_serials *= shape_atom_i_serials[i];
for (size_t i = 0; i < shape_atom_j_serials.size(); i++) ele_atom_j_serials *= shape_atom_j_serials[i];
for (size_t i = 0; i < shape_constant_rs.size(); i++) ele_constant_rs *= shape_constant_rs[i];
for (size_t i = 0; i < shape_constrain_ks.size(); i++) ele_constrain_ks *= shape_constrain_ks[i];
for (size_t i = 0; i < shape_crd.size(); i++) ele_crd *= shape_crd[i];
for (size_t i = 0; i < shape_quarter_cof.size(); i++) ele_quarter_cof *= shape_quarter_cof[i];
for (size_t i = 0; i < shape_mass_inverse.size(); i++) ele_mass_inverse *= shape_mass_inverse[i];
InitSizeLists();
return true;
}
const std::vector<size_t> &GetInputSizeList() const override { return input_size_list_; }
const std::vector<size_t> &GetOutputSizeList() const override { return output_size_list_; }
const std::vector<size_t> &GetWorkspaceSizeList() const override { return workspace_size_list_; }
bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
const std::vector<AddressPtr> &outputs, void *stream_ptr) override {
auto crd = GetDeviceAddress<const T>(inputs, 0);
auto quarter_cof = GetDeviceAddress<const T>(inputs, 1);
auto mass_inverse = GetDeviceAddress<const T>(inputs, 2);
auto scaler = GetDeviceAddress<const T>(inputs, 3);
auto pair_dr = GetDeviceAddress<const T>(inputs, 4);
auto atom_i_serials = GetDeviceAddress<const T1>(inputs, 5);
auto atom_j_serials = GetDeviceAddress<const T1>(inputs, 6);
auto constant_rs = GetDeviceAddress<const T>(inputs, 7);
auto constrain_ks = GetDeviceAddress<const T>(inputs, 8);
auto constrain_pair = GetDeviceAddress<T>(workspace, 0);
auto uint_crd = GetDeviceAddress<T2>(outputs, 0);
auto test_frc_f = GetDeviceAddress<T>(outputs, 1);
auto d_atom_virial = GetDeviceAddress<T>(outputs, 2);
set_zero_force_with_virial(atom_numbers, constrain_pair_numbers, test_frc_f, d_atom_virial,
reinterpret_cast<cudaStream_t>(stream_ptr));
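// Iterate the simple constraint cycle: rebuild the unsigned coordinates from crd plus the accumulated
// constraint force, then add one correction per constrained pair.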
for (int i = 0; i < iteration_numbers; i++) {
refresh_uint_crd_update(atom_numbers, half_exp_gamma_plus_half, crd, quarter_cof, test_frc_f, mass_inverse,
uint_crd, reinterpret_cast<cudaStream_t>(stream_ptr));
constrain_force_cycle_update(atom_numbers, constrain_pair_numbers, uint_crd, scaler, constrain_pair, pair_dr,
atom_i_serials, atom_j_serials, constant_rs, constrain_ks, test_frc_f,
reinterpret_cast<cudaStream_t>(stream_ptr));
}
return true;
}
protected:
void InitSizeLists() override {
input_size_list_.push_back(ele_crd * sizeof(T));
input_size_list_.push_back(ele_quarter_cof * sizeof(T));
input_size_list_.push_back(ele_mass_inverse * sizeof(T));
input_size_list_.push_back(ele_scaler * sizeof(T));
input_size_list_.push_back(ele_pair_dr * sizeof(T));
input_size_list_.push_back(ele_atom_i_serials * sizeof(T1));
input_size_list_.push_back(ele_atom_j_serials * sizeof(T1));
input_size_list_.push_back(ele_constant_rs * sizeof(T));
input_size_list_.push_back(ele_constrain_ks * sizeof(T));
workspace_size_list_.push_back(constrain_pair_numbers * sizeof(CONSTRAIN_PAIR));
output_size_list_.push_back(3 * atom_numbers * sizeof(T2));
output_size_list_.push_back(3 * atom_numbers * sizeof(T));
output_size_list_.push_back(constrain_pair_numbers * sizeof(T));
}
private:
size_t ele_scaler = 1;
size_t ele_pair_dr = 1;
size_t ele_atom_i_serials = 1;
size_t ele_atom_j_serials = 1;
size_t ele_constant_rs = 1;
size_t ele_constrain_ks = 1;
size_t ele_crd = 1;
size_t ele_quarter_cof = 1;
size_t ele_mass_inverse = 1;
std::vector<size_t> input_size_list_;
std::vector<size_t> output_size_list_;
std::vector<size_t> workspace_size_list_;
int atom_numbers;
int constrain_pair_numbers;
int iteration_numbers;
int need_pressure;
float half_exp_gamma_plus_half;
struct CONSTRAIN_PAIR {
int atom_i_serial;
int atom_j_serial;
float constant_r;
float constrain_k;
};
};
} // namespace kernel
} // namespace mindspore
#endif

View File

@ -0,0 +1,42 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
*Note:
* ConstrainForceVirial. This is an experimental interface that is subject to change and/or deletion.
*/
#include "backend/kernel_compiler/gpu/sponge/simple_constrain/constrain_force_virial_kernel.h"
namespace mindspore {
namespace kernel {
MS_REG_GPU_KERNEL_THREE(ConstrainForceVirial,
KernelAttr()
.AddInputAttr(kNumberTypeFloat32)
.AddInputAttr(kNumberTypeFloat32)
.AddInputAttr(kNumberTypeFloat32)
.AddInputAttr(kNumberTypeFloat32)
.AddInputAttr(kNumberTypeFloat32)
.AddInputAttr(kNumberTypeInt32)
.AddInputAttr(kNumberTypeInt32)
.AddInputAttr(kNumberTypeFloat32)
.AddInputAttr(kNumberTypeFloat32)
.AddOutputAttr(kNumberTypeUInt32)
.AddOutputAttr(kNumberTypeFloat32)
.AddOutputAttr(kNumberTypeFloat32),
ConstrainForceVirialGpuKernel, float, int, unsigned int)
} // namespace kernel
} // namespace mindspore

View File

@ -0,0 +1,159 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
*Note:
* ConstrainForceVirial. This is an experimental interface that is subject to change and/or deletion.
*/
#ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_SPONG_SIMPLE_CONSTRAIN_CONSTRAIN_FORCE_VIRIAL_KERNEL_H_
#define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_SPONG_SIMPLE_CONSTRAIN_CONSTRAIN_FORCE_VIRIAL_KERNEL_H_
#include "backend/kernel_compiler/gpu/cuda_impl/sponge/simple_constrain/constrain_force_virial_impl.cuh"
#include <cuda_runtime_api.h>
#include <map>
#include <string>
#include <vector>
#include "backend/kernel_compiler/gpu/gpu_kernel.h"
#include "backend/kernel_compiler/gpu/gpu_kernel_factory.h"
#include "runtime/device/gpu/cuda_common.h"
namespace mindspore {
namespace kernel {
template <typename T, typename T1, typename T2>
class ConstrainForceVirialGpuKernel : public GpuKernel {
public:
ConstrainForceVirialGpuKernel() : ele_crd(1) {}
~ConstrainForceVirialGpuKernel() override = default;
bool Init(const CNodePtr &kernel_node) override {
kernel_node_ = kernel_node;
atom_numbers = static_cast<int>(GetAttr<int64_t>(kernel_node, "atom_numbers"));
constrain_pair_numbers = static_cast<int>(GetAttr<int64_t>(kernel_node, "constrain_pair_numbers"));
iteration_numbers = static_cast<int>(GetAttr<int64_t>(kernel_node, "iteration_numbers"));
half_exp_gamma_plus_half = static_cast<float>(GetAttr<float>(kernel_node, "half_exp_gamma_plus_half"));
auto shape_crd = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
auto shape_quarter_cof = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 1);
auto shape_mass_inverse = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 2);
auto shape_scaler = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 3);
auto shape_pair_dr = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 4);
auto shape_atom_i_serials = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 5);
auto shape_atom_j_serials = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 6);
auto shape_constant_rs = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 7);
auto shape_constrain_ks = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 8);
for (size_t i = 0; i < shape_scaler.size(); i++) ele_scaler *= shape_scaler[i];
for (size_t i = 0; i < shape_pair_dr.size(); i++) ele_pair_dr *= shape_pair_dr[i];
for (size_t i = 0; i < shape_atom_i_serials.size(); i++) ele_atom_i_serials *= shape_atom_i_serials[i];
for (size_t i = 0; i < shape_atom_j_serials.size(); i++) ele_atom_j_serials *= shape_atom_j_serials[i];
for (size_t i = 0; i < shape_constant_rs.size(); i++) ele_constant_rs *= shape_constant_rs[i];
for (size_t i = 0; i < shape_constrain_ks.size(); i++) ele_constrain_ks *= shape_constrain_ks[i];
for (size_t i = 0; i < shape_crd.size(); i++) ele_crd *= shape_crd[i];
for (size_t i = 0; i < shape_quarter_cof.size(); i++) ele_quarter_cof *= shape_quarter_cof[i];
for (size_t i = 0; i < shape_mass_inverse.size(); i++) ele_mass_inverse *= shape_mass_inverse[i];
InitSizeLists();
return true;
}
const std::vector<size_t> &GetInputSizeList() const override { return input_size_list_; }
const std::vector<size_t> &GetOutputSizeList() const override { return output_size_list_; }
const std::vector<size_t> &GetWorkspaceSizeList() const override { return workspace_size_list_; }
bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
const std::vector<AddressPtr> &outputs, void *stream_ptr) override {
auto crd = GetDeviceAddress<const T>(inputs, 0);
auto quarter_cof = GetDeviceAddress<const T>(inputs, 1);
auto mass_inverse = GetDeviceAddress<const T>(inputs, 2);
auto scaler = GetDeviceAddress<const T>(inputs, 3);
auto pair_dr = GetDeviceAddress<const T>(inputs, 4);
auto atom_i_serials = GetDeviceAddress<const T1>(inputs, 5);
auto atom_j_serials = GetDeviceAddress<const T1>(inputs, 6);
auto constant_rs = GetDeviceAddress<const T>(inputs, 7);
auto constrain_ks = GetDeviceAddress<const T>(inputs, 8);
auto constrain_pair = GetDeviceAddress<T>(workspace, 0);
auto uint_crd = GetDeviceAddress<T2>(outputs, 0);
auto test_frc_f = GetDeviceAddress<T>(outputs, 1);
auto d_atom_virial = GetDeviceAddress<T>(outputs, 2);
set_zero_force_with_virial(atom_numbers, constrain_pair_numbers, test_frc_f, d_atom_virial,
reinterpret_cast<cudaStream_t>(stream_ptr));
for (int i = 0; i < iteration_numbers; i++) {
refresh_uint_crd_update(atom_numbers, half_exp_gamma_plus_half, crd, quarter_cof, test_frc_f, mass_inverse,
uint_crd, reinterpret_cast<cudaStream_t>(stream_ptr));
constrain_force_cycle_with_virial_update(atom_numbers, constrain_pair_numbers, uint_crd, scaler, constrain_pair,
pair_dr, atom_i_serials, atom_j_serials, constant_rs, constrain_ks,
test_frc_f, d_atom_virial, reinterpret_cast<cudaStream_t>(stream_ptr));
}
return true;
}
protected:
void InitSizeLists() override {
input_size_list_.push_back(ele_crd * sizeof(T));
input_size_list_.push_back(ele_quarter_cof * sizeof(T));
input_size_list_.push_back(ele_mass_inverse * sizeof(T));
input_size_list_.push_back(ele_scaler * sizeof(T));
input_size_list_.push_back(ele_pair_dr * sizeof(T));
input_size_list_.push_back(ele_atom_i_serials * sizeof(T1));
input_size_list_.push_back(ele_atom_j_serials * sizeof(T1));
input_size_list_.push_back(ele_constant_rs * sizeof(T));
input_size_list_.push_back(ele_constrain_ks * sizeof(T));
workspace_size_list_.push_back(constrain_pair_numbers * sizeof(CONSTRAIN_PAIR));
output_size_list_.push_back(3 * atom_numbers * sizeof(T2));
output_size_list_.push_back(3 * atom_numbers * sizeof(T));
output_size_list_.push_back(constrain_pair_numbers * sizeof(T));
}
private:
size_t ele_scaler = 1;
size_t ele_pair_dr = 1;
size_t ele_atom_i_serials = 1;
size_t ele_atom_j_serials = 1;
size_t ele_constant_rs = 1;
size_t ele_constrain_ks = 1;
size_t ele_crd = 1;
size_t ele_quarter_cof = 1;
size_t ele_mass_inverse = 1;
std::vector<size_t> input_size_list_;
std::vector<size_t> output_size_list_;
std::vector<size_t> workspace_size_list_;
int atom_numbers;
int constrain_pair_numbers;
int iteration_numbers;
int need_pressure;
float half_exp_gamma_plus_half;
struct CONSTRAIN_PAIR {
int atom_i_serial;
int atom_j_serial;
float constant_r;
float constrain_k;
};
};
} // namespace kernel
} // namespace mindspore
#endif
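Launch() above zeroes the force and per-pair virial buffers, then alternates refresh_uint_crd_update and constrain_force_cycle_with_virial_update for iteration_numbers rounds. The device implementation lives in constrain_force_virial_impl.cuh and is not shown in this diff, so the following CPU sketch is only one plausible reading of a single constraint cycle (SHAKE-style): names, the use of pair_dr as a fixed reference direction, and the virial sign convention are all assumptions, not the actual kernel.

// CPU sketch of one constraint-force cycle (illustrative only; not the CUDA kernel).
#include <cmath>
#include <vector>

struct Vec3 { float x, y, z; };

struct ConstrainPair {
  int atom_i, atom_j;
  float constant_r;   // target bond length
  float constrain_k;  // effective (mass-weighted) stiffness
};

void constrain_force_cycle_cpu(const std::vector<Vec3> &crd,
                               const std::vector<Vec3> &pair_dr,  // reference directions, one per pair
                               const std::vector<ConstrainPair> &pairs,
                               std::vector<Vec3> *frc,
                               std::vector<float> *atom_virial) {
  for (size_t p = 0; p < pairs.size(); ++p) {
    const ConstrainPair &cp = pairs[p];
    // displacement between the partially updated positions of the constrained pair
    Vec3 dr = {crd[cp.atom_j].x - crd[cp.atom_i].x,
               crd[cp.atom_j].y - crd[cp.atom_i].y,
               crd[cp.atom_j].z - crd[cp.atom_i].z};
    float r = std::sqrt(dr.x * dr.x + dr.y * dr.y + dr.z * dr.z);
    // how far the pair is off its target distance, scaled by the pair stiffness
    float lambda = cp.constrain_k * (r - cp.constant_r);
    // apply equal and opposite corrections along the stored reference direction
    Vec3 f = {lambda * pair_dr[p].x, lambda * pair_dr[p].y, lambda * pair_dr[p].z};
    (*frc)[cp.atom_i].x += f.x;  (*frc)[cp.atom_i].y += f.y;  (*frc)[cp.atom_i].z += f.z;
    (*frc)[cp.atom_j].x -= f.x;  (*frc)[cp.atom_j].y -= f.y;  (*frc)[cp.atom_j].z -= f.z;
    // per-pair virial contribution (sign convention assumed)
    (*atom_virial)[p] -= lambda * r;
  }
}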

View File

@ -0,0 +1,42 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
*Note:
* Constrain. This is an experimental interface that is subject to change and/or deletion.
*/
#include "backend/kernel_compiler/gpu/sponge/simple_constrain/constrain_kernel.h"
namespace mindspore {
namespace kernel {
MS_REG_GPU_KERNEL_THREE(Constrain,
KernelAttr()
.AddInputAttr(kNumberTypeFloat32)
.AddInputAttr(kNumberTypeFloat32)
.AddInputAttr(kNumberTypeFloat32)
.AddInputAttr(kNumberTypeFloat32)
.AddInputAttr(kNumberTypeFloat32)
.AddInputAttr(kNumberTypeInt32)
.AddInputAttr(kNumberTypeInt32)
.AddInputAttr(kNumberTypeFloat32)
.AddInputAttr(kNumberTypeFloat32)
.AddInputAttr(kNumberTypeInt32)
.AddOutputAttr(kNumberTypeUInt32)
.AddOutputAttr(kNumberTypeFloat32)
.AddOutputAttr(kNumberTypeFloat32),
ConstrainGpuKernel, float, int, unsigned int)
} // namespace kernel
} // namespace mindspore
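Compared with the ConstrainForceVirial registration, this one takes an extra Int32 input (index 9): the pressure flag that Launch() in the header below copies back to the host to choose between the virial and non-virial constraint cycles. A small illustration of the assumed input ordering; the enum is hypothetical and only mirrors the variable names used in constrain_kernel.h.

// Illustration only: input order implied by the KernelAttr list above and by the
// GetDeviceAddress(...) calls in ConstrainGpuKernel::Launch. Enumerator names are hypothetical.
enum ConstrainInputIndex : int {
  kCrd = 0, kQuarterCof, kMassInverse, kScaler, kPairDr,
  kAtomISerials, kAtomJSerials, kConstantRs, kConstrainKs,
  kNeedPressure  // the extra Int32 input that ConstrainForceVirial does not take
};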

View File

@ -0,0 +1,176 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
*Note:
* Constrain. This is an experimental interface that is subject to change and/or deletion.
*/
#ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_SPONGE_SIMPLE_CONSTRAIN_CONSTRAIN_KERNEL_H_
#define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_SPONGE_SIMPLE_CONSTRAIN_CONSTRAIN_KERNEL_H_
#include "backend/kernel_compiler/gpu/cuda_impl/sponge/simple_constrain/constrain_force_virial_impl.cuh"
#include <cuda_runtime_api.h>
#include <map>
#include <string>
#include <vector>
#include "backend/kernel_compiler/gpu/gpu_kernel.h"
#include "backend/kernel_compiler/gpu/gpu_kernel_factory.h"
#include "runtime/device/gpu/cuda_common.h"
namespace mindspore {
namespace kernel {
template <typename T, typename T1, typename T2>
class ConstrainGpuKernel : public GpuKernel {
public:
ConstrainGpuKernel() : ele_crd(1), step(1) {}
~ConstrainGpuKernel() override = default;
bool Init(const CNodePtr &kernel_node) override {
// get attributes: atom_numbers, constrain_pair_numbers, iteration_numbers, half_exp_gamma_plus_half
kernel_node_ = kernel_node;
atom_numbers = static_cast<int>(GetAttr<int64_t>(kernel_node, "atom_numbers"));
constrain_pair_numbers = static_cast<int>(GetAttr<int64_t>(kernel_node, "constrain_pair_numbers"));
iteration_numbers = static_cast<int>(GetAttr<int64_t>(kernel_node, "iteration_numbers"));
half_exp_gamma_plus_half = static_cast<float>(GetAttr<float>(kernel_node, "half_exp_gamma_plus_half"));
auto shape_crd = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
auto shape_quarter_cof = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 1);
auto shape_mass_inverse = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 2);
auto shape_scaler = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 3);
auto shape_pair_dr = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 4);
auto shape_atom_i_serials = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 5);
auto shape_atom_j_serials = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 6);
auto shape_constant_rs = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 7);
auto shape_constrain_ks = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 8);
for (size_t i = 0; i < shape_scaler.size(); i++) ele_scaler *= shape_scaler[i];
for (size_t i = 0; i < shape_pair_dr.size(); i++) ele_pair_dr *= shape_pair_dr[i];
for (size_t i = 0; i < shape_atom_i_serials.size(); i++) ele_atom_i_serials *= shape_atom_i_serials[i];
for (size_t i = 0; i < shape_atom_j_serials.size(); i++) ele_atom_j_serials *= shape_atom_j_serials[i];
for (size_t i = 0; i < shape_constant_rs.size(); i++) ele_constant_rs *= shape_constant_rs[i];
for (size_t i = 0; i < shape_constrain_ks.size(); i++) ele_constrain_ks *= shape_constrain_ks[i];
for (size_t i = 0; i < shape_crd.size(); i++) ele_crd *= shape_crd[i];
for (size_t i = 0; i < shape_quarter_cof.size(); i++) ele_quarter_cof *= shape_quarter_cof[i];
for (size_t i = 0; i < shape_mass_inverse.size(); i++) ele_mass_inverse *= shape_mass_inverse[i];
InitSizeLists();
return true;
}
const std::vector<size_t> &GetInputSizeList() const override { return input_size_list_; }
const std::vector<size_t> &GetOutputSizeList() const override { return output_size_list_; }
const std::vector<size_t> &GetWorkspaceSizeList() const override { return workspace_size_list_; }
bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
const std::vector<AddressPtr> &outputs, void *stream_ptr) override {
auto crd = GetDeviceAddress<const T>(inputs, 0);
auto quarter_cof = GetDeviceAddress<const T>(inputs, 1);
auto mass_inverse = GetDeviceAddress<const T>(inputs, 2);
auto scaler = GetDeviceAddress<const T>(inputs, 3);
auto pair_dr = GetDeviceAddress<const T>(inputs, 4);
auto atom_i_serials = GetDeviceAddress<const T1>(inputs, 5);
auto atom_j_serials = GetDeviceAddress<const T1>(inputs, 6);
auto constant_rs = GetDeviceAddress<const T>(inputs, 7);
auto constrain_ks = GetDeviceAddress<const T>(inputs, 8);
auto d_need_pressure = GetDeviceAddress<T1>(inputs, 9);
auto constrain_pair = GetDeviceAddress<T>(workspace, 0);
auto uint_crd = GetDeviceAddress<T2>(outputs, 0);
auto test_frc_f = GetDeviceAddress<T>(outputs, 1);
auto d_atom_virial = GetDeviceAddress<T>(outputs, 2);
cudaMemcpyAsync(&need_pressure, d_need_pressure, sizeof(int), cudaMemcpyDeviceToHost,
reinterpret_cast<cudaStream_t>(stream_ptr));
cudaStreamSynchronize(reinterpret_cast<cudaStream_t>(stream_ptr));
set_zero_force_with_virial(atom_numbers, constrain_pair_numbers, test_frc_f, d_atom_virial,
reinterpret_cast<cudaStream_t>(stream_ptr));
for (int i = 0; i < iteration_numbers; i++) {
refresh_uint_crd_update(atom_numbers, half_exp_gamma_plus_half, crd, quarter_cof, test_frc_f, mass_inverse,
uint_crd, reinterpret_cast<cudaStream_t>(stream_ptr));
if (need_pressure) {
constrain_force_cycle_with_virial_update(atom_numbers, constrain_pair_numbers, uint_crd, scaler, constrain_pair,
pair_dr, atom_i_serials, atom_j_serials, constant_rs, constrain_ks,
test_frc_f, d_atom_virial, reinterpret_cast<cudaStream_t>(stream_ptr));
} else {
constrain_force_cycle_update(atom_numbers, constrain_pair_numbers, uint_crd, scaler, constrain_pair, pair_dr,
atom_i_serials, atom_j_serials, constant_rs, constrain_ks, test_frc_f,
reinterpret_cast<cudaStream_t>(stream_ptr));
}
}
step++;
return true;
}
protected:
void InitSizeLists() override {
input_size_list_.push_back(ele_crd * sizeof(T));
input_size_list_.push_back(ele_quarter_cof * sizeof(T));
input_size_list_.push_back(ele_mass_inverse * sizeof(T));
input_size_list_.push_back(ele_scaler * sizeof(T));
input_size_list_.push_back(ele_pair_dr * sizeof(T));
input_size_list_.push_back(ele_atom_i_serials * sizeof(T1));
input_size_list_.push_back(ele_atom_j_serials * sizeof(T1));
input_size_list_.push_back(ele_constant_rs * sizeof(T));
input_size_list_.push_back(ele_constrain_ks * sizeof(T));
input_size_list_.push_back(sizeof(T1));  // need_pressure flag (input index 9)
workspace_size_list_.push_back(constrain_pair_numbers * sizeof(CONSTRAIN_PAIR));
output_size_list_.push_back(3 * atom_numbers * sizeof(T2));
output_size_list_.push_back(3 * atom_numbers * sizeof(T));
output_size_list_.push_back(constrain_pair_numbers * sizeof(T));
}
private:
size_t ele_scaler = 1;
size_t ele_pair_dr = 1;
size_t ele_atom_i_serials = 1;
size_t ele_atom_j_serials = 1;
size_t ele_constant_rs = 1;
size_t ele_constrain_ks = 1;
size_t ele_crd = 1;
size_t ele_quarter_cof = 1;
size_t ele_mass_inverse = 1;
std::vector<size_t> input_size_list_;
std::vector<size_t> output_size_list_;
std::vector<size_t> workspace_size_list_;
int atom_numbers;
int constrain_pair_numbers;
int iteration_numbers;
int need_pressure;
float half_exp_gamma_plus_half;
int step;
struct CONSTRAIN_PAIR {
int atom_i_serial;
int atom_j_serial;
float constant_r;
float constrain_k;
};
};
} // namespace kernel
} // namespace mindspore
#endif
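The only host-side branch in Launch() depends on d_need_pressure, which is read back with cudaMemcpyAsync followed by cudaStreamSynchronize so the flag is valid before the if/else. A self-contained CUDA sketch of that readback pattern; the buffer names are hypothetical and this is not MindSpore code.

// Sketch of the flag-readback pattern: the async device-to-host copy is ordered on the
// stream, and the synchronize guarantees need_pressure is valid before the host branches.
#include <cuda_runtime_api.h>
#include <cstdio>

int main() {
  cudaStream_t stream;
  cudaStreamCreate(&stream);

  int *d_need_pressure = nullptr;
  cudaMalloc(reinterpret_cast<void **>(&d_need_pressure), sizeof(int));
  cudaMemsetAsync(d_need_pressure, 0, sizeof(int), stream);  // pretend the frontend passed 0

  int need_pressure = -1;
  cudaMemcpyAsync(&need_pressure, d_need_pressure, sizeof(int), cudaMemcpyDeviceToHost, stream);
  cudaStreamSynchronize(stream);  // without this the host could branch on stale data

  if (need_pressure) {
    std::printf("would call constrain_force_cycle_with_virial_update\n");
  } else {
    std::printf("would call constrain_force_cycle_update\n");
  }

  cudaFree(d_need_pressure);
  cudaStreamDestroy(stream);
  return 0;
}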

View File

@ -13,12 +13,15 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
*Note:
* LastCrdToDr. This is an experimental interface that is subject to change and/or deletion.
*/
#include "backend/kernel_compiler/gpu/sponge/simple_constrain/last_crd_to_dr_kernel.h"
namespace mindspore {
namespace kernel {
MS_REG_GPU_KERNEL_TWO(lastcrdtodr,
MS_REG_GPU_KERNEL_TWO(LastCrdToDr,
KernelAttr()
.AddInputAttr(kNumberTypeFloat32)
.AddInputAttr(kNumberTypeFloat32)
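The hunk above, and the two below it, rename the simple-constrain kernels from all-lowercase (lastcrdtodr, refreshcrdvel, refreshuintcrd) to CamelCase (LastCrdToDr, RefreshCrdVel, RefreshUintCrd). The registered string is the key under which the kernel is looked up, so the spelling must match the operator name exactly. A generic string-keyed factory sketch (not the actual MindSpore kernel factory) showing why the old spelling would no longer resolve:

// Generic sketch: kernels resolved by a string key, so "lastcrdtodr" and "LastCrdToDr"
// are two different entries.
#include <functional>
#include <map>
#include <memory>
#include <string>

struct FakeKernel { virtual ~FakeKernel() = default; };
struct LastCrdToDrKernel : FakeKernel {};

int main() {
  std::map<std::string, std::function<std::unique_ptr<FakeKernel>()>> factory;
  factory["LastCrdToDr"] = [] { return std::make_unique<LastCrdToDrKernel>(); };

  bool found_old = factory.count("lastcrdtodr") != 0;  // false: old lowercase spelling
  bool found_new = factory.count("LastCrdToDr") != 0;  // true: matches the new registration
  return (!found_old && found_new) ? 0 : 1;
}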

View File

@ -13,12 +13,16 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
*Note:
* RefreshCrdVel. This is an experimental interface that is subject to change and/or deletion.
*/
#include "backend/kernel_compiler/gpu/sponge/simple_constrain/refresh_crd_vel_kernel.h"
namespace mindspore {
namespace kernel {
MS_REG_GPU_KERNEL_TWO(refreshcrdvel,
MS_REG_GPU_KERNEL_TWO(RefreshCrdVel,
KernelAttr()
.AddInputAttr(kNumberTypeFloat32)
.AddInputAttr(kNumberTypeFloat32)

View File

@ -13,12 +13,15 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
*Note:
* RefreshUintCrd. This is an experimental interface that is subject to change and/or deletion.
*/
#include "backend/kernel_compiler/gpu/sponge/simple_constrain/refresh_uint_crd_kernel.h"
namespace mindspore {
namespace kernel {
MS_REG_GPU_KERNEL_TWO(refreshuintcrd,
MS_REG_GPU_KERNEL_TWO(RefreshUintCrd,
KernelAttr()
.AddInputAttr(kNumberTypeFloat32)
.AddInputAttr(kNumberTypeFloat32)