!22706 [BETA] sponge ops: lj, pme, simple constrain
Merge pull request !22706 from jiahongQian/master0831
commit 7d8ffd6a69

backend/kernel_compiler/gpu/cuda_impl/sponge/lj/lj_direct_cf_force_with_lj_virial_direct_cf_energy_impl.cu
@@ -0,0 +1,144 @@
/**
 * Copyright 2021 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
/**
 * Note:
 * LJForce. This is an experimental interface that is subject to change and/or deletion.
 */

#include "backend/kernel_compiler/gpu/cuda_impl/sponge/lj/lj_direct_cf_force_with_lj_virial_direct_cf_energy_impl.cuh"
#include "backend/kernel_compiler/gpu/cuda_impl/sponge/common_sponge.cuh"

__global__ void LJ_Direct_CF_Force_With_LJ_Virial_Direct_CF_Energy_CUDA(
  const int atom_numbers, const NEIGHBOR_LIST *nl, const UINT_VECTOR_LJ_TYPE *uint_crd, const VECTOR *boxlength,
  const float *LJ_type_A, const float *LJ_type_B, const float cutoff, VECTOR *frc, const float pme_beta,
  const float sqrt_pi, float *atom_lj_virial, float *atom_direct_cf_energy) {
  int atom_i = blockDim.x * blockIdx.x + threadIdx.x;
  if (atom_i < atom_numbers) {
    NEIGHBOR_LIST nl_i = nl[atom_i];
    int N = nl_i.atom_numbers;
    int atom_j;
    int int_x;
    int int_y;
    int int_z;
    UINT_VECTOR_LJ_TYPE r1 = uint_crd[atom_i], r2;
    VECTOR dr;
    float dr_2;
    float dr_4;
    float dr_8;
    float dr_6;
    float frc_abs = 0.;
    VECTOR frc_lin;
    VECTOR frc_record = {0., 0., 0.};
    float charge_i = r1.charge;
    float charge_j;
    float dr_abs;
    float dr_1;
    float beta_dr;
    float frc_cf_abs;
    float virial_lin = 0.;
    float energy_lin = 0.;

    int x, y;
    int atom_pair_LJ_type;
    for (int j = threadIdx.y; j < N; j = j + blockDim.y) {
      atom_j = nl_i.atom_serial[j];
      r2 = uint_crd[atom_j];
      charge_j = r2.charge;

      int_x = r2.uint_x - r1.uint_x;
      int_y = r2.uint_y - r1.uint_y;
      int_z = r2.uint_z - r1.uint_z;
      dr.x = boxlength[0].x * int_x;
      dr.y = boxlength[0].y * int_y;
      dr.z = boxlength[0].z * int_z;
      dr_abs = norm3df(dr.x, dr.y, dr.z);
      if (dr_abs < cutoff) {
        dr_1 = 1. / dr_abs;
        dr_2 = dr_1 * dr_1;
        dr_4 = dr_2 * dr_2;
        dr_8 = dr_4 * dr_4;
        dr_6 = dr_4 * dr_2;

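        // Branchless pair-type lookup: x = y >> 31 sign-extends to -1 or 0,
        // so (y ^ x) - x is |LJ_type_j - LJ_type_i|; (x + y) >> 1 and
        // (x - y) >> 1 then recover the larger and smaller of the two types,
        // and max * (max + 1) / 2 + min indexes the triangular table of
        // pairwise A/B Lennard-Jones coefficients.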
        y = (r2.LJ_type - r1.LJ_type);
        x = y >> 31;
        y = (y ^ x) - x;
        x = r2.LJ_type + r1.LJ_type;
        r2.LJ_type = (x + y) >> 1;
        x = (x - y) >> 1;
        atom_pair_LJ_type = (r2.LJ_type * (r2.LJ_type + 1) >> 1) + x;

        frc_abs = (-LJ_type_A[atom_pair_LJ_type] * dr_6 + LJ_type_B[atom_pair_LJ_type]) * dr_8;
        beta_dr = pme_beta * dr_abs;
        frc_cf_abs = beta_dr * sqrt_pi * expf(-beta_dr * beta_dr) + erfcf(beta_dr);
        frc_cf_abs = frc_cf_abs * dr_2 * dr_1;
        frc_cf_abs = charge_i * charge_j * frc_cf_abs;
        energy_lin = energy_lin + charge_i * charge_j * erfcf(beta_dr) * dr_1;
        virial_lin = virial_lin - frc_abs * dr_abs * dr_abs;
        frc_abs = frc_abs - frc_cf_abs;
        frc_lin.x = frc_abs * dr.x;
        frc_lin.y = frc_abs * dr.y;
        frc_lin.z = frc_abs * dr.z;
        frc_record.x = frc_record.x + frc_lin.x;
        frc_record.y = frc_record.y + frc_lin.y;
        frc_record.z = frc_record.z + frc_lin.z;
        atomicAdd(&frc[atom_j].x, -frc_lin.x);
        atomicAdd(&frc[atom_j].y, -frc_lin.y);
        atomicAdd(&frc[atom_j].z, -frc_lin.z);
      }
    }
    atomicAdd(&frc[atom_i].x, frc_record.x);
    atomicAdd(&frc[atom_i].y, frc_record.y);
    atomicAdd(&frc[atom_i].z, frc_record.z);

    atomicAdd(&atom_direct_cf_energy[atom_i], energy_lin);
    atomicAdd(&atom_lj_virial[atom_i], virial_lin);
  }
}

void LJ_Direct_CF_Force_With_LJ_Virial_Direct_CF_Energy(
  const int atom_numbers, const float cutoff, const float pme_beta, const unsigned int *uint_crd_f, const int *LJtype,
  const float *charge, const float *scaler_f, float *uint_crd_with_LJ, int *nl_atom_numbers, int *nl_atom_serial,
  int *nl, const float *d_LJ_A, const float *d_LJ_B, float *frc_f, float *atom_lj_virial, float *atom_energy,
  int max_neighbor_numbers, cudaStream_t stream) {
  Reset_List<<<ceilf(static_cast<float>(3. * atom_numbers) / 128), 128, 0, stream>>>(3 * atom_numbers, frc_f, 0.);
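  // Grid-size idiom used throughout this commit: ceil(count / 128) blocks of
  // 128 threads cover every element; e.g. 1000 atoms give
  // ceilf(3000 / 128.f) = 24 blocks for the 3 * atom_numbers force components.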
  Reset_List<<<ceilf(static_cast<float>(atom_numbers) / 128), 128, 0, stream>>>(atom_numbers, atom_energy, 0.);
  Reset_List<<<ceilf(static_cast<float>(atom_numbers) / 128), 128, 0, stream>>>(atom_numbers, atom_lj_virial, 0.);
  VECTOR *frc = reinterpret_cast<VECTOR *>(frc_f);
  VECTOR *scaler = const_cast<VECTOR *>(reinterpret_cast<const VECTOR *>(scaler_f));
  NEIGHBOR_LIST *nl_a = reinterpret_cast<NEIGHBOR_LIST *>(nl);
  construct_neighbor_list_kernel<<<ceilf(static_cast<float>(atom_numbers) / 128), 128, 0, stream>>>(
    atom_numbers, max_neighbor_numbers, nl_atom_numbers, nl_atom_serial, nl_a);

  UINT_VECTOR_LJ_TYPE *uint_crd_with_LJ_a = reinterpret_cast<UINT_VECTOR_LJ_TYPE *>(uint_crd_with_LJ);

  UNSIGNED_INT_VECTOR *uint_crd =
    const_cast<UNSIGNED_INT_VECTOR *>(reinterpret_cast<const UNSIGNED_INT_VECTOR *>(uint_crd_f));

  Copy_Crd_To_New_Crd_Start<<<ceilf(static_cast<float>(atom_numbers) / 32), 32, 0, stream>>>(
    atom_numbers, uint_crd, uint_crd_with_LJ_a, LJtype, charge);

  LJ_Direct_CF_Force_With_LJ_Virial_Direct_CF_Energy_CUDA<<<ceilf(static_cast<float>(atom_numbers) / 8), thread_LJ, 0,
                                                           stream>>>(
    atom_numbers, nl_a, uint_crd_with_LJ_a, scaler, d_LJ_A, d_LJ_B, cutoff, frc, pme_beta, TWO_DIVIDED_BY_SQRT_PI,
    atom_lj_virial, atom_energy);
  return;
}

void LJ_Direct_CF_Force_With_LJ_Virial_Direct_CF_Energy(
  const int atom_numbers, const float cutoff, const float pme_beta, const unsigned int *uint_crd_f, const int *LJtype,
  const float *charge, const float *scaler_f, float *uint_crd_with_LJ, int *nl_atom_numbers, int *nl_atom_serial,
  int *nl, const float *d_LJ_A, const float *d_LJ_B, float *frc_f, float *atom_lj_virial, float *atom_energy,
  int max_neighbor_numbers, cudaStream_t stream);

backend/kernel_compiler/gpu/cuda_impl/sponge/lj/lj_direct_cf_force_with_lj_virial_direct_cf_energy_impl.cuh
@@ -0,0 +1,33 @@
/**
 * Copyright 2021 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
/**
 * Note:
 * LJForce. This is an experimental interface that is subject to change and/or deletion.
 */

#ifndef MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_LJ_DIRECT_CF_FORCE_WITH_LJ_VIRIAL_DIRECT_CF_ENERGY_IMPL_H_
#define MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_LJ_DIRECT_CF_FORCE_WITH_LJ_VIRIAL_DIRECT_CF_ENERGY_IMPL_H_

#include <curand_kernel.h>
#include "runtime/device/gpu/cuda_common.h"

void LJ_Direct_CF_Force_With_LJ_Virial_Direct_CF_Energy(
  const int atom_numbers, const float cutoff, const float pme_beta, const unsigned int *uint_crd_f, const int *LJtype,
  const float *charge, const float *scaler_f, float *uint_crd_with_LJ, int *nl_atom_numbers, int *nl_atom_serial,
  int *nl, const float *d_LJ_A, const float *d_LJ_B, float *frc_f, float *atom_lj_virial, float *atom_energy,
  int max_neighbor_numbers, cudaStream_t stream);

#endif

backend/kernel_compiler/gpu/cuda_impl/sponge/nvtit/md_iteration_leap_frog_impl.cu
@@ -13,14 +13,16 @@
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
/**
 * Note:
 * MDIterationLeapFrog. This is an experimental interface that is subject to change and/or deletion.
 */

#include "backend/kernel_compiler/gpu/cuda_impl/sponge/nvtit/md_iteration_leap_frog_impl.cuh"
#include "backend/kernel_compiler/gpu/cuda_impl/sponge/common_sponge.cuh"

__global__ void MD_Iteration_Leap_Frog_With_LiuJian(const int atom_numbers, const float half_dt, const float dt,
                                                    const float exp_gamma, const float *inverse_mass,
                                                    const float *sqrt_mass_inverse, VECTOR *vel, VECTOR *crd,
                                                    VECTOR *frc, VECTOR *acc, VECTOR *random_frc) {
__global__ void MD_Iteration_Leap_Frog(const int atom_numbers, VECTOR *vel, VECTOR *crd, VECTOR *frc, VECTOR *acc,
                                       const float *inverse_mass, const float dt) {
  int i = blockDim.x * blockIdx.x + threadIdx.x;
  if (i < atom_numbers) {
    acc[i].x = inverse_mass[i] * frc[i].x;

@@ -31,17 +33,9 @@ __global__ void MD_Iteration_Leap_Frog_With_LiuJian(const int atom_numbers, cons
    vel[i].y = vel[i].y + dt * acc[i].y;
    vel[i].z = vel[i].z + dt * acc[i].z;

    crd[i].x = crd[i].x + half_dt * vel[i].x;
    crd[i].y = crd[i].y + half_dt * vel[i].y;
    crd[i].z = crd[i].z + half_dt * vel[i].z;

    vel[i].x = exp_gamma * vel[i].x + sqrt_mass_inverse[i] * random_frc[i].x;
    vel[i].y = exp_gamma * vel[i].y + sqrt_mass_inverse[i] * random_frc[i].y;
    vel[i].z = exp_gamma * vel[i].z + sqrt_mass_inverse[i] * random_frc[i].z;

    crd[i].x = crd[i].x + half_dt * vel[i].x;
    crd[i].y = crd[i].y + half_dt * vel[i].y;
    crd[i].z = crd[i].z + half_dt * vel[i].z;
    crd[i].x = crd[i].x + dt * vel[i].x;
    crd[i].y = crd[i].y + dt * vel[i].y;
    crd[i].z = crd[i].z + dt * vel[i].z;
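    // The half_dt / exp_gamma lines above belong to the LiuJian
    // (middle-scheme Langevin) kernel that this hunk removes; the plain
    // leap-frog kernel keeps a single full-dt position update.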

    frc[i].x = 0.;
    frc[i].y = 0.;
@@ -49,88 +43,12 @@ __global__ void MD_Iteration_Leap_Frog_With_LiuJian(const int atom_numbers, cons
  }
}

__global__ void MD_Iteration_Leap_Frog_With_LiuJian_With_Max_Velocity(const int atom_numbers, const float half_dt,
                                                                      const float dt, const float exp_gamma,
                                                                      const float *inverse_mass,
                                                                      const float *sqrt_mass_inverse, VECTOR *vel,
                                                                      VECTOR *crd, VECTOR *frc, VECTOR *acc,
                                                                      VECTOR *random_frc, const float max_vel) {
  int i = blockDim.x * blockIdx.x + threadIdx.x;
  float abs_vel;
  if (i < atom_numbers) {
    acc[i].x = inverse_mass[i] * frc[i].x;
    acc[i].y = inverse_mass[i] * frc[i].y;
    acc[i].z = inverse_mass[i] * frc[i].z;

    vel[i].x = vel[i].x + dt * acc[i].x;
    vel[i].y = vel[i].y + dt * acc[i].y;
    vel[i].z = vel[i].z + dt * acc[i].z;

    abs_vel = norm3df(vel[i].x, vel[i].y, vel[i].z);
    if (abs_vel < max_vel) {
    } else {
      abs_vel = max_vel / abs_vel;
      vel[i].x = abs_vel * vel[i].x;
      vel[i].y = abs_vel * vel[i].y;
      vel[i].z = abs_vel * vel[i].z;
    }

    crd[i].x = crd[i].x + half_dt * vel[i].x;
    crd[i].y = crd[i].y + half_dt * vel[i].y;
    crd[i].z = crd[i].z + half_dt * vel[i].z;

    vel[i].x = exp_gamma * vel[i].x + sqrt_mass_inverse[i] * random_frc[i].x;
    vel[i].y = exp_gamma * vel[i].y + sqrt_mass_inverse[i] * random_frc[i].y;
    vel[i].z = exp_gamma * vel[i].z + sqrt_mass_inverse[i] * random_frc[i].z;

    crd[i].x = crd[i].x + half_dt * vel[i].x;
    crd[i].y = crd[i].y + half_dt * vel[i].y;
    crd[i].z = crd[i].z + half_dt * vel[i].z;

    frc[i].x = 0.;
    frc[i].y = 0.;
    frc[i].z = 0.;
  }
void MDIterationLeapFrog(const int atom_numbers, float *vel, float *crd, float *frc, float *acc,
                         const float *inverse_mass, const float dt, cudaStream_t stream) {
  VECTOR *d_vel = reinterpret_cast<VECTOR *>(vel);
  VECTOR *d_crd = reinterpret_cast<VECTOR *>(crd);
  VECTOR *d_frc = reinterpret_cast<VECTOR *>(frc);
  VECTOR *d_acc = reinterpret_cast<VECTOR *>(acc);
  MD_Iteration_Leap_Frog<<<ceilf(static_cast<float>(atom_numbers) / 128), 128, 0, stream>>>(
    atom_numbers, d_vel, d_crd, d_frc, d_acc, inverse_mass, dt);
}

void MDIterationLeapFrog(const int float4_numbers, const int atom_numbers, const float half_dt, const float dt,
                         const float exp_gamma, const int is_max_velocity, const float max_velocity,
                         const float *d_mass_inverse, const float *d_sqrt_mass, float *vel_f, float *crd_f,
                         float *frc_f, float *acc_f, cudaStream_t stream) {
  Reset_List<<<ceilf(static_cast<float>(3. * atom_numbers) / 128), 128>>>(3 * atom_numbers, acc_f, 0.);

  VECTOR *frc = const_cast<VECTOR *>(reinterpret_cast<const VECTOR *>(frc_f));
  VECTOR *vel = const_cast<VECTOR *>(reinterpret_cast<const VECTOR *>(vel_f));
  VECTOR *acc = const_cast<VECTOR *>(reinterpret_cast<const VECTOR *>(acc_f));
  VECTOR *crd = const_cast<VECTOR *>(reinterpret_cast<const VECTOR *>(crd_f));

  curandStatePhilox4_32_10_t *rand_state;
  VECTOR *random_force;

  Cuda_Malloc_Safely(reinterpret_cast<void **>(&random_force), sizeof(float4) * float4_numbers);
  Cuda_Malloc_Safely(reinterpret_cast<void **>(&rand_state), sizeof(curandStatePhilox4_32_10_t) * float4_numbers);
  Setup_Rand_Normal_Kernel<<<ceilf(static_cast<float>(float4_numbers) / 32.), 32>>>(float4_numbers, rand_state, 1);
  Rand_Normal<<<ceilf(static_cast<float>(float4_numbers) / 32.), 32, 0, stream>>>(
    float4_numbers, rand_state, reinterpret_cast<float4 *>(random_force));

  if (!is_max_velocity) {
    MD_Iteration_Leap_Frog_With_LiuJian<<<ceilf(static_cast<float>(atom_numbers) / 32), 32, 0, stream>>>(
      atom_numbers, half_dt, dt, exp_gamma, d_mass_inverse, d_sqrt_mass, vel, crd, frc, acc, random_force);
  } else {
    MD_Iteration_Leap_Frog_With_LiuJian_With_Max_Velocity<<<ceilf(static_cast<float>(atom_numbers) / 32), 32, 0,
                                                            stream>>>(atom_numbers, half_dt, dt, exp_gamma,
                                                                      d_mass_inverse, d_sqrt_mass, vel, crd, frc, acc,
                                                                      random_force, max_velocity);
  }

  cudaStreamSynchronize(stream);
  cudaFree(random_force);
  cudaFree(rand_state);

  return;
}

void MDIterationLeapFrog(const int float4_numbers, const int atom_numbers, const float half_dt, const float dt,
                         const float exp_gamma, const int is_max_velocity, const float max_velocity,
                         const float *d_mass_inverse, const float *d_sqrt_mass, float *vel_f, float *crd_f,
                         float *frc_f, float *acc_f, cudaStream_t stream);

backend/kernel_compiler/gpu/cuda_impl/sponge/nvtit/md_iteration_leap_frog_impl.cuh
@@ -13,15 +13,18 @@
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
/**
 * Note:
 * MDIterationLeapFrog. This is an experimental interface that is subject to change and/or deletion.
 */

#ifndef MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_NVTIT_MD_ITERATION_LEAP_FROG_IMPL_H
#define MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_NVTIT_MD_ITERATION_LEAP_FROG_IMPL_H

#include <curand_kernel.h>
#include "runtime/device/gpu/cuda_common.h"

void MDIterationLeapFrog(const int float4_numbers, const int atom_numbers, const float half_dt, const float dt,
                         const float exp_gamma, const int is_max_velocity, const float max_velocity,
                         const float *d_mass_inverse, const float *d_sqrt_mass, float *vel_f, float *crd_f,
                         float *frc_f, float *acc_f, cudaStream_t stream);
void MDIterationLeapFrog(const int atom_numbers, float *vel, float *crd, float *frc, float *acc,
                         const float *inverse_mass, const float dt, cudaStream_t stream);

#endif  // MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_NVTIT_MD_ITERATION_LEAP_FROG_IMPL_H
#endif  // MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_MD_ITERATION_LEAP_FROG_LIUJIAN_GPU_IMPL_H_

backend/kernel_compiler/gpu/cuda_impl/sponge/pme/pme_common.cuh
@@ -13,6 +13,10 @@
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
/**
 * Note:
 * PME_Common. This is an experimental interface that is subject to change and/or deletion.
 */

#ifndef MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_PME_PME_COMMON_H_
#define MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_PME_PME_COMMON_H_
@@ -25,7 +29,7 @@ __constant__ float PME_dMa[4] = {0.5, -1.5, 1.5, -0.5};
__constant__ float PME_dMb[4] = {0, 1, -2, 1};
__constant__ float PME_dMc[4] = {0, 0.5, 0, -0.5};
#define PI 3.1415926
const float periodic_factor_inverse = 2.32830643e-10;
const float periodic_factor_inverse = 2.3283064365387e-10;
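// Both constants above are 2^-32; the replacement value carries full float
// precision. It maps a 32-bit unsigned fractional coordinate into [0, 1)
// before scaling by the FFT grid dimensions.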
static dim3 thread_PME;

const float cutoff = 10.0;

@@ -98,6 +102,10 @@ static float getb(int k, int NFFT, int B_order) {
  return res.x * res.x + res.y * res.y;
}

__global__ static void device_add(float *ene, float *factor, float *charge_sum) {
  ene[0] += factor[0] * charge_sum[0] * charge_sum[0];
}
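// device_add folds the net-charge term into an energy scalar on the device:
// pme_energy_update_impl.cu passes the neutralizing factor and the summed
// charge, so ene[0] += factor * (sum q)^2 without a host round trip.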

__global__ static void PME_Atom_Near(const UNSIGNED_INT_VECTOR *uint_crd, int *PME_atom_near, const int PME_Nin,
                                     const float periodic_factor_inverse_x, const float periodic_factor_inverse_y,
                                     const float periodic_factor_inverse_z, const int atom_numbers, const int fftx,
@@ -130,10 +138,13 @@ __global__ static void PME_Atom_Near(const UNSIGNED_INT_VECTOR *uint_crd, int *P
      UNSIGNED_INT_VECTOR temp_kxyz = PME_kxyz[k];
      kx = tempux - temp_kxyz.uint_x;
      if (kx < 0) kx += fftx;
      if (kx > fftx) kx -= fftx;
      ky = tempuy - temp_kxyz.uint_y;
      if (ky < 0) ky += ffty;
      if (ky > ffty) ky -= ffty;
      kz = tempuz - temp_kxyz.uint_z;
      if (kz < 0) kz += fftz;
      if (kz > fftz) kz -= fftz;
      temp_near[k] = kx * PME_Nin + ky * fftz + kz;
    }
  }
@@ -227,4 +238,120 @@ __global__ static void PME_Direct_Energy(const int atom_numbers, const NEIGHBOR_
  }
}

__global__ static void PME_Direct_Atom_Energy(const int atom_numbers, const NEIGHBOR_LIST *nl,
                                              const UNSIGNED_INT_VECTOR *uint_crd, const VECTOR *boxlength,
                                              const float *charge, const float beta, const float cutoff_square,
                                              float *direct_ene) {
  int atom_i = blockDim.x * blockIdx.x + threadIdx.x;
  if (atom_i < atom_numbers) {
    NEIGHBOR_LIST nl_i = nl[atom_i];
    int N = nl_i.atom_numbers;
    int atom_j;
    int int_x;
    int int_y;
    int int_z;
    UNSIGNED_INT_VECTOR r1 = uint_crd[atom_i], r2;
    VECTOR dr;
    float dr2;
    float dr_abs;
    // float dr_inverse;
    float ene_temp;
    float charge_i = charge[atom_i];
    float ene_lin = 0.;

    for (int j = threadIdx.y; j < N; j = j + blockDim.y) {
      atom_j = nl_i.atom_serial[j];
      r2 = uint_crd[atom_j];

      int_x = r2.uint_x - r1.uint_x;
      int_y = r2.uint_y - r1.uint_y;
      int_z = r2.uint_z - r1.uint_z;
      dr.x = boxlength[0].x * int_x;
      dr.y = boxlength[0].y * int_y;
      dr.z = boxlength[0].z * int_z;

      dr2 = dr.x * dr.x + dr.y * dr.y + dr.z * dr.z;
      if (dr2 < cutoff_square) {
        dr_abs = norm3df(dr.x, dr.y, dr.z);
        ene_temp = charge_i * charge[atom_j] * erfcf(beta * dr_abs) / dr_abs;
        ene_lin = ene_lin + ene_temp;
      }
    }
    atomicAdd(&direct_ene[atom_i], ene_lin);
  }
}

__global__ static void PME_Energy_Product(const int element_number, const float *list1, const float *list2,
                                          float *sum) {
  if (threadIdx.x == 0) {
    sum[0] = 0.;
  }
  __syncthreads();
  float lin = 0.0;
  for (int i = threadIdx.x; i < element_number; i = i + blockDim.x) {
    lin = lin + list1[i] * list2[i];
  }
  atomicAdd(sum, lin);
}
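// Note: the unguarded reset of sum[0], synchronized only by __syncthreads(),
// is correct only for single-block launches; the callers in this commit
// launch PME_Energy_Product as <<<1, 1024>>>.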

__global__ static void PME_BCFQ(cufftComplex *PME_FQ, float *PME_BC, int PME_Nfft) {
  int index = blockDim.x * blockIdx.x + threadIdx.x;
  if (index < PME_Nfft) {
    float tempf = PME_BC[index];
    cufftComplex tempc = PME_FQ[index];
    PME_FQ[index].x = tempc.x * tempf;
    PME_FQ[index].y = tempc.y * tempf;
  }
}

__global__ static void PME_Excluded_Energy_Correction(const int atom_numbers, const UNSIGNED_INT_VECTOR *uint_crd,
                                                      const VECTOR *sacler, const float *charge, const float pme_beta,
                                                      const float sqrt_pi, const int *excluded_list_start,
                                                      const int *excluded_list, const int *excluded_atom_numbers,
                                                      float *ene) {
  int atom_i = blockDim.x * blockIdx.x + threadIdx.x;
  if (atom_i < atom_numbers) {
    int excluded_number = excluded_atom_numbers[atom_i];
    if (excluded_number > 0) {
      int list_start = excluded_list_start[atom_i];
      // int atom_min = excluded_list[list_start];
      int list_end = list_start + excluded_number;
      int atom_j;
      int int_x;
      int int_y;
      int int_z;

      float charge_i = charge[atom_i];
      float charge_j;
      float dr_abs;
      float beta_dr;

      UNSIGNED_INT_VECTOR r1 = uint_crd[atom_i], r2;
      VECTOR dr;
      float dr2;

      float ene_lin = 0.;

      for (int i = list_start; i < list_end; i = i + 1) {
        atom_j = excluded_list[i];
        r2 = uint_crd[atom_j];
        charge_j = charge[atom_j];

        int_x = r2.uint_x - r1.uint_x;
        int_y = r2.uint_y - r1.uint_y;
        int_z = r2.uint_z - r1.uint_z;
        dr.x = sacler[0].x * int_x;
        dr.y = sacler[0].y * int_y;
        dr.z = sacler[0].z * int_z;
        dr2 = dr.x * dr.x + dr.y * dr.y + dr.z * dr.z;

        dr_abs = sqrtf(dr2);
        beta_dr = pme_beta * dr_abs;

        ene_lin -= charge_i * charge_j * erff(beta_dr) / dr_abs;
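        // Subtracts the erf (smooth) part of the Coulomb term that the
        // reciprocal-space sum implicitly added for this excluded pair.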
      }
      atomicAdd(ene, ene_lin);
    }
  }
}
#endif

backend/kernel_compiler/gpu/cuda_impl/sponge/pme/pme_energy_impl.cu
@@ -13,22 +13,14 @@
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
/**
 * Note:
 * PMEEnergy. This is an experimental interface that is subject to change and/or deletion.
 */
#include "backend/kernel_compiler/gpu/cuda_impl/sponge/pme/pme_energy_impl.cuh"
#include "backend/kernel_compiler/gpu/cuda_impl/sponge/pme/pme_common.cuh"
#include "backend/kernel_compiler/gpu/cuda_impl/sponge/common_sponge.cuh"

__global__ void PME_Energy_Product(const int element_number, const float *list1, const float *list2, float *sum) {
  if (threadIdx.x == 0) {
    sum[0] = 0.;
  }
  __syncthreads();
  float lin = 0.0;
  for (int i = threadIdx.x; i < element_number; i = i + blockDim.x) {
    lin = lin + list1[i] * list2[i];
  }
  atomicAdd(sum, lin);
}

__global__ void PME_Energy_Reciprocal(const int element_number, const cufftComplex *FQ, const float *BC, float *sum) {
  if (threadIdx.x == 0) {
    sum[0] = 0.;
@@ -43,56 +35,6 @@ __global__ void PME_Energy_Reciprocal(const int element_number, const cufftCompl
  atomicAdd(sum, lin);
}

__global__ void PME_Excluded_Energy_Correction(const int atom_numbers, const UNSIGNED_INT_VECTOR *uint_crd,
                                               const VECTOR *sacler, const float *charge, const float pme_beta,
                                               const float sqrt_pi, const int *excluded_list_start,
                                               const int *excluded_list, const int *excluded_atom_numbers, float *ene) {
  int atom_i = blockDim.x * blockIdx.x + threadIdx.x;
  if (atom_i < atom_numbers) {
    int excluded_number = excluded_atom_numbers[atom_i];
    if (excluded_number > 0) {
      int list_start = excluded_list_start[atom_i];
      // int atom_min = excluded_list[list_start];
      int list_end = list_start + excluded_number;
      int atom_j;
      int int_x;
      int int_y;
      int int_z;

      float charge_i = charge[atom_i];
      float charge_j;
      float dr_abs;
      float beta_dr;

      UNSIGNED_INT_VECTOR r1 = uint_crd[atom_i], r2;
      VECTOR dr;
      float dr2;

      float ene_lin = 0.;

      for (int i = list_start; i < list_end; i = i + 1) {
        atom_j = excluded_list[i];
        r2 = uint_crd[atom_j];
        charge_j = charge[atom_j];

        int_x = r2.uint_x - r1.uint_x;
        int_y = r2.uint_y - r1.uint_y;
        int_z = r2.uint_z - r1.uint_z;
        dr.x = sacler[0].x * int_x;
        dr.y = sacler[0].y * int_y;
        dr.z = sacler[0].z * int_z;
        dr2 = dr.x * dr.x + dr.y * dr.y + dr.z * dr.z;

        dr_abs = sqrtf(dr2);
        beta_dr = pme_beta * dr_abs;

        ene_lin -= charge_i * charge_j * erff(beta_dr) / dr_abs;
      }
      atomicAdd(ene, ene_lin);
    }
  }
}

void PMEEnergy(int fftx, int ffty, int fftz, int atom_numbers, float beta, float *PME_BC, int *pme_uxyz,
               float *pme_frxyz, float *PME_Q, float *pme_fq, int *PME_atom_near, int *pme_kxyz, const int *uint_crd_f,
               const float *charge, int *nl_atom_numbers, int *nl_atom_serial, int *nl, const float *scaler_f,

backend/kernel_compiler/gpu/cuda_impl/sponge/pme/pme_energy_update_impl.cu
@@ -0,0 +1,90 @@
/**
 * Copyright 2021 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
/**
 * Note:
 * PMEEnergyUpdate. This is an experimental interface that is subject to change and/or deletion.
 */
#include "backend/kernel_compiler/gpu/cuda_impl/sponge/pme/pme_energy_update_impl.cuh"
#include "backend/kernel_compiler/gpu/cuda_impl/sponge/pme/pme_common.cuh"
#include "backend/kernel_compiler/gpu/cuda_impl/sponge/common_sponge.cuh"

__global__ void PME_Energy_Reciprocal_update(const int element_number, const cufftComplex *FQ, const float *BC,
                                             float *sum) {
  if (threadIdx.x == 0) {
    sum[0] = 0.;
  }
  __syncthreads();
  float lin = 0.0;
  cufftComplex FQ_i;
  for (int i = threadIdx.x; i < element_number; i = i + blockDim.x) {
    FQ_i = FQ[i];
    lin = lin + (FQ_i.x * FQ_i.x + FQ_i.y * FQ_i.y) * BC[i];
  }
  atomicAdd(sum, lin);
}

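// PMEEnergyUpdate below accumulates the four Ewald/PME energy terms: the
// reciprocal term (BC-weighted norm of the FFT of the spread charge grid),
// the self term (-beta/sqrt(PI) * sum q_i^2, plus the net-charge term folded
// in via device_add), the direct term (erfc-screened pair sum inside the
// cutoff), and the excluded-pair correction.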
void PMEEnergyUpdate(int fftx, int ffty, int fftz, int atom_numbers, float beta, float *PME_BC, int *pme_uxyz,
                     float *pme_frxyz, float *PME_Q, float *pme_fq, int *PME_atom_near, int *pme_kxyz,
                     const int *uint_crd_f, const float *charge, int *nl_atom_numbers, int *nl_atom_serial, int *nl,
                     const float *scaler_f, const int *excluded_list_start, const int *excluded_list,
                     const int *excluded_atom_numbers, float *d_reciprocal_ene, float *d_self_ene, float *d_direct_ene,
                     float *d_correction_ene, dim3 thread_PME, int PME_Nin, int PME_Nfft, int PME_Nall,
                     const cufftHandle &PME_plan_r2c, const cufftHandle &PME_plan_c2r, float *neutralizing_factor,
                     float *charge_sum, int max_neighbor_numbers, cudaStream_t stream) {
  UNSIGNED_INT_VECTOR *uint_crd =
    const_cast<UNSIGNED_INT_VECTOR *>(reinterpret_cast<const UNSIGNED_INT_VECTOR *>(uint_crd_f));
  VECTOR *scaler = const_cast<VECTOR *>(reinterpret_cast<const VECTOR *>(scaler_f));
  // int max_neighbor_numbers = 800;
  NEIGHBOR_LIST *nl_a = reinterpret_cast<NEIGHBOR_LIST *>(nl);
  construct_neighbor_list_kernel<<<ceilf(static_cast<float>(atom_numbers) / 128), 128, 0, stream>>>(
    atom_numbers, max_neighbor_numbers, nl_atom_numbers, nl_atom_serial, nl_a);

  UNSIGNED_INT_VECTOR *PME_uxyz = reinterpret_cast<UNSIGNED_INT_VECTOR *>(pme_uxyz);
  UNSIGNED_INT_VECTOR *PME_kxyz = reinterpret_cast<UNSIGNED_INT_VECTOR *>(pme_kxyz);
  VECTOR *PME_frxyz = reinterpret_cast<VECTOR *>(pme_frxyz);
  cufftComplex *PME_FQ = reinterpret_cast<cufftComplex *>(pme_fq);

  Reset_List<<<3 * atom_numbers / 32 + 1, 32, 0, stream>>>(3 * atom_numbers, reinterpret_cast<int *>(PME_uxyz),
                                                           1 << 30);
  PME_Atom_Near<<<atom_numbers / 32 + 1, 32, 0, stream>>>(
    uint_crd, PME_atom_near, PME_Nin, periodic_factor_inverse * fftx, periodic_factor_inverse * ffty,
    periodic_factor_inverse * fftz, atom_numbers, fftx, ffty, fftz, PME_kxyz, PME_uxyz, PME_frxyz);

  Reset_List<<<PME_Nall / 1024 + 1, 1024, 0, stream>>>(PME_Nall, PME_Q, 0);

  PME_Q_Spread<<<atom_numbers / thread_PME.x + 1, thread_PME, 0, stream>>>(PME_atom_near, charge, PME_frxyz, PME_Q,
                                                                           PME_kxyz, atom_numbers);

  cufftExecR2C(PME_plan_r2c, reinterpret_cast<float *>(PME_Q), reinterpret_cast<cufftComplex *>(PME_FQ));

  PME_Energy_Reciprocal_update<<<1, 1024, 0, stream>>>(PME_Nfft, PME_FQ, PME_BC, d_reciprocal_ene);

  PME_Energy_Product<<<1, 1024, 0, stream>>>(atom_numbers, charge, charge, d_self_ene);
  Scale_List<<<1, 1, 0, stream>>>(1, d_self_ene, -beta / sqrtf(PI));

  Sum_Of_List<<<1, 1024>>>(atom_numbers, charge, charge_sum);
  device_add<<<1, 1>>>(d_self_ene, neutralizing_factor, charge_sum);
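  // Note: Sum_Of_List and device_add above are launched without a stream
  // argument and therefore run on the default stream, unlike the surrounding
  // kernels, which are all queued on `stream`.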

  Reset_List<<<1, 1, 0, stream>>>(1, d_direct_ene, 0.0);
  PME_Direct_Energy<<<atom_numbers / thread_PME.x + 1, thread_PME, 0, stream>>>(
    atom_numbers, nl_a, uint_crd, scaler, charge, beta, cutoff * cutoff, d_direct_ene);

  Reset_List<<<1, 1, 0, stream>>>(1, d_correction_ene, 0.0);
  PME_Excluded_Energy_Correction<<<atom_numbers / 32 + 1, 32, 0, stream>>>(
    atom_numbers, uint_crd, scaler, charge, beta, sqrtf(PI), excluded_list_start, excluded_list, excluded_atom_numbers,
    d_correction_ene);
  return;
}

backend/kernel_compiler/gpu/cuda_impl/sponge/pme/pme_energy_update_impl.cuh
@@ -0,0 +1,31 @@
/**
 * Copyright 2021 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#ifndef MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_PME_PME_ENERGY_UPDATE_IMPL_H_
#define MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_PME_PME_ENERGY_UPDATE_IMPL_H_

#include <cufft.h>
#include "runtime/device/gpu/cuda_common.h"

void PMEEnergyUpdate(int fftx, int ffty, int fftz, int atom_numbers, float beta, float *PME_BC, int *pme_uxyz,
                     float *pme_frxyz, float *PME_Q, float *pme_fq, int *PME_atom_near, int *pme_kxyz,
                     const int *uint_crd_f, const float *charge, int *nl_atom_numbers, int *nl_atom_serial, int *nl,
                     const float *scaler_f, const int *excluded_list_start, const int *excluded_list,
                     const int *excluded_atom_numbers, float *d_reciprocal_ene, float *d_self_ene, float *d_direct_ene,
                     float *d_correction_ene, dim3 thread_PME, int PME_Nin, int PME_Nfft, int PME_Nall,
                     const cufftHandle &PME_plan_r2c, const cufftHandle &PME_plan_c2r, float *neutralizing_factor,
                     float *charge_sum, int max_neighbor_numbers, cudaStream_t stream);

#endif

backend/kernel_compiler/gpu/cuda_impl/sponge/pme/pme_reciprocal_force_impl.cu
@@ -13,19 +13,13 @@
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
/**
 * Note:
 * PMEReciprocalForce. This is an experimental interface that is subject to change and/or deletion.
 */
#include "backend/kernel_compiler/gpu/cuda_impl/sponge/pme/pme_reciprocal_force_impl.cuh"
#include "backend/kernel_compiler/gpu/cuda_impl/sponge/pme/pme_common.cuh"

__global__ void PME_BCFQ(cufftComplex *PME_FQ, float *PME_BC, int PME_Nfft) {
  int index = blockDim.x * blockIdx.x + threadIdx.x;
  if (index < PME_Nfft) {
    float tempf = PME_BC[index];
    cufftComplex tempc = PME_FQ[index];
    PME_FQ[index].x = tempc.x * tempf;
    PME_FQ[index].y = tempc.y * tempf;
  }
}

__global__ void PME_Final(int *PME_atom_near, const float *charge, const float *PME_Q, VECTOR *force,
                          const VECTOR *PME_frxyz, const UNSIGNED_INT_VECTOR *PME_kxyz,
                          const _VECTOR PME_inverse_box_vector, const int atom_numbers) {
@@ -92,9 +86,11 @@ void PMEReciprocalForce(int fftx, int ffty, int fftz, int atom_numbers, float be
  // initial end
  Reset_List<<<ceilf(static_cast<float>(3. * atom_numbers) / 128), 128, 0, stream>>>(
    3 * atom_numbers, reinterpret_cast<float *>(frc), 0.);

  PME_Atom_Near<<<atom_numbers / 32 + 1, 32, 0, stream>>>(
    uint_crd, PME_atom_near, PME_Nin, periodic_factor_inverse * fftx, periodic_factor_inverse * ffty,
    periodic_factor_inverse * fftz, atom_numbers, fftx, ffty, fftz, PME_kxyz, PME_uxyz, PME_frxyz);

  Reset_List<<<PME_Nall / 1024 + 1, 1024, 0, stream>>>(PME_Nall, PME_Q, 0);

  PME_Q_Spread<<<atom_numbers / thread_PME.x + 1, thread_PME, 0, stream>>>(PME_atom_near, charge, PME_frxyz, PME_Q,

backend/kernel_compiler/gpu/cuda_impl/sponge/simple_constrain/constrain_force_cycle_impl.cu
@@ -13,9 +13,13 @@
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
/**
 * Note:
 * Constrain_Force_Cycle. This is an experimental interface that is subject to change and/or deletion.
 */

#include "backend/kernel_compiler/gpu/cuda_impl/sponge/simple_constrain/constrain_force_cycle_impl.cuh"
#include "backend/kernel_compiler/gpu/cuda_impl/sponge/common_sponge.cuh"
#include "backend/kernel_compiler/gpu/cuda_impl/sponge/simple_constrain/constrain_force_cycle_impl.cuh"

__global__ void Constrain_Force_Cycle_Kernel(const int constrain_pair_numbers, const UNSIGNED_INT_VECTOR *uint_crd,
                                             const VECTOR *scaler, const CONSTRAIN_PAIR *constrain_pair,
@@ -64,10 +68,10 @@ void Constrain_Force_Cycle(int atom_numbers, int constrain_pair_numbers, const u
  CONSTRAIN_PAIR *constrain_pair = reinterpret_cast<CONSTRAIN_PAIR *>(constrain_pair_f);

  construct_constrain_pair<<<ceilf(static_cast<float>(constrain_pair_numbers) / 128), 128, 0, stream>>>(
    constrain_pair_numbers, atom_i_serials, atom_j_serials, constant_rs, constrain_ks, constrain_pair);

  Constrain_Force_Cycle_Kernel<<<block_per_grid, thread_per_block, 0, stream>>>(
    constrain_pair_numbers, uint_crd, scaler, constrain_pair, pair_dr, test_frc);

  return;
}

backend/kernel_compiler/gpu/cuda_impl/sponge/simple_constrain/constrain_force_cycle_impl.cuh
@@ -13,6 +13,10 @@
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
/**
 * Note:
 * Constrain_Force_Cycle. This is an experimental interface that is subject to change and/or deletion.
 */

#ifndef MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_SIMPLE_CONSTRAIN_CONSTRAIN_FORCE_CYCLE_IMPL_H_
#define MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_SIMPLE_CONSTRAIN_CONSTRAIN_FORCE_CYCLE_IMPL_H_
@@ -24,4 +28,5 @@ void Constrain_Force_Cycle(int atom_numbers, int constrain_pair_numbers, const u
                           const float *scaler_f, float *constrain_pair_f, const float *pair_dr_f,
                           const int *atom_i_serials, const int *atom_j_serials, const float *constant_rs,
                           const float *constrain_ks, float *test_frc_f, cudaStream_t stream);
#endif  // MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_SIMPLE_CONSTRAIN_CONSTRAIN_FORCE_CYCLE_WITH_VIRIAL_IMPL_H_

#endif

backend/kernel_compiler/gpu/cuda_impl/sponge/simple_constrain/constrain_force_cycle_with_virial_impl.cu
@@ -13,9 +13,13 @@
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
/**
 * Note:
 * ConstrainForceCycleVirial. This is an experimental interface that is subject to change and/or deletion.
 */

#include "backend/kernel_compiler/gpu/cuda_impl/sponge/simple_constrain/constrain_force_cycle_with_virial_impl.cuh"
#include "backend/kernel_compiler/gpu/cuda_impl/sponge/common_sponge.cuh"
#include "backend/kernel_compiler/gpu/cuda_impl/sponge/simple_constrain/constrain_force_cycle_with_virial_impl.cuh"

__global__ void Constrain_Force_Cycle_With_Virial(int constrain_pair_numbers, const UNSIGNED_INT_VECTOR *uint_crd,
                                                  const VECTOR *scaler, CONSTRAIN_PAIR *constrain_pair,
@@ -46,7 +50,8 @@ void Constrain_Force_Cycle_With_Virial(int atom_numbers, int constrain_pair_numb
                                       const float *constrain_ks, float *test_frc_f, float *d_atom_virial,
                                       cudaStream_t stream) {
  Reset_List<<<ceilf(static_cast<float>(3 * atom_numbers) / 128), 128, 0, stream>>>(3 * atom_numbers, test_frc_f, 0.);
  Reset_List<<<ceilf(static_cast<float>(atom_numbers) / 128), 128, 0, stream>>>(atom_numbers, d_atom_virial, 0.);
  Reset_List<<<ceilf(static_cast<float>(constrain_pair_numbers) / 128), 128, 0, stream>>>(constrain_pair_numbers,
                                                                                          d_atom_virial, 0.);
  size_t thread_per_block = 128;
  size_t block_per_grid = ceilf(static_cast<float>(atom_numbers) / 128);
  const UNSIGNED_INT_VECTOR *uint_crd = reinterpret_cast<const UNSIGNED_INT_VECTOR *>(uint_crd_f);
@@ -58,10 +63,10 @@ void Constrain_Force_Cycle_With_Virial(int atom_numbers, int constrain_pair_numb
  CONSTRAIN_PAIR *constrain_pair = reinterpret_cast<CONSTRAIN_PAIR *>(constrain_pair_f);

  construct_constrain_pair<<<ceilf(static_cast<float>(constrain_pair_numbers) / 128), 128, 0, stream>>>(
    constrain_pair_numbers, atom_i_serials, atom_j_serials, constant_rs, constrain_ks, constrain_pair);

  Constrain_Force_Cycle_With_Virial<<<block_per_grid, thread_per_block, 0, stream>>>(
    constrain_pair_numbers, uint_crd, scaler, constrain_pair, pair_dr, test_frc, d_atom_virial);

  return;
}

backend/kernel_compiler/gpu/cuda_impl/sponge/simple_constrain/constrain_force_cycle_with_virial_impl.cuh
@@ -13,6 +13,10 @@
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
/**
 * Note:
 * ConstrainForceCycleVirial. This is an experimental interface that is subject to change and/or deletion.
 */

#ifndef MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_SIMPLE_CONSTRAIN_CONSTRAIN_FORCE_CYCLE_WITH_VIRIAL_IMPL_H_
#define MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_SIMPLE_CONSTRAIN_CONSTRAIN_FORCE_CYCLE_WITH_VIRIAL_IMPL_H_
@@ -25,4 +29,4 @@ void Constrain_Force_Cycle_With_Virial(int atom_numbers, int constrain_pair_numb
                                       const int *atom_i_serials, const int *atom_j_serials, const float *constant_rs,
                                       const float *constrain_ks, float *test_frc_f, float *d_atom_virial,
                                       cudaStream_t stream);
#endif  // MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_SIMPLE_CONSTRAIN_CONSTRAIN_FORCE_CYCLE_WITH_VIRIAL_IMPL_H_
#endif

backend/kernel_compiler/gpu/cuda_impl/sponge/simple_constrain/constrain_force_virial_impl.cu
@@ -0,0 +1,180 @@
/**
 * Copyright 2021 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
/**
 * Note:
 * ConstrainForceCycleVirial. This is an experimental interface that is subject to change and/or deletion.
 */

#include "backend/kernel_compiler/gpu/cuda_impl/sponge/common_sponge.cuh"
#include "backend/kernel_compiler/gpu/cuda_impl/sponge/simple_constrain/constrain_force_virial_impl.cuh"

__global__ void constrain_force_cycle_with_virial_update_kernel(int constrain_pair_numbers,
                                                                const UNSIGNED_INT_VECTOR *uint_crd,
                                                                const VECTOR *scaler, CONSTRAIN_PAIR *constrain_pair,
                                                                const VECTOR *pair_dr, VECTOR *test_frc,
                                                                float *d_atom_virial) {
  int pair_i = blockDim.x * blockIdx.x + threadIdx.x;
  if (pair_i < constrain_pair_numbers) {
    CONSTRAIN_PAIR cp = constrain_pair[pair_i];
    VECTOR dr0 = pair_dr[pair_i];
    VECTOR dr = Get_Periodic_Displacement(uint_crd[cp.atom_i_serial], uint_crd[cp.atom_j_serial], scaler[0]);
    float r_1 = rnorm3df(dr.x, dr.y, dr.z);
    float frc_abs = (1. - cp.constant_r * r_1) * cp.constrain_k;
    VECTOR frc_lin = frc_abs * dr0;
    d_atom_virial[pair_i] -= frc_lin * dr0;
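    // Per-pair virial: the dot product of the constraint force with the
    // stored pair displacement (the float * VECTOR and VECTOR * VECTOR
    // overloads are presumably supplied by common_sponge.cuh).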

    atomicAdd(&test_frc[cp.atom_j_serial].x, frc_lin.x);
    atomicAdd(&test_frc[cp.atom_j_serial].y, frc_lin.y);
    atomicAdd(&test_frc[cp.atom_j_serial].z, frc_lin.z);

    atomicAdd(&test_frc[cp.atom_i_serial].x, -frc_lin.x);
    atomicAdd(&test_frc[cp.atom_i_serial].y, -frc_lin.y);
    atomicAdd(&test_frc[cp.atom_i_serial].z, -frc_lin.z);
  }
}

__global__ void constrain_force_cycle_update_kernel(const int constrain_pair_numbers,
                                                    const UNSIGNED_INT_VECTOR *uint_crd, const VECTOR *scaler,
                                                    const CONSTRAIN_PAIR *constrain_pair, const VECTOR *pair_dr,
                                                    VECTOR *test_frc) {
  int pair_i = blockDim.x * blockIdx.x + threadIdx.x;
  if (pair_i < constrain_pair_numbers) {
    CONSTRAIN_PAIR cp = constrain_pair[pair_i];
    float r_1;
    VECTOR dr;
    float frc_abs;
    VECTOR frc_lin;

    dr.x = (static_cast<int>(uint_crd[cp.atom_i_serial].uint_x - uint_crd[cp.atom_j_serial].uint_x)) * scaler[0].x;
    dr.y = (static_cast<int>(uint_crd[cp.atom_i_serial].uint_y - uint_crd[cp.atom_j_serial].uint_y)) * scaler[0].y;
    dr.z = (static_cast<int>(uint_crd[cp.atom_i_serial].uint_z - uint_crd[cp.atom_j_serial].uint_z)) * scaler[0].z;
    r_1 = rnorm3df(dr.x, dr.y, dr.z);
    frc_abs = (1. - cp.constant_r * r_1) * cp.constrain_k;
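    // Restoring force magnitude k * (1 - r0 / r), applied below along the
    // pair displacement pair_dr captured before the position update.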

    frc_lin.x = frc_abs * pair_dr[pair_i].x;
    frc_lin.y = frc_abs * pair_dr[pair_i].y;
    frc_lin.z = frc_abs * pair_dr[pair_i].z;

    atomicAdd(&test_frc[cp.atom_j_serial].x, frc_lin.x);
    atomicAdd(&test_frc[cp.atom_j_serial].y, frc_lin.y);
    atomicAdd(&test_frc[cp.atom_j_serial].z, frc_lin.z);

    atomicAdd(&test_frc[cp.atom_i_serial].x, -frc_lin.x);
    atomicAdd(&test_frc[cp.atom_i_serial].y, -frc_lin.y);
    atomicAdd(&test_frc[cp.atom_i_serial].z, -frc_lin.z);
  }
}

__global__ void refresh_uint_crd_update_kernel(int atom_numbers, const VECTOR *crd,
                                               const VECTOR *quarter_crd_to_uint_crd_cof,
                                               UNSIGNED_INT_VECTOR *uint_crd, VECTOR *test_frc,
                                               const float *mass_inverse, const float half_exp_gamma_plus_half) {
  int atom_i = blockDim.x * blockIdx.x + threadIdx.x;
  if (atom_i < atom_numbers) {
    INT_VECTOR tempi;
    VECTOR crd_lin = crd[atom_i];
    VECTOR frc_lin = test_frc[atom_i];
    float mass_lin = mass_inverse[atom_i];

    crd_lin.x = crd_lin.x + half_exp_gamma_plus_half * frc_lin.x * mass_lin;
    crd_lin.y = crd_lin.y + half_exp_gamma_plus_half * frc_lin.y * mass_lin;
    crd_lin.z = crd_lin.z + half_exp_gamma_plus_half * frc_lin.z * mass_lin;

    tempi.int_x = crd_lin.x * quarter_crd_to_uint_crd_cof[0].x;
    tempi.int_y = crd_lin.y * quarter_crd_to_uint_crd_cof[0].y;
    tempi.int_z = crd_lin.z * quarter_crd_to_uint_crd_cof[0].z;

    uint_crd[atom_i].uint_x = tempi.int_x << 2;
    uint_crd[atom_i].uint_y = tempi.int_y << 2;
    uint_crd[atom_i].uint_z = tempi.int_z << 2;
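    // Truncating to int and shifting left by 2 keeps each unsigned coordinate
    // a multiple of 4; quarter_crd_to_uint_crd_cof appears to be the
    // crd-to-uint scale divided by 4, so the product stays in range before
    // the shift.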
  }
}

void constrain_force_cycle_update(int atom_numbers, int constrain_pair_numbers, const unsigned int *uint_crd_f,
                                  const float *scaler_f, float *constrain_pair_f, const float *pair_dr_f,
                                  const int *atom_i_serials, const int *atom_j_serials, const float *constant_rs,
                                  const float *constrain_ks, float *test_frc_f, cudaStream_t stream) {
  size_t thread_per_block = 128;
  size_t block_per_grid = ceilf(static_cast<float>(atom_numbers) / 128);
  const UNSIGNED_INT_VECTOR *uint_crd = reinterpret_cast<const UNSIGNED_INT_VECTOR *>(uint_crd_f);
  const VECTOR *scaler = reinterpret_cast<const VECTOR *>(scaler_f);
  const VECTOR *pair_dr = reinterpret_cast<const VECTOR *>(pair_dr_f);

  VECTOR *test_frc = reinterpret_cast<VECTOR *>(test_frc_f);

  CONSTRAIN_PAIR *constrain_pair = reinterpret_cast<CONSTRAIN_PAIR *>(constrain_pair_f);

  construct_constrain_pair<<<ceilf(static_cast<float>(constrain_pair_numbers) / 128), 128, 0, stream>>>(
    constrain_pair_numbers, atom_i_serials, atom_j_serials, constant_rs, constrain_ks, constrain_pair);

  constrain_force_cycle_update_kernel<<<block_per_grid, thread_per_block, 0, stream>>>(
    constrain_pair_numbers, uint_crd, scaler, constrain_pair, pair_dr, test_frc);

  return;
}

void constrain_force_cycle_with_virial_update(int atom_numbers, int constrain_pair_numbers,
                                              const unsigned int *uint_crd_f, const float *scaler_f,
                                              float *constrain_pair_f, const float *pair_dr_f,
                                              const int *atom_i_serials, const int *atom_j_serials,
                                              const float *constant_rs, const float *constrain_ks, float *test_frc_f,
                                              float *d_atom_virial, cudaStream_t stream) {
  size_t thread_per_block = 128;
  size_t block_per_grid = ceilf(static_cast<float>(atom_numbers) / 128);
  const UNSIGNED_INT_VECTOR *uint_crd = reinterpret_cast<const UNSIGNED_INT_VECTOR *>(uint_crd_f);
  const VECTOR *scaler = reinterpret_cast<const VECTOR *>(scaler_f);
  const VECTOR *pair_dr = reinterpret_cast<const VECTOR *>(pair_dr_f);

  VECTOR *test_frc = reinterpret_cast<VECTOR *>(test_frc_f);

  CONSTRAIN_PAIR *constrain_pair = reinterpret_cast<CONSTRAIN_PAIR *>(constrain_pair_f);

  construct_constrain_pair<<<ceilf(static_cast<float>(constrain_pair_numbers) / 128), 128, 0, stream>>>(
    constrain_pair_numbers, atom_i_serials, atom_j_serials, constant_rs, constrain_ks, constrain_pair);

  constrain_force_cycle_with_virial_update_kernel<<<block_per_grid, thread_per_block, 0, stream>>>(
    constrain_pair_numbers, uint_crd, scaler, constrain_pair, pair_dr, test_frc, d_atom_virial);

  return;
}

void refresh_uint_crd_update(int atom_numbers, float half_exp_gamma_plus_half, const float *crd_f,
                             const float *quarter_crd_to_uint_crd_cof_f, float *test_frc_f, const float *mass_inverse,
                             unsigned int *uint_crd_f, cudaStream_t stream) {
  size_t thread_per_block = 128;
  size_t block_per_grid = ceilf(static_cast<float>(atom_numbers) / 128);
  const VECTOR *crd = reinterpret_cast<const VECTOR *>(crd_f);
  const VECTOR *quarter_crd_to_uint_crd_cof = reinterpret_cast<const VECTOR *>(quarter_crd_to_uint_crd_cof_f);
  VECTOR *test_frc = reinterpret_cast<VECTOR *>(test_frc_f);
  UNSIGNED_INT_VECTOR *uint_crd = reinterpret_cast<UNSIGNED_INT_VECTOR *>(uint_crd_f);

  refresh_uint_crd_update_kernel<<<block_per_grid, thread_per_block, 0, stream>>>(
    atom_numbers, crd, quarter_crd_to_uint_crd_cof, uint_crd, test_frc, mass_inverse, half_exp_gamma_plus_half);
  return;
}

void set_zero_force_with_virial(int atom_numbers, int constrain_pair_numbers, float *test_frc_f, float *d_atom_virial,
                                cudaStream_t stream) {
  Reset_List<<<ceilf(static_cast<float>(3 * atom_numbers) / 128), 128, 0, stream>>>(3 * atom_numbers, test_frc_f, 0.);
  Reset_List<<<ceilf(static_cast<float>(constrain_pair_numbers) / 128), 128, 0, stream>>>(constrain_pair_numbers,
                                                                                          d_atom_virial, 0.);
  return;
}

void set_zero(int numbers, float *x, cudaStream_t stream) {
  Reset_List<<<ceilf(static_cast<float>(numbers) / 128), 128, 0, stream>>>(numbers, x, 0.);
  return;
}

backend/kernel_compiler/gpu/cuda_impl/sponge/simple_constrain/constrain_force_virial_impl.cuh
@@ -0,0 +1,48 @@
/**
 * Copyright 2021 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
/**
 * Note:
 * ConstrainForce. This is an experimental interface that is subject to change and/or deletion.
 */

#ifndef MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_SIMPLE_CONSTRAIN_CONSTRAIN_FORCE_VIRIAL_IMPL_H_
#define MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_SIMPLE_CONSTRAIN_CONSTRAIN_FORCE_VIRIAL_IMPL_H_

#include <curand_kernel.h>
#include "runtime/device/gpu/cuda_common.h"

void constrain_force_cycle_update(int atom_numbers, int constrain_pair_numbers, const unsigned int *uint_crd_f,
                                  const float *scaler_f, float *constrain_pair_f, const float *pair_dr_f,
                                  const int *atom_i_serials, const int *atom_j_serials, const float *constant_rs,
                                  const float *constrain_ks, float *test_frc_f, cudaStream_t stream);

void constrain_force_cycle_with_virial_update(int atom_numbers, int constrain_pair_numbers,
                                              const unsigned int *uint_crd_f, const float *scaler_f,
                                              float *constrain_pair_f, const float *pair_dr_f,
                                              const int *atom_i_serials, const int *atom_j_serials,
                                              const float *constant_rs, const float *constrain_ks, float *test_frc_f,
                                              float *d_atom_virial, cudaStream_t stream);

void refresh_uint_crd_update(int atom_numbers, float half_exp_gamma_plus_half, const float *crd_f,
                             const float *quarter_crd_to_uint_crd_cof_f, float *test_frc_f, const float *mass_inverse,
                             unsigned int *uint_crd_f, cudaStream_t stream);

void set_zero_force_with_virial(int atom_numbers, int constrain_pair_numbers, float *test_frc_f, float *d_atom_virial,
                                cudaStream_t stream);

void set_zero(int numbers, float *x, cudaStream_t stream);

#endif

backend/kernel_compiler/gpu/sponge/lj/lj_direct_cf_force_with_lj_virial_direct_cf_energy_kernel.cc
@@ -0,0 +1,39 @@
/**
 * Copyright 2021 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
/**
 * Note:
 * LJForceWithVirialEnergy. This is an experimental interface that is subject to change and/or deletion.
 */
#include "backend/kernel_compiler/gpu/sponge/lj/lj_direct_cf_force_with_lj_virial_direct_cf_energy_kernel.h"

namespace mindspore {
namespace kernel {
MS_REG_GPU_KERNEL_THREE(LJForceWithVirialEnergy,
                        KernelAttr()
                          .AddInputAttr(kNumberTypeUInt32)
                          .AddInputAttr(kNumberTypeInt32)
                          .AddInputAttr(kNumberTypeFloat32)
                          .AddInputAttr(kNumberTypeFloat32)
                          .AddInputAttr(kNumberTypeInt32)
                          .AddInputAttr(kNumberTypeInt32)
                          .AddInputAttr(kNumberTypeFloat32)
                          .AddInputAttr(kNumberTypeFloat32)
                          .AddOutputAttr(kNumberTypeFloat32)
                          .AddOutputAttr(kNumberTypeFloat32)
                          .AddOutputAttr(kNumberTypeFloat32),
                        LJForceWithVirialEnergyGpuKernel, float, int, unsigned int)
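// The three trailing template arguments bind T = float, T1 = int and
// T2 = unsigned int in LJForceWithVirialEnergyGpuKernel, matching the
// UInt32/Int32/Float32 attribute order declared above.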
}  // namespace kernel
}  // namespace mindspore

backend/kernel_compiler/gpu/sponge/lj/lj_direct_cf_force_with_lj_virial_direct_cf_energy_kernel.h
@@ -0,0 +1,142 @@
/**
|
||||
* Copyright 2021 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
/**
|
||||
*Note:
|
||||
* LJForceWithVirialEnergy. This is an experimental interface that is subject to change and/or deletion.
|
||||
*/
|
||||
#ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_LJ_DIRECT_CF_FORCE_WITH_LJ_VIRIAL_DIRECT_CF_ENERGY_KERNEL_H_
|
||||
#define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_LJ_DIRECT_CF_FORCE_WITH_LJ_VIRIAL_DIRECT_CF_ENERGY_KERNEL_H_
|
||||
#include <cuda_runtime_api.h>
|
||||
#include <vector>
|
||||
#include <string>
|
||||
#include <map>
|
||||
#include "backend/kernel_compiler/gpu/gpu_kernel.h"
|
||||
#include "backend/kernel_compiler/gpu/gpu_kernel_factory.h"
|
||||
#include "runtime/device/gpu/cuda_common.h"
|
||||
#include "backend/kernel_compiler/gpu/cuda_impl/sponge/lj/lj_direct_cf_force_with_lj_virial_direct_cf_energy_impl.cuh"
|
||||
namespace mindspore {
|
||||
namespace kernel {
|
||||
template <typename T, typename T1, typename T2>
|
||||
class LJForceWithVirialEnergyGpuKernel : public GpuKernel {
|
||||
public:
|
||||
LJForceWithVirialEnergyGpuKernel() : ele_uint_crd(1) {}
|
||||
~LJForceWithVirialEnergyGpuKernel() override = default;
|
||||
|
||||
bool Init(const CNodePtr &kernel_node) override {
|
||||
kernel_node_ = kernel_node;
|
||||
atom_numbers = static_cast<int>(GetAttr<int64_t>(kernel_node, "atom_numbers"));
|
||||
max_neighbor_numbers = static_cast<int>(GetAttr<int64_t>(kernel_node, "max_neighbor_numbers"));
|
||||
cutoff = static_cast<float>(GetAttr<float_t>(kernel_node, "cutoff"));
|
||||
pme_beta = static_cast<float>(GetAttr<float_t>(kernel_node, "pme_beta"));
|
||||
|
||||
auto shape_uint_crd = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
|
||||
auto shape_LJtype = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 1);
|
||||
auto shape_charge = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 2);
|
||||
auto shape_scaler = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 3);
|
||||
auto shape_nl_numbers = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 4);
|
||||
auto shape_nl_serial = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 5);
|
||||
auto shape_d_LJ_a = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 6);
|
||||
auto shape_d_LJ_b = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 7);
|
||||
|
||||
for (size_t i = 0; i < shape_uint_crd.size(); i++) ele_uint_crd *= shape_uint_crd[i];
|
||||
for (size_t i = 0; i < shape_LJtype.size(); i++) ele_LJtype *= shape_LJtype[i];
|
||||
for (size_t i = 0; i < shape_charge.size(); i++) ele_charge *= shape_charge[i];
|
||||
for (size_t i = 0; i < shape_scaler.size(); i++) ele_scaler *= shape_scaler[i];
|
||||
for (size_t i = 0; i < shape_d_LJ_a.size(); i++) ele_d_LJ_a *= shape_d_LJ_a[i];
|
||||
for (size_t i = 0; i < shape_d_LJ_b.size(); i++) ele_d_LJ_b *= shape_d_LJ_b[i];
|
||||
|
||||
InitSizeLists();
|
||||
return true;
|
||||
}
|
||||
|
||||
const std::vector<size_t> &GetInputSizeList() const override { return input_size_list_; }
|
||||
const std::vector<size_t> &GetOutputSizeList() const override { return output_size_list_; }
|
||||
const std::vector<size_t> &GetWorkspaceSizeList() const override { return workspace_size_list_; }
|
||||
|
||||
bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
|
||||
const std::vector<AddressPtr> &outputs, void *stream_ptr) override {
|
||||
auto uint_crd = GetDeviceAddress<T2>(inputs, 0);
|
||||
auto LJtype = GetDeviceAddress<T1>(inputs, 1);
|
||||
auto charge = GetDeviceAddress<T>(inputs, 2);
|
||||
auto scaler = GetDeviceAddress<T>(inputs, 3);
|
||||
auto nl_numbers = GetDeviceAddress<T1>(inputs, 4);
|
||||
auto nl_serial = GetDeviceAddress<T1>(inputs, 5);
|
||||
auto d_LJ_a = GetDeviceAddress<T>(inputs, 6);
|
||||
auto d_LJ_b = GetDeviceAddress<T>(inputs, 7);
|
||||
|
||||
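    // Workspace: coordinates fused with LJ type and charge (UINT_VECTOR_LJ_TYPE), plus the
    // flat neighbor list repacked into NEIGHBOR_LIST structs for the kernel.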
    auto uint_crd_with_LJ = GetDeviceAddress<T>(workspace, 0);
    auto nl = GetDeviceAddress<T1>(workspace, 1);

    auto frc = GetDeviceAddress<T>(outputs, 0);
    auto atom_lj_virial = GetDeviceAddress<T>(outputs, 1);
    auto atom_energy = GetDeviceAddress<T>(outputs, 2);
    LJ_Direct_CF_Force_With_LJ_Virial_Direct_CF_Energy(atom_numbers, cutoff, pme_beta, uint_crd, LJtype, charge, scaler,
                                                       uint_crd_with_LJ, nl_numbers, nl_serial, nl, d_LJ_a, d_LJ_b, frc,
                                                       atom_lj_virial, atom_energy, max_neighbor_numbers,
                                                       reinterpret_cast<cudaStream_t>(stream_ptr));
    return true;
  }

 protected:
  void InitSizeLists() override {
    input_size_list_.push_back(ele_uint_crd * sizeof(T2));
    input_size_list_.push_back(ele_LJtype * sizeof(T1));
    input_size_list_.push_back(ele_charge * sizeof(T));
    input_size_list_.push_back(ele_scaler * sizeof(T));
    input_size_list_.push_back(atom_numbers * sizeof(T1));
    input_size_list_.push_back(max_neighbor_numbers * sizeof(T1));
    input_size_list_.push_back(ele_d_LJ_a * sizeof(T));
    input_size_list_.push_back(ele_d_LJ_b * sizeof(T));

    workspace_size_list_.push_back(atom_numbers * max_neighbor_numbers * sizeof(T1));
    workspace_size_list_.push_back(atom_numbers * sizeof(UINT_VECTOR_LJ_TYPE));

    output_size_list_.push_back(atom_numbers * 3 * sizeof(T));
    output_size_list_.push_back(atom_numbers * sizeof(T));
    output_size_list_.push_back(atom_numbers * sizeof(T));
  }

 private:
  size_t ele_uint_crd = 1;
  size_t ele_LJtype = 1;
  size_t ele_charge = 1;
  size_t ele_scaler = 1;
  size_t ele_nl = 1;
  size_t ele_d_LJ_a = 1;
  size_t ele_d_LJ_b = 1;

  std::vector<size_t> input_size_list_;
  std::vector<size_t> output_size_list_;
  std::vector<size_t> workspace_size_list_;
  int atom_numbers;
  int max_neighbor_numbers;
  float pme_beta;
  float cutoff;
  struct UINT_VECTOR_LJ_TYPE {
    unsigned int uint_x;
    unsigned int uint_y;
    unsigned int uint_z;
    int LJ_type;
    float charge;
  };
  struct NEIGHBOR_LIST {
    int atom_numbers;
    int *atom_serial;
  };
};
}  // namespace kernel
}  // namespace mindspore
#endif
@ -0,0 +1,38 @@
/**
 * Copyright 2021 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
/**
 *Note:
 * LJForceWithPMEDirectForceUpdate. This is an experimental interface that is subject to change and/or deletion.
 */
#include "backend/kernel_compiler/gpu/sponge/lj/lj_force_with_pme_direct_force_update_kernel.h"

namespace mindspore {
namespace kernel {
MS_REG_GPU_KERNEL_TWO(LJForceWithPMEDirectForceUpdate,
                      KernelAttr()
                        .AddInputAttr(kNumberTypeUInt32)
                        .AddInputAttr(kNumberTypeInt32)
                        .AddInputAttr(kNumberTypeFloat32)
                        .AddInputAttr(kNumberTypeFloat32)
                        .AddInputAttr(kNumberTypeInt32)
                        .AddInputAttr(kNumberTypeInt32)
                        .AddInputAttr(kNumberTypeFloat32)
                        .AddInputAttr(kNumberTypeFloat32)
                        .AddInputAttr(kNumberTypeFloat32)
                        .AddOutputAttr(kNumberTypeFloat32),
                      LJForceWithPMEDirectForceUpdateGpuKernel, float, int)
}  // namespace kernel
}  // namespace mindspore
@ -0,0 +1,151 @@
/**
 * Copyright 2021 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
/**
 *Note:
 * LJForceWithPMEDirectForceUpdate. This is an experimental interface that is subject to change and/or deletion.
 */
#ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_SPONGE_LJ_LJ_FORCE_WITH_PME_DIRECT_FORCE_KERNEL_H_
#define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_SPONGE_LJ_LJ_FORCE_WITH_PME_DIRECT_FORCE_KERNEL_H_
#include <cuda_runtime_api.h>
#include <vector>
#include <string>
#include <map>
#include "backend/kernel_compiler/gpu/gpu_kernel.h"
#include "backend/kernel_compiler/gpu/gpu_kernel_factory.h"
#include "runtime/device/gpu/cuda_common.h"
#include "backend/kernel_compiler/gpu/cuda_impl/sponge/lj/lj_force_with_pme_direct_force_impl.cuh"
namespace mindspore {
namespace kernel {
template <typename T, typename T1>
class LJForceWithPMEDirectForceUpdateGpuKernel : public GpuKernel {
 public:
  LJForceWithPMEDirectForceUpdateGpuKernel() : ele_uint_crd(1) {}
  ~LJForceWithPMEDirectForceUpdateGpuKernel() override = default;

  bool Init(const CNodePtr &kernel_node) override {
    kernel_node_ = kernel_node;
    atom_numbers = static_cast<int>(GetAttr<int64_t>(kernel_node, "atom_numbers"));
    cutoff = static_cast<float>(GetAttr<float_t>(kernel_node, "cutoff"));
    pme_beta = static_cast<float>(GetAttr<float_t>(kernel_node, "pme_beta"));
    need_update = static_cast<int>(GetAttr<int64_t>(kernel_node, "need_update"));

    auto shape_uint_crd = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
    auto shape_LJtype = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 1);
    auto shape_charge = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 2);
    auto shape_scaler = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 3);
    auto shape_nl_numbers = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 4);
    auto shape_nl_serial = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 5);
    auto shape_d_LJ_a = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 6);
    auto shape_d_LJ_b = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 7);

    for (size_t i = 0; i < shape_uint_crd.size(); i++) ele_uint_crd *= shape_uint_crd[i];
    for (size_t i = 0; i < shape_LJtype.size(); i++) ele_LJtype *= shape_LJtype[i];
    for (size_t i = 0; i < shape_charge.size(); i++) ele_charge *= shape_charge[i];
    for (size_t i = 0; i < shape_scaler.size(); i++) ele_scaler *= shape_scaler[i];
    for (size_t i = 0; i < shape_d_LJ_a.size(); i++) ele_d_LJ_a *= shape_d_LJ_a[i];
    for (size_t i = 0; i < shape_d_LJ_b.size(); i++) ele_d_LJ_b *= shape_d_LJ_b[i];

    InitSizeLists();
    return true;
  }

  const std::vector<size_t> &GetInputSizeList() const override { return input_size_list_; }
  const std::vector<size_t> &GetOutputSizeList() const override { return output_size_list_; }
  const std::vector<size_t> &GetWorkspaceSizeList() const override { return workspace_size_list_; }

  bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
              const std::vector<AddressPtr> &outputs, void *stream_ptr) override {
    auto uint_crd = GetDeviceAddress<T1>(inputs, 0);
    auto LJtype = GetDeviceAddress<T1>(inputs, 1);
    auto charge = GetDeviceAddress<T>(inputs, 2);
    auto scaler = GetDeviceAddress<T>(inputs, 3);
    auto nl_numbers = GetDeviceAddress<T1>(inputs, 4);
    auto nl_serial = GetDeviceAddress<T1>(inputs, 5);
    auto d_LJ_a = GetDeviceAddress<T>(inputs, 6);
    auto d_LJ_b = GetDeviceAddress<T>(inputs, 7);
    auto d_beta = GetDeviceAddress<T>(inputs, 8);

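    // need_update: pme_beta may have been recomputed on device by an earlier step, so pull the
    // fresh value back to the host (synchronized on this kernel's stream) before launch.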
    if (need_update) {
      cudaMemcpyAsync(&pme_beta, d_beta, sizeof(float), cudaMemcpyDeviceToHost,
                      reinterpret_cast<cudaStream_t>(stream_ptr));
      cudaStreamSynchronize(reinterpret_cast<cudaStream_t>(stream_ptr));
    }

    auto uint_crd_with_LJ = GetDeviceAddress<T>(workspace, 0);
    auto nl = GetDeviceAddress<T1>(workspace, 1);

    auto frc = GetDeviceAddress<T>(outputs, 0);
    LJForceWithPMEDirectForce(atom_numbers, cutoff, pme_beta, uint_crd, LJtype, charge, scaler, uint_crd_with_LJ,
                              nl_numbers, nl_serial, nl, d_LJ_a, d_LJ_b, frc,
                              reinterpret_cast<cudaStream_t>(stream_ptr));
    return true;
  }

 protected:
  void InitSizeLists() override {
    input_size_list_.push_back(ele_uint_crd * sizeof(T1));
    input_size_list_.push_back(ele_LJtype * sizeof(T1));
    input_size_list_.push_back(ele_charge * sizeof(T));
    input_size_list_.push_back(ele_scaler * sizeof(T));
    input_size_list_.push_back(atom_numbers * sizeof(T1));
    input_size_list_.push_back(max_nl_numbers * sizeof(T1));
    input_size_list_.push_back(ele_d_LJ_a * sizeof(T));
    input_size_list_.push_back(ele_d_LJ_b * sizeof(T));
    input_size_list_.push_back(sizeof(T));

    workspace_size_list_.push_back(atom_numbers * max_nl_numbers * sizeof(T1));
    workspace_size_list_.push_back(atom_numbers * sizeof(UINT_VECTOR_LJ_TYPE));

    output_size_list_.push_back(atom_numbers * 3 * sizeof(T));
  }

 private:
  size_t ele_uint_crd = 1;
  size_t ele_LJtype = 1;
  size_t ele_charge = 1;
  size_t ele_scaler = 1;
  size_t ele_nl = 1;
  size_t ele_d_LJ_a = 1;
  size_t ele_d_LJ_b = 1;

  std::vector<size_t> input_size_list_;
  std::vector<size_t> output_size_list_;
  std::vector<size_t> workspace_size_list_;
  int atom_numbers;
  float pme_beta;
  float cutoff;
  int need_update;
  int max_nl_numbers = 800;
  struct UINT_VECTOR_LJ_TYPE {
    unsigned int uint_x;
    unsigned int uint_y;
    unsigned int uint_z;
    int LJ_type;
    float charge;
  };
  struct NEIGHBOR_LIST {
    int atom_numbers;
    int *atom_serial;
  };
  struct VECTOR {
    float x;
    float y;
    float z;
  };
};
}  // namespace kernel
}  // namespace mindspore
#endif
@ -0,0 +1,40 @@
/**
 * Copyright 2021 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
/**
 *Note:
 * LJForceWithVirialEnergyUpdate. This is an experimental interface that is subject to change and/or deletion.
 */
#include "backend/kernel_compiler/gpu/sponge/lj/lj_force_with_virial_energy_update_kernel.h"

namespace mindspore {
namespace kernel {
MS_REG_GPU_KERNEL_THREE(LJForceWithVirialEnergyUpdate,
                        KernelAttr()
                          .AddInputAttr(kNumberTypeUInt32)
                          .AddInputAttr(kNumberTypeInt32)
                          .AddInputAttr(kNumberTypeFloat32)
                          .AddInputAttr(kNumberTypeFloat32)
                          .AddInputAttr(kNumberTypeInt32)
                          .AddInputAttr(kNumberTypeInt32)
                          .AddInputAttr(kNumberTypeFloat32)
                          .AddInputAttr(kNumberTypeFloat32)
                          .AddInputAttr(kNumberTypeFloat32)
                          .AddOutputAttr(kNumberTypeFloat32)
                          .AddOutputAttr(kNumberTypeFloat32)
                          .AddOutputAttr(kNumberTypeFloat32),
                        LJForceWithVirialEnergyUpdateGpuKernel, float, int, unsigned int)
}  // namespace kernel
}  // namespace mindspore
@ -0,0 +1,152 @@
/**
 * Copyright 2021 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
/**
 *Note:
 * LJForceWithVirialEnergyUpdate. This is an experimental interface that is subject to change and/or deletion.
 */
#ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_LJ_FORCE_WITH_VIRIAL_ENERGY_UPDATE_KERNEL_H_
#define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_LJ_FORCE_WITH_VIRIAL_ENERGY_UPDATE_KERNEL_H_
#include <cuda_runtime_api.h>
#include <vector>
#include <string>
#include <map>
#include "backend/kernel_compiler/gpu/gpu_kernel.h"
#include "backend/kernel_compiler/gpu/gpu_kernel_factory.h"
#include "runtime/device/gpu/cuda_common.h"
#include "backend/kernel_compiler/gpu/cuda_impl/sponge/lj/lj_direct_cf_force_with_lj_virial_direct_cf_energy_impl.cuh"
namespace mindspore {
namespace kernel {
template <typename T, typename T1, typename T2>
class LJForceWithVirialEnergyUpdateGpuKernel : public GpuKernel {
 public:
  LJForceWithVirialEnergyUpdateGpuKernel() : ele_uint_crd(1) {}
  ~LJForceWithVirialEnergyUpdateGpuKernel() override = default;

  bool Init(const CNodePtr &kernel_node) override {
    kernel_node_ = kernel_node;
    atom_numbers = static_cast<int>(GetAttr<int64_t>(kernel_node, "atom_numbers"));
    max_neighbor_numbers = static_cast<int>(GetAttr<int64_t>(kernel_node, "max_neighbor_numbers"));
    cutoff = static_cast<float>(GetAttr<float_t>(kernel_node, "cutoff"));
    pme_beta = static_cast<float>(GetAttr<float_t>(kernel_node, "pme_beta"));
    need_update = static_cast<int>(GetAttr<int64_t>(kernel_node, "need_update"));

    auto shape_uint_crd = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
    auto shape_LJtype = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 1);
    auto shape_charge = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 2);
    auto shape_scaler = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 3);
    auto shape_nl_numbers = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 4);
    auto shape_nl_serial = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 5);
    auto shape_d_LJ_a = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 6);
    auto shape_d_LJ_b = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 7);

    for (size_t i = 0; i < shape_uint_crd.size(); i++) ele_uint_crd *= shape_uint_crd[i];
    for (size_t i = 0; i < shape_LJtype.size(); i++) ele_LJtype *= shape_LJtype[i];
    for (size_t i = 0; i < shape_charge.size(); i++) ele_charge *= shape_charge[i];
    for (size_t i = 0; i < shape_scaler.size(); i++) ele_scaler *= shape_scaler[i];
    for (size_t i = 0; i < shape_d_LJ_a.size(); i++) ele_d_LJ_a *= shape_d_LJ_a[i];
    for (size_t i = 0; i < shape_d_LJ_b.size(); i++) ele_d_LJ_b *= shape_d_LJ_b[i];

    InitSizeLists();
    return true;
  }

  const std::vector<size_t> &GetInputSizeList() const override { return input_size_list_; }
  const std::vector<size_t> &GetOutputSizeList() const override { return output_size_list_; }
  const std::vector<size_t> &GetWorkspaceSizeList() const override { return workspace_size_list_; }

  bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
              const std::vector<AddressPtr> &outputs, void *stream_ptr) override {
    auto uint_crd = GetDeviceAddress<T2>(inputs, 0);
    auto LJtype = GetDeviceAddress<T1>(inputs, 1);
    auto charge = GetDeviceAddress<T>(inputs, 2);
    auto scaler = GetDeviceAddress<T>(inputs, 3);
    auto nl_numbers = GetDeviceAddress<T1>(inputs, 4);
    auto nl_serial = GetDeviceAddress<T1>(inputs, 5);
    auto d_LJ_a = GetDeviceAddress<T>(inputs, 6);
    auto d_LJ_b = GetDeviceAddress<T>(inputs, 7);
    auto d_beta = GetDeviceAddress<T>(inputs, 8);

    auto uint_crd_with_LJ = GetDeviceAddress<T>(workspace, 0);
    auto nl = GetDeviceAddress<T1>(workspace, 1);

    auto frc = GetDeviceAddress<T>(outputs, 0);
    auto atom_lj_virial = GetDeviceAddress<T>(outputs, 1);
    auto atom_energy = GetDeviceAddress<T>(outputs, 2);

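    // Same pattern as LJForceWithPMEDirectForceUpdate: refresh pme_beta from d_beta when requested.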
    if (need_update) {
      cudaMemcpyAsync(&pme_beta, d_beta, sizeof(float), cudaMemcpyDeviceToHost,
                      reinterpret_cast<cudaStream_t>(stream_ptr));
      cudaStreamSynchronize(reinterpret_cast<cudaStream_t>(stream_ptr));
    }

    LJ_Direct_CF_Force_With_LJ_Virial_Direct_CF_Energy(atom_numbers, cutoff, pme_beta, uint_crd, LJtype, charge, scaler,
                                                       uint_crd_with_LJ, nl_numbers, nl_serial, nl, d_LJ_a, d_LJ_b, frc,
                                                       atom_lj_virial, atom_energy, max_neighbor_numbers,
                                                       reinterpret_cast<cudaStream_t>(stream_ptr));
    return true;
  }

 protected:
  void InitSizeLists() override {
    input_size_list_.push_back(ele_uint_crd * sizeof(T2));
    input_size_list_.push_back(ele_LJtype * sizeof(T1));
    input_size_list_.push_back(ele_charge * sizeof(T));
    input_size_list_.push_back(ele_scaler * sizeof(T));
    input_size_list_.push_back(atom_numbers * sizeof(T1));
    input_size_list_.push_back(max_neighbor_numbers * sizeof(T1));
    input_size_list_.push_back(ele_d_LJ_a * sizeof(T));
    input_size_list_.push_back(ele_d_LJ_b * sizeof(T));
    input_size_list_.push_back(sizeof(T));  // d_beta, the ninth input

    workspace_size_list_.push_back(atom_numbers * max_neighbor_numbers * sizeof(T1));
    workspace_size_list_.push_back(atom_numbers * sizeof(UINT_VECTOR_LJ_TYPE));

    output_size_list_.push_back(atom_numbers * 3 * sizeof(T));
    output_size_list_.push_back(atom_numbers * sizeof(T));
    output_size_list_.push_back(atom_numbers * sizeof(T));
  }

 private:
  size_t ele_uint_crd = 1;
  size_t ele_LJtype = 1;
  size_t ele_charge = 1;
  size_t ele_scaler = 1;
  size_t ele_nl = 1;
  size_t ele_d_LJ_a = 1;
  size_t ele_d_LJ_b = 1;

  std::vector<size_t> input_size_list_;
  std::vector<size_t> output_size_list_;
  std::vector<size_t> workspace_size_list_;
  int atom_numbers;
  int max_neighbor_numbers;
  float pme_beta;
  float cutoff;
  int need_update;
  struct UINT_VECTOR_LJ_TYPE {
    unsigned int uint_x;
    unsigned int uint_y;
    unsigned int uint_z;
    int LJ_type;
    float charge;
  };
  struct NEIGHBOR_LIST {
    int atom_numbers;
    int *atom_serial;
  };
};
}  // namespace kernel
}  // namespace mindspore
#endif
@ -13,14 +13,19 @@
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
/**
 *Note:
 * MDIterationLeapFrog. This is an experimental interface that is subject to change and/or deletion.
 */

#ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_SPONG_NVTIT_MD_ITERATION_LEAP_FROG_KERNEL_H_
#define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_SPONG_NVTIT_MD_ITERATION_LEAP_FROG_KERNEL_H_
#ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_MD_ITERATION_LEAP_FROG_KERNEL_H_
#define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_MD_ITERATION_LEAP_FROG_KERNEL_H_

#include <cuda_runtime_api.h>
#include <vector>
#include <string>
#include <map>
#include <string>
#include <vector>

#include "backend/kernel_compiler/gpu/gpu_kernel.h"
#include "backend/kernel_compiler/gpu/gpu_kernel_factory.h"
#include "runtime/device/gpu/cuda_common.h"
@ -31,25 +36,13 @@ namespace kernel {
template <typename T>
class MDIterationLeapFrogGpuKernel : public GpuKernel {
 public:
  MDIterationLeapFrogGpuKernel() : ele_mass_inverse(1) {}
  MDIterationLeapFrogGpuKernel() {}
  ~MDIterationLeapFrogGpuKernel() override = default;

  bool Init(const CNodePtr &kernel_node) override {
    kernel_node_ = kernel_node;
    float4_numbers = static_cast<int>(GetAttr<int64_t>(kernel_node, "float4_numbers"));
    atom_numbers = static_cast<int>(GetAttr<int64_t>(kernel_node, "atom_numbers"));
    half_dt = static_cast<float>(GetAttr<float>(kernel_node, "half_dt"));
    dt = static_cast<float>(GetAttr<float>(kernel_node, "dt"));
    exp_gamma = static_cast<float>(GetAttr<float>(kernel_node, "exp_gamma"));
    is_max_velocity = static_cast<int>(GetAttr<int64_t>(kernel_node, "is_max_velocity"));
    max_velocity = static_cast<float>(GetAttr<float>(kernel_node, "max_velocity"));

    auto shape_mass_inverse = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
    auto shape_qrt_mass = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 1);

    for (size_t i = 0; i < shape_mass_inverse.size(); i++) ele_mass_inverse *= shape_mass_inverse[i];
    for (size_t i = 0; i < shape_qrt_mass.size(); i++) ele_sqrt_mass *= shape_qrt_mass[i];

    InitSizeLists();
    return true;
  }
@ -58,49 +51,36 @@ class MDIterationLeapFrogGpuKernel : public GpuKernel {
  const std::vector<size_t> &GetOutputSizeList() const override { return output_size_list_; }
  const std::vector<size_t> &GetWorkspaceSizeList() const override { return workspace_size_list_; }

  bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &,
  bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
              const std::vector<AddressPtr> &outputs, void *stream_ptr) override {
    auto d_mass_inverse = GetDeviceAddress<const T>(inputs, 0);
    auto d_sqrt_mass = GetDeviceAddress<const T>(inputs, 1);

    auto vel_f = GetDeviceAddress<T>(outputs, 0);
    auto crd_f = GetDeviceAddress<T>(outputs, 1);
    auto frc_f = GetDeviceAddress<T>(outputs, 2);
    auto acc_f = GetDeviceAddress<T>(outputs, 3);

    MDIterationLeapFrog(float4_numbers, atom_numbers, half_dt, dt, exp_gamma, is_max_velocity, max_velocity,
                        d_mass_inverse, d_sqrt_mass, vel_f, crd_f, frc_f, acc_f,
                        reinterpret_cast<cudaStream_t>(stream_ptr));
    auto vel = GetDeviceAddress<float>(inputs, 0);
    auto crd = GetDeviceAddress<float>(inputs, 1);
    auto frc = GetDeviceAddress<float>(inputs, 2);
    auto acc = GetDeviceAddress<float>(inputs, 3);
    auto inverse_mass = GetDeviceAddress<float>(inputs, 4);

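    // New-style leap-frog step; the impl (not shown in this diff) is expected to integrate
    // per atom roughly as: acc = inverse_mass * frc; vel += dt * acc; crd += dt * vel.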
    MDIterationLeapFrog(atom_numbers, vel, crd, frc, acc, inverse_mass, dt, reinterpret_cast<cudaStream_t>(stream_ptr));
    return true;
  }

 protected:
  void InitSizeLists() override {
    input_size_list_.push_back(ele_mass_inverse * sizeof(T));
    input_size_list_.push_back(ele_sqrt_mass * sizeof(T));
    input_size_list_.push_back(atom_numbers * 3 * sizeof(T));
    input_size_list_.push_back(atom_numbers * 3 * sizeof(T));
    input_size_list_.push_back(atom_numbers * 3 * sizeof(T));
    input_size_list_.push_back(atom_numbers * 3 * sizeof(T));
    input_size_list_.push_back(atom_numbers * sizeof(T));

    output_size_list_.push_back(3 * atom_numbers * sizeof(T));
    output_size_list_.push_back(3 * atom_numbers * sizeof(T));
    output_size_list_.push_back(3 * atom_numbers * sizeof(T));
    output_size_list_.push_back(3 * atom_numbers * sizeof(T));
    output_size_list_.push_back(sizeof(T));
  }

 private:
  size_t ele_mass_inverse = 1;
  size_t ele_sqrt_mass = 1;

  std::vector<size_t> input_size_list_;
  std::vector<size_t> output_size_list_;
  std::vector<size_t> workspace_size_list_;
  int float4_numbers;
  int atom_numbers;
  float half_dt;
  float dt;
  float exp_gamma;
  int is_max_velocity;
  float max_velocity;
};
}  // namespace kernel
}  // namespace mindspore
#endif  // MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_SPONG_NVTIT_MD_ITERATION_LEAP_FROG_KERNEL_H_
#endif
@ -0,0 +1,42 @@
/**
 * Copyright 2021 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
/**
 *Note:
 * PMEEnergyUpdate. This is an experimental interface that is subject to change and/or deletion.
 */
#include "backend/kernel_compiler/gpu/sponge/pme/pme_energy_update_kernel.h"

namespace mindspore {
namespace kernel {
MS_REG_GPU_KERNEL_TWO(PMEEnergyUpdate,
                      KernelAttr()
                        .AddInputAttr(kNumberTypeUInt32)
                        .AddInputAttr(kNumberTypeFloat32)
                        .AddInputAttr(kNumberTypeInt32)
                        .AddInputAttr(kNumberTypeInt32)
                        .AddInputAttr(kNumberTypeFloat32)
                        .AddInputAttr(kNumberTypeInt32)
                        .AddInputAttr(kNumberTypeInt32)
                        .AddInputAttr(kNumberTypeInt32)
                        .AddInputAttr(kNumberTypeFloat32)
                        .AddInputAttr(kNumberTypeFloat32)
                        .AddOutputAttr(kNumberTypeFloat32)
                        .AddOutputAttr(kNumberTypeFloat32)
                        .AddOutputAttr(kNumberTypeFloat32)
                        .AddOutputAttr(kNumberTypeFloat32),
                      PMEEnergyUpdateGpuKernel, float, int)
}  // namespace kernel
}  // namespace mindspore
@ -0,0 +1,313 @@
/**
 * Copyright 2021 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
/**
 *Note:
 * PMEEnergyUpdate. This is an experimental interface that is subject to change and/or deletion.
 */
#ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_SPONGE_PME_PME_ENERGY_UPDATE_KERNEL_H_
#define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_SPONGE_PME_PME_ENERGY_UPDATE_KERNEL_H_
#include <cuda_runtime_api.h>
#include <cufft.h>
#include <vector>
#include "backend/kernel_compiler/gpu/gpu_kernel.h"
#include "backend/kernel_compiler/gpu/gpu_kernel_factory.h"
#include "runtime/device/gpu/cuda_common.h"
#include "backend/kernel_compiler/gpu/cuda_impl/sponge/pme/pme_energy_update_impl.cuh"

namespace mindspore {
namespace kernel {
template <typename T, typename T1>
class PMEEnergyUpdateGpuKernel : public GpuKernel {
 public:
  PMEEnergyUpdateGpuKernel() : ele_uint_crd(1) {}
  ~PMEEnergyUpdateGpuKernel() override = default;

  bool Init(const CNodePtr &kernel_node) override {
    kernel_node_ = kernel_node;
    atom_numbers = static_cast<int>(GetAttr<int64_t>(kernel_node, "atom_numbers"));
    excluded_numbers = static_cast<int>(GetAttr<int64_t>(kernel_node, "excluded_numbers"));
    beta = static_cast<float>(GetAttr<float_t>(kernel_node, "beta"));
    fftx = static_cast<int>(GetAttr<int64_t>(kernel_node, "fftx"));
    ffty = static_cast<int>(GetAttr<int64_t>(kernel_node, "ffty"));
    fftz = static_cast<int>(GetAttr<int64_t>(kernel_node, "fftz"));

    float box_length_0 = static_cast<float>(GetAttr<float_t>(kernel_node, "box_length_0"));
    float box_length_1 = static_cast<float>(GetAttr<float_t>(kernel_node, "box_length_1"));
    float box_length_2 = static_cast<float>(GetAttr<float_t>(kernel_node, "box_length_2"));
    max_neighbor_numbers = static_cast<int>(GetAttr<int64_t>(kernel_node, "max_neighbor_numbers"));
    need_update = static_cast<int>(GetAttr<int64_t>(kernel_node, "need_update"));

    cufftPlan3d(&PME_plan_r2c, fftx, ffty, fftz, CUFFT_R2C);
    cufftPlan3d(&PME_plan_c2r, fftx, ffty, fftz, CUFFT_C2R);
    _thread_PME.x = 8;
    _thread_PME.y = 8;
    PME_Nin = ffty * fftz;
    PME_Nfft = fftx * ffty * (fftz / 2 + 1);
    PME_Nall = fftx * ffty * fftz;
    PME_kxyz_cpu.resize(64);

    std::vector<float> h_box_length{box_length_0, box_length_1, box_length_2};
    VECTOR *box_length = reinterpret_cast<VECTOR *>(h_box_length.data());

    volume = box_length[0].x * box_length[0].y * box_length[0].z;
    int kx, ky, kz, kxrp, kyrp, kzrp, index;
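    // Enumerate the 64 = 4 x 4 x 4 grid offsets each charge spreads onto
    // (fourth-order B-spline interpolation).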
    for (kx = 0; kx < 4; kx++) {
      for (ky = 0; ky < 4; ky++) {
        for (kz = 0; kz < 4; kz++) {
          index = kx * 16 + ky * 4 + kz;
          PME_kxyz_cpu[index].uint_x = kx;
          PME_kxyz_cpu[index].uint_y = ky;
          PME_kxyz_cpu[index].uint_z = kz;
        }
      }
    }

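    // B1/B2/B3 hold |b(k)|^2, the squared Euler exponential-spline moduli per FFT axis;
    // PME_BC0 below combines them with C(m) = exp(-pi^2 m^2 / beta^2) / (pi m^2 V).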
    B1.resize(fftx);
    B2.resize(ffty);
    B3.resize(fftz);
    PME_BC0.resize(PME_Nfft);
    for (kx = 0; kx < fftx; kx++) {
      B1[kx] = getb(kx, fftx, 4);
    }

    for (ky = 0; ky < ffty; ky++) {
      B2[ky] = getb(ky, ffty, 4);
    }

    for (kz = 0; kz < fftz; kz++) {
      B3[kz] = getb(kz, fftz, 4);
    }
    float mprefactor = PI * PI / -beta / beta;

    float msq;
    for (kx = 0; kx < fftx; kx++) {
      kxrp = kx;
      if (kx > fftx / 2) kxrp = fftx - kx;
      for (ky = 0; ky < ffty; ky++) {
        kyrp = ky;
        if (ky > ffty / 2) kyrp = ffty - ky;
        for (kz = 0; kz <= fftz / 2; kz++) {
          kzrp = kz;

          msq = kxrp * kxrp / box_length[0].x / box_length[0].x + kyrp * kyrp / box_length[0].y / box_length[0].y +
                kzrp * kzrp / box_length[0].z / box_length[0].z;
          index = kx * ffty * (fftz / 2 + 1) + ky * (fftz / 2 + 1) + kz;
          if ((kx + ky + kz) == 0) {
            PME_BC0[index] = 0;
          } else {
            PME_BC0[index] = 1.0 / PI / msq * exp(mprefactor * msq) / volume;
          }

          PME_BC0[index] *= B1[kx] * B2[ky] * B3[kz];
        }
      }
    }

    InitSizeLists();
    return true;
  }

  const std::vector<size_t> &GetInputSizeList() const override { return input_size_list_; }
  const std::vector<size_t> &GetOutputSizeList() const override { return output_size_list_; }
  const std::vector<size_t> &GetWorkspaceSizeList() const override { return workspace_size_list_; }

  bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
              const std::vector<AddressPtr> &outputs, void *stream_ptr) override {
    auto uint_crd = GetDeviceAddress<T1>(inputs, 0);
    auto charge = GetDeviceAddress<T>(inputs, 1);
    auto nl_numbers = GetDeviceAddress<T1>(inputs, 2);
    auto nl_serial = GetDeviceAddress<T1>(inputs, 3);
    auto scaler = GetDeviceAddress<T>(inputs, 4);
    auto excluded_list_start = GetDeviceAddress<int>(inputs, 5);
    auto excluded_list = GetDeviceAddress<int>(inputs, 6);
    auto excluded_atom_numbers = GetDeviceAddress<int>(inputs, 7);
    auto neutralizing_factor = GetDeviceAddress<T>(inputs, 8);
    auto d_beta = GetDeviceAddress<T>(inputs, 9);

    auto pme_uxyz = GetDeviceAddress<int>(workspace, 0);       // workspace
    auto pme_frxyz = GetDeviceAddress<float>(workspace, 1);    // workspace
    auto pme_q = GetDeviceAddress<T>(workspace, 2);            // workspace
    auto pme_fq = GetDeviceAddress<float>(workspace, 3);       // workspace
    auto pme_atom_near = GetDeviceAddress<int>(workspace, 4);  // workspace
    auto pme_bc = GetDeviceAddress<float>(workspace, 5);       // workspace
    auto pme_kxyz = GetDeviceAddress<int>(workspace, 6);       // workspace
    auto nl = GetDeviceAddress<T1>(workspace, 7);
    auto charge_sum = GetDeviceAddress<float>(workspace, 8);

    auto reciprocal_ene = GetDeviceAddress<T>(outputs, 0);
    auto self_ene = GetDeviceAddress<T>(outputs, 1);
    auto direct_ene = GetDeviceAddress<T>(outputs, 2);
    auto correction_ene = GetDeviceAddress<T>(outputs, 3);

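    // When beta changes on device, the cached BC table is rescaled by the old/new beta
    // ratio rather than recomputed from scratch.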
    h_beta = beta;
    if (need_update) {
      cudaMemcpyAsync(&h_beta, d_beta, sizeof(float), cudaMemcpyDeviceToHost,
                      reinterpret_cast<cudaStream_t>(stream_ptr));
      cudaStreamSynchronize(reinterpret_cast<cudaStream_t>(stream_ptr));
      factor = h_beta / beta;
      double factor_inverse = 1.0 / factor;

      for (int i = 0; i < PME_Nfft; i++) {
        PME_BC0[i] *= factor_inverse;  // update PME_BC0
      }
    }

    cufftSetStream(PME_plan_r2c, reinterpret_cast<cudaStream_t>(stream_ptr));
    cufftSetStream(PME_plan_c2r, reinterpret_cast<cudaStream_t>(stream_ptr));
    cudaMemcpyAsync(pme_kxyz, PME_kxyz_cpu.data(), sizeof(UNSIGNED_INT_VECTOR) * 64, cudaMemcpyHostToDevice,
                    reinterpret_cast<cudaStream_t>(stream_ptr));
    cudaMemcpyAsync(pme_bc, PME_BC0.data(), sizeof(float) * PME_Nfft, cudaMemcpyHostToDevice,
                    reinterpret_cast<cudaStream_t>(stream_ptr));

    PMEEnergyUpdate(fftx, ffty, fftz, atom_numbers, h_beta, pme_bc, pme_uxyz, pme_frxyz, pme_q, pme_fq, pme_atom_near,
                    pme_kxyz, uint_crd, charge, nl_numbers, nl_serial, nl, scaler, excluded_list_start, excluded_list,
                    excluded_atom_numbers, reciprocal_ene, self_ene, direct_ene, correction_ene, _thread_PME, PME_Nin,
                    PME_Nfft, PME_Nall, PME_plan_r2c, PME_plan_c2r, neutralizing_factor, charge_sum,
                    max_neighbor_numbers, reinterpret_cast<cudaStream_t>(stream_ptr));
    return true;
  }

 protected:
  void InitSizeLists() override {
    input_size_list_.push_back(atom_numbers * sizeof(UNSIGNED_INT_VECTOR));
    input_size_list_.push_back(atom_numbers * sizeof(VECTOR));
    input_size_list_.push_back(atom_numbers * sizeof(T1));
    input_size_list_.push_back(max_neighbor_numbers * sizeof(T1));
    input_size_list_.push_back(sizeof(VECTOR));

    input_size_list_.push_back(atom_numbers * sizeof(T1));
    input_size_list_.push_back(excluded_numbers * sizeof(T1));
    input_size_list_.push_back(atom_numbers * sizeof(T1));
    input_size_list_.push_back(sizeof(T));
    input_size_list_.push_back(sizeof(T));

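    // Workspace order matches Launch(): uxyz, frxyz, the real-space charge grid Q (PME_Nall
    // reals), FFT(Q) (PME_Nfft complex values), atom_near (64 ints per atom), the BC table,
    // the 64 kxyz offsets, the rebuilt neighbor list, and charge_sum.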
    workspace_size_list_.push_back(atom_numbers * sizeof(UNSIGNED_INT_VECTOR));
    workspace_size_list_.push_back(atom_numbers * sizeof(VECTOR));
    workspace_size_list_.push_back(PME_Nall * sizeof(T));
    workspace_size_list_.push_back(PME_Nfft * sizeof(cufftComplex));
    workspace_size_list_.push_back(atom_numbers * 64 * sizeof(int));
    workspace_size_list_.push_back(PME_Nfft * sizeof(float));
    workspace_size_list_.push_back(64 * sizeof(UNSIGNED_INT_VECTOR));
    workspace_size_list_.push_back(atom_numbers * max_neighbor_numbers * sizeof(T1));
    workspace_size_list_.push_back(sizeof(T));

    output_size_list_.push_back(sizeof(T));
    output_size_list_.push_back(sizeof(T));
    output_size_list_.push_back(sizeof(T));
    output_size_list_.push_back(sizeof(T));
  }

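  // Host-side helpers for the B-spline factors: expc(z) returns exp(z) for complex z, M_ is
  // the cardinal B-spline recursion M_n(u), and getb returns |b(k)|^2 (the Euler
  // exponential-spline modulus used by smooth PME).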
  cufftComplex expc(cufftComplex z) {
    cufftComplex res;
    float t = expf(z.x);
    sincosf(z.y, &res.y, &res.x);
    res.x *= t;
    res.y *= t;
    return res;
  }

  float M_(float u, int n) {
    if (n == 2) {
      if (u > 2 || u < 0) return 0;
      return 1 - abs(u - 1);
    } else {
      return u / (n - 1) * M_(u, n - 1) + (n - u) / (n - 1) * M_(u - 1, n - 1);
    }
  }

  float getb(int k, int NFFT, int B_order) {
    cufftComplex tempc, tempc2, res;
    float tempf;
    tempc2.x = 0;
    tempc2.y = 0;

    tempc.x = 0;
    if (NFFT == 0) {
      MS_LOG(ERROR) << "Divide by zero.";
    } else {
      tempc.y = 2 * (B_order - 1) * PI * k / NFFT;
    }
    res = expc(tempc);

    for (int kk = 0; kk < (B_order - 1); kk++) {
      tempc.x = 0;
      if (NFFT == 0) {
        MS_LOG(ERROR) << "Divide by zero.";
        break;
      } else {
        tempc.y = 2 * PI * k / NFFT * kk;
      }
      tempc = expc(tempc);
      tempf = M_(kk + 1, B_order);
      tempc2.x += tempf * tempc.x;
      tempc2.y += tempf * tempc.y;
    }
    res = cuCdivf(res, tempc2);
    return res.x * res.x + res.y * res.y;
  }

 private:
  size_t ele_uint_crd = 1;

  std::vector<size_t> input_size_list_;
  std::vector<size_t> output_size_list_;
  std::vector<size_t> workspace_size_list_;

  std::vector<float> B1;
  std::vector<float> B2;
  std::vector<float> B3;
  std::vector<float> PME_BC0;

  int atom_numbers;
  int excluded_numbers;
  int max_neighbor_numbers;
  int fftx;
  int ffty;
  int fftz;
  float beta;
  float factor;
  float h_beta;
  int PME_Nin;
  int PME_Nall;
  int PME_Nfft;
  float volume;
  float PI = 3.1415926;
  int need_update;
  cufftHandle PME_plan_r2c;
  cufftHandle PME_plan_c2r;

  dim3 _thread_PME;

  struct VECTOR {
    float x;
    float y;
    float z;
  };

  struct UNSIGNED_INT_VECTOR {
    unsigned int uint_x;
    unsigned int uint_y;
    unsigned int uint_z;
  };
  std::vector<UNSIGNED_INT_VECTOR> PME_kxyz_cpu;
  struct NEIGHBOR_LIST {
    int atom_numbers;
    int *atom_serial;
  };
};
}  // namespace kernel
}  // namespace mindspore
#endif
@ -13,17 +13,20 @@
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
/**
 *Note:
 * PMEExcludedForce. This is an experimental interface that is subject to change and/or deletion.
 */
#ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_SPONGE_PME_PME_EXCLUDED_FORCE_KERNEL_H_
#define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_SPONGE_PME_PME_EXCLUDED_FORCE_KERNEL_H_
#include <cuda_runtime_api.h>
#include <cufft.h>
#include <vector>
#include <string>
#include <map>
#include "backend/kernel_compiler/gpu/gpu_kernel.h"
#include "backend/kernel_compiler/gpu/gpu_kernel_factory.h"
#include "runtime/device/gpu/cuda_common.h"
#include "backend/kernel_compiler/gpu/cuda_impl/sponge/pme/pme_excluded_force_impl.cuh"

namespace mindspore {
namespace kernel {
template <typename T, typename T1>
@ -0,0 +1,36 @@
/**
 * Copyright 2021 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
/**
 *Note:
 * PMEExcludedForceUpdate. This is an experimental interface that is subject to change and/or deletion.
 */
#include "backend/kernel_compiler/gpu/sponge/pme/pme_excluded_force_update_kernel.h"

namespace mindspore {
namespace kernel {
MS_REG_GPU_KERNEL_TWO(PMEExcludedForceUpdate,
                      KernelAttr()
                        .AddInputAttr(kNumberTypeUInt32)
                        .AddInputAttr(kNumberTypeFloat32)
                        .AddInputAttr(kNumberTypeFloat32)
                        .AddInputAttr(kNumberTypeInt32)
                        .AddInputAttr(kNumberTypeInt32)
                        .AddInputAttr(kNumberTypeInt32)
                        .AddInputAttr(kNumberTypeFloat32)
                        .AddOutputAttr(kNumberTypeFloat32),
                      PMEExcludedForceUpdateGpuKernel, float, int)
}  // namespace kernel
}  // namespace mindspore
@ -0,0 +1,112 @@
/**
 * Copyright 2021 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
/**
 *Note:
 * PMEExcludedForceUpdate. This is an experimental interface that is subject to change and/or deletion.
 */
#ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_SPONGE_PME_PME_EXCLUDED_FORCE_UPDATE_KERNEL_H_
#define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_SPONGE_PME_PME_EXCLUDED_FORCE_UPDATE_KERNEL_H_

#include <cuda_runtime_api.h>
#include <cufft.h>
#include <map>
#include <string>
#include <vector>
#include "runtime/device/gpu/cuda_common.h"
#include "backend/kernel_compiler/gpu/gpu_kernel.h"
#include "backend/kernel_compiler/gpu/gpu_kernel_factory.h"
#include "backend/kernel_compiler/gpu/cuda_impl/sponge/pme/pme_excluded_force_impl.cuh"

namespace mindspore {
namespace kernel {
template <typename T, typename T1>
class PMEExcludedForceUpdateGpuKernel : public GpuKernel {
 public:
  PMEExcludedForceUpdateGpuKernel() : ele_uint_crd(1) {}
  ~PMEExcludedForceUpdateGpuKernel() override = default;

  bool Init(const CNodePtr &kernel_node) override {
    kernel_node_ = kernel_node;
    atom_numbers = static_cast<int>(GetAttr<int64_t>(kernel_node, "atom_numbers"));
    excluded_numbers = static_cast<int>(GetAttr<int64_t>(kernel_node, "excluded_numbers"));
    beta = static_cast<float>(GetAttr<float_t>(kernel_node, "beta"));
    need_update = static_cast<int>(GetAttr<int64_t>(kernel_node, "need_update"));
    InitSizeLists();
    return true;
  }

  const std::vector<size_t> &GetInputSizeList() const override { return input_size_list_; }
  const std::vector<size_t> &GetOutputSizeList() const override { return output_size_list_; }
  const std::vector<size_t> &GetWorkspaceSizeList() const override { return workspace_size_list_; }

  bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
              const std::vector<AddressPtr> &outputs, void *stream_ptr) override {
    auto uint_crd = GetDeviceAddress<int>(inputs, 0);
    auto scaler = GetDeviceAddress<T>(inputs, 1);
    auto charge = GetDeviceAddress<T>(inputs, 2);
    auto excluded_list_start = GetDeviceAddress<int>(inputs, 3);
    auto excluded_list = GetDeviceAddress<int>(inputs, 4);
    auto excluded_atom_numbers = GetDeviceAddress<int>(inputs, 5);
    auto d_beta = GetDeviceAddress<T>(inputs, 6);
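    // beta can be refreshed on device between steps; copy the current value back before launch.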
    if (need_update) {
      cudaMemcpyAsync(&beta, d_beta, sizeof(float), cudaMemcpyDeviceToHost, reinterpret_cast<cudaStream_t>(stream_ptr));
      cudaStreamSynchronize(reinterpret_cast<cudaStream_t>(stream_ptr));
    }

    auto force = GetDeviceAddress<T>(outputs, 0);
    PMEExcludedForce(atom_numbers, beta, uint_crd, scaler, charge, excluded_list_start, excluded_list,
                     excluded_atom_numbers, force, reinterpret_cast<cudaStream_t>(stream_ptr));
    return true;
  }

 protected:
  void InitSizeLists() override {
    input_size_list_.push_back(atom_numbers * sizeof(UNSIGNED_INT_VECTOR));
    input_size_list_.push_back(sizeof(VECTOR));
    input_size_list_.push_back(atom_numbers * sizeof(T));
    input_size_list_.push_back(atom_numbers * sizeof(T1));
    input_size_list_.push_back(excluded_numbers * sizeof(T1));
    input_size_list_.push_back(atom_numbers * sizeof(T1));
    input_size_list_.push_back(sizeof(T));

    output_size_list_.push_back(atom_numbers * 3 * sizeof(T));
  }

 private:
  size_t ele_uint_crd = 1;
  std::vector<size_t> input_size_list_;
  std::vector<size_t> output_size_list_;
  std::vector<size_t> workspace_size_list_;
  int atom_numbers;
  int excluded_numbers;
  float beta;
  float factor;
  int need_update;
  struct VECTOR {
    float x;
    float y;
    float z;
  };

  struct UNSIGNED_INT_VECTOR {
    unsigned int uint_x;
    unsigned int uint_y;
    unsigned int uint_z;
  };
};
}  // namespace kernel
}  // namespace mindspore
#endif
@ -13,6 +13,10 @@
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
/**
 *Note:
 * PMEReciprocalForce. This is an experimental interface that is subject to change and/or deletion.
 */
#ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_SPONGE_PME_PME_RECIPROCAL_FORCE_KERNEL_H_
#define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_SPONGE_PME_PME_RECIPROCAL_FORCE_KERNEL_H_
#include <cuda_runtime_api.h>
@ -144,9 +148,8 @@ class PMEReciprocalForceGpuKernel : public GpuKernel {

 protected:
  void InitSizeLists() override {
    input_size_list_.push_back(sizeof(VECTOR));
    input_size_list_.push_back(atom_numbers * sizeof(UNSIGNED_INT_VECTOR));
    input_size_list_.push_back(atom_numbers * sizeof(VECTOR));
    input_size_list_.push_back(atom_numbers * sizeof(float));

    workspace_size_list_.push_back(atom_numbers * sizeof(UNSIGNED_INT_VECTOR));
    workspace_size_list_.push_back(atom_numbers * sizeof(VECTOR));
@ -0,0 +1,32 @@
/**
 * Copyright 2021 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
/**
 *Note:
 * PMEReciprocalForceUpdate. This is an experimental interface that is subject to change and/or deletion.
 */
#include "backend/kernel_compiler/gpu/sponge/pme/pme_reciprocal_force_update_kernel.h"

namespace mindspore {
namespace kernel {
MS_REG_GPU_KERNEL_TWO(PMEReciprocalForceUpdate,
                      KernelAttr()
                        .AddInputAttr(kNumberTypeUInt32)
                        .AddInputAttr(kNumberTypeFloat32)
                        .AddInputAttr(kNumberTypeFloat32)
                        .AddOutputAttr(kNumberTypeFloat32),
                      PMEReciprocalForceUpdateGpuKernel, float, int)
}  // namespace kernel
}  // namespace mindspore
@ -0,0 +1,280 @@
/**
 * Copyright 2021 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
/**
 *Note:
 * PMEReciprocalForceUpdate. This is an experimental interface that is subject to change and/or deletion.
 */
#ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_SPONGE_PME_PME_RECIPROCAL_FORCE_UPDATE_KERNEL_H_
#define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_SPONGE_PME_PME_RECIPROCAL_FORCE_UPDATE_KERNEL_H_
#include <cuda_runtime_api.h>
#include <cufft.h>
#include <vector>
#include "backend/kernel_compiler/gpu/gpu_kernel.h"
#include "backend/kernel_compiler/gpu/gpu_kernel_factory.h"
#include "runtime/device/gpu/cuda_common.h"
#include "backend/kernel_compiler/gpu/cuda_impl/sponge/pme/pme_reciprocal_force_impl.cuh"

namespace mindspore {
namespace kernel {
template <typename T, typename T1>
class PMEReciprocalForceUpdateGpuKernel : public GpuKernel {
 public:
  PMEReciprocalForceUpdateGpuKernel() : ele_uint_crd(1) {}
  ~PMEReciprocalForceUpdateGpuKernel() override = default;

  bool Init(const CNodePtr &kernel_node) override {
    kernel_node_ = kernel_node;
    atom_numbers = static_cast<int>(GetAttr<int64_t>(kernel_node, "atom_numbers"));
    beta = static_cast<float>(GetAttr<float_t>(kernel_node, "beta"));
    fftx = static_cast<int>(GetAttr<int64_t>(kernel_node, "fftx"));
    ffty = static_cast<int>(GetAttr<int64_t>(kernel_node, "ffty"));
    fftz = static_cast<int>(GetAttr<int64_t>(kernel_node, "fftz"));

    float box_length_0 = static_cast<float>(GetAttr<float_t>(kernel_node, "box_length_0"));
    float box_length_1 = static_cast<float>(GetAttr<float_t>(kernel_node, "box_length_1"));
    float box_length_2 = static_cast<float>(GetAttr<float_t>(kernel_node, "box_length_2"));

    need_update = static_cast<int>(GetAttr<int64_t>(kernel_node, "need_update"));
    PME_Nall = fftx * ffty * fftz;
    PME_Nfft = fftx * ffty * (fftz / 2 + 1);
    PME_Nin = ffty * fftz;
    cufftPlan3d(&PME_plan_r2c, fftx, ffty, fftz, CUFFT_R2C);
    cufftPlan3d(&PME_plan_c2r, fftx, ffty, fftz, CUFFT_C2R);

    std::vector<float> h_box_length{box_length_0, box_length_1, box_length_2};
    VECTOR *box_length = reinterpret_cast<VECTOR *>(h_box_length.data());

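    // Inverse box vector in grid units: FFT grid size divided by box length on each axis.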
    PME_inverse_box_vector0.x = static_cast<float>(fftx) / box_length[0].x;
    PME_inverse_box_vector0.y = static_cast<float>(ffty) / box_length[0].y;
    PME_inverse_box_vector0.z = static_cast<float>(fftz) / box_length[0].z;
    PME_inverse_box_vector.x = PME_inverse_box_vector0.x;
    PME_inverse_box_vector.y = PME_inverse_box_vector0.y;
    PME_inverse_box_vector.z = PME_inverse_box_vector0.z;
    float volume = box_length[0].x * box_length[0].y * box_length[0].z;
    PME_kxyz_cpu.resize(64);
    int kx, ky, kz, kxrp, kyrp, kzrp, index;
    for (kx = 0; kx < 4; kx++) {
      for (ky = 0; ky < 4; ky++) {
        for (kz = 0; kz < 4; kz++) {
          index = kx * 16 + ky * 4 + kz;
          PME_kxyz_cpu[index].uint_x = kx;
          PME_kxyz_cpu[index].uint_y = ky;
          PME_kxyz_cpu[index].uint_z = kz;
        }
      }
    }
    B1.resize(fftx);
    B2.resize(ffty);
    B3.resize(fftz);
    PME_BC0.resize(PME_Nfft);

    for (kx = 0; kx < fftx; kx++) {
      B1[kx] = getb(kx, fftx, 4);
    }

    for (ky = 0; ky < ffty; ky++) {
      B2[ky] = getb(ky, ffty, 4);
    }

    for (kz = 0; kz < fftz; kz++) {
      B3[kz] = getb(kz, fftz, 4);
    }
    float mprefactor = PI * PI / -beta / beta;
    float msq;
    for (kx = 0; kx < fftx; kx++) {
      kxrp = kx;
      if (kx > fftx / 2) kxrp = fftx - kx;
      for (ky = 0; ky < ffty; ky++) {
        kyrp = ky;
        if (ky > ffty / 2) kyrp = ffty - ky;
        for (kz = 0; kz <= fftz / 2; kz++) {
          kzrp = kz;

          msq = kxrp * kxrp / box_length[0].x / box_length[0].x + kyrp * kyrp / box_length[0].y / box_length[0].y +
                kzrp * kzrp / box_length[0].z / box_length[0].z;
          index = kx * ffty * (fftz / 2 + 1) + ky * (fftz / 2 + 1) + kz;
          if ((kx + ky + kz) == 0) {
            PME_BC0[index] = 0;
          } else {
            PME_BC0[index] = 1.0 / PI / msq * exp(mprefactor * msq) / volume;
          }

          PME_BC0[index] *= B1[kx] * B2[ky] * B3[kz];
        }
      }
    }
    PME_BC = PME_BC0;
    InitSizeLists();
    return true;
  }

  const std::vector<size_t> &GetInputSizeList() const override { return input_size_list_; }
  const std::vector<size_t> &GetOutputSizeList() const override { return output_size_list_; }
  const std::vector<size_t> &GetWorkspaceSizeList() const override { return workspace_size_list_; }

  bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
              const std::vector<AddressPtr> &outputs, void *stream_ptr) override {
    auto uint_crd = GetDeviceAddress<const T1>(inputs, 0);
    auto charge = GetDeviceAddress<T>(inputs, 1);
    auto d_beta = GetDeviceAddress<T>(inputs, 2);

    auto pme_uxyz = GetDeviceAddress<int>(workspace, 0);       // workspace
    auto pme_frxyz = GetDeviceAddress<float>(workspace, 1);    // workspace
    auto pme_q = GetDeviceAddress<T>(workspace, 2);            // workspace
    auto pme_fq = GetDeviceAddress<float>(workspace, 3);       // workspace
    auto pme_atom_near = GetDeviceAddress<int>(workspace, 4);  // workspace
    auto pme_bc = GetDeviceAddress<float>(workspace, 5);       // workspace
    auto pme_kxyz = GetDeviceAddress<int>(workspace, 6);       // workspace

    auto force = GetDeviceAddress<T>(outputs, 0);
    h_beta = beta;
if (need_update) {
|
||||
cudaMemcpyAsync(&h_beta, d_beta, sizeof(float), cudaMemcpyDeviceToHost,
|
||||
reinterpret_cast<cudaStream_t>(stream_ptr));
|
||||
cudaStreamSynchronize(reinterpret_cast<cudaStream_t>(stream_ptr));
|
||||
factor = h_beta / beta;
|
||||
double factor_inverse = 1.0 / factor;
|
||||
|
||||
PME_inverse_box_vector.x = PME_inverse_box_vector0.x * factor_inverse;
|
||||
PME_inverse_box_vector.y = PME_inverse_box_vector0.y * factor_inverse;
|
||||
PME_inverse_box_vector.z = PME_inverse_box_vector0.z * factor_inverse;
|
||||
|
||||
for (int i = 0; i < PME_Nfft; i++) {
|
||||
PME_BC[i] = PME_BC0[i] * factor_inverse;
|
||||
}
|
||||
}
|
||||
|
||||
cufftSetStream(PME_plan_r2c, reinterpret_cast<cudaStream_t>(stream_ptr));
|
||||
cufftSetStream(PME_plan_c2r, reinterpret_cast<cudaStream_t>(stream_ptr));
|
||||
cudaMemcpyAsync(pme_kxyz, PME_kxyz_cpu.data(), sizeof(UNSIGNED_INT_VECTOR) * 64, cudaMemcpyHostToDevice,
|
||||
reinterpret_cast<cudaStream_t>(stream_ptr));
|
||||
cudaMemcpyAsync(pme_bc, PME_BC.data(), sizeof(float) * PME_Nfft, cudaMemcpyHostToDevice,
|
||||
reinterpret_cast<cudaStream_t>(stream_ptr));
|
||||
PMEReciprocalForce(fftx, ffty, fftz, atom_numbers, beta, pme_bc, pme_uxyz, pme_frxyz, pme_q, pme_fq, pme_atom_near,
|
||||
pme_kxyz, uint_crd, charge, force, PME_Nin, PME_Nall, PME_Nfft, PME_plan_r2c, PME_plan_c2r,
|
||||
PME_inverse_box_vector, reinterpret_cast<cudaStream_t>(stream_ptr));
|
||||
return true;
|
||||
}
 protected:
  void InitSizeLists() override {
    input_size_list_.push_back(atom_numbers * sizeof(UNSIGNED_INT_VECTOR));
    input_size_list_.push_back(atom_numbers * sizeof(float));
    input_size_list_.push_back(sizeof(float));

    workspace_size_list_.push_back(atom_numbers * sizeof(UNSIGNED_INT_VECTOR));  // pme_uxyz
    workspace_size_list_.push_back(atom_numbers * sizeof(VECTOR));               // pme_frxyz
    workspace_size_list_.push_back(PME_Nall * sizeof(T));                        // pme_q, real mesh
    workspace_size_list_.push_back(PME_Nfft * sizeof(cufftComplex));             // pme_fq, complex mesh
    workspace_size_list_.push_back(atom_numbers * 64 * sizeof(int));             // pme_atom_near
    workspace_size_list_.push_back(PME_Nfft * sizeof(float));                    // pme_bc
    workspace_size_list_.push_back(64 * sizeof(UNSIGNED_INT_VECTOR));            // pme_kxyz

    output_size_list_.push_back(atom_numbers * sizeof(VECTOR));
  }

  // Complex exponential via Euler's formula: expc(z) = e^{z.x} * (cos(z.y) + i * sin(z.y));
  // sincosf writes the sine into res.y and the cosine into res.x in a single call.
  cufftComplex expc(cufftComplex z) {
    cufftComplex res;
    float t = expf(z.x);
    sincosf(z.y, &res.y, &res.x);
    res.x *= t;
    res.y *= t;
    return res;
  }
  // Cardinal B-spline of order n, by the standard recursion
  //   M_n(u) = u/(n-1) * M_{n-1}(u) + (n-u)/(n-1) * M_{n-1}(u-1),
  // with the order-2 base case the hat function on [0, 2].
  float M_(float u, int n) {
    if (n == 2) {
      if (u > 2 || u < 0) return 0;
      return 1 - fabsf(u - 1);
    } else {
      return u / (n - 1) * M_(u, n - 1) + (n - u) / (n - 1) * M_(u - 1, n - 1);
    }
  }
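  // getb(k, NFFT, B_order) evaluates the squared modulus of the B-spline Fourier coefficient
  //   b(k) = exp(2*pi*i*(B_order-1)*k/NFFT) / sum_{j=0}^{B_order-2} M_(j+1, B_order) * exp(2*pi*i*k*j/NFFT),
  // which fills the per-axis tables B1, B2, B3 used in Init().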
  float getb(int k, int NFFT, int B_order) {
    cufftComplex tempc, tempc2, res;
    float tempf;
    tempc2.x = 0;
    tempc2.y = 0;

    tempc.x = 0;
    if (NFFT == 0) {
      MS_LOG(ERROR) << "Divide by zero.";
    } else {
      tempc.y = 2 * (B_order - 1) * PI * k / NFFT;
    }
    res = expc(tempc);

    for (int kk = 0; kk < (B_order - 1); kk++) {
      tempc.x = 0;
      if (NFFT == 0) {
        MS_LOG(ERROR) << "Divide by zero.";
        break;
      } else {
        tempc.y = 2 * PI * k / NFFT * kk;
      }
      tempc = expc(tempc);
      tempf = M_(kk + 1, B_order);
      tempc2.x += tempf * tempc.x;
      tempc2.y += tempf * tempc.y;
    }
    res = cuCdivf(res, tempc2);
    return res.x * res.x + res.y * res.y;
  }
 private:
  size_t ele_uint_crd = 1;

  std::vector<size_t> input_size_list_;
  std::vector<size_t> output_size_list_;
  std::vector<size_t> workspace_size_list_;
  int atom_numbers;
  int fftx;
  int ffty;
  int fftz;
  float beta;
  float h_beta;
  float factor;
  int PME_Nall;
  int PME_Nfft;
  int PME_Nin;
  float PI = 3.1415926;
  int need_update;
  std::vector<float> B1;
  std::vector<float> B2;
  std::vector<float> B3;
  std::vector<float> PME_BC0;
  std::vector<float> PME_BC;

  cufftHandle PME_plan_r2c;
  cufftHandle PME_plan_c2r;
  struct VECTOR {
    float x;
    float y;
    float z;
  };
  VECTOR PME_inverse_box_vector0;
  VECTOR PME_inverse_box_vector;
  struct UNSIGNED_INT_VECTOR {
    unsigned int uint_x;
    unsigned int uint_y;
    unsigned int uint_z;
  };
  std::vector<UNSIGNED_INT_VECTOR> PME_kxyz_cpu;
};
}  // namespace kernel
}  // namespace mindspore
#endif
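As a cross-check on the B-spline machinery above, here is a minimal host-side sketch of the same |b(k)|^2 computation, with std::complex<float> standing in for cufftComplex; the names bspline and bspline_modulus are illustrative and not part of this commit:

#include <cmath>
#include <complex>
#include <cstdio>

// Cardinal B-spline M_n(u), same recursion as M_ in the kernel above.
static float bspline(float u, int n) {
  if (n == 2) return (u < 0 || u > 2) ? 0.0f : 1.0f - std::fabs(u - 1.0f);
  return u / (n - 1) * bspline(u, n - 1) + (n - u) / (n - 1) * bspline(u - 1.0f, n - 1);
}

// |b(k)|^2 for an NFFT-point mesh and interpolation order B_order, mirroring getb().
static float bspline_modulus(int k, int NFFT, int B_order) {
  const float PI = 3.1415926f;
  std::complex<float> num = std::exp(std::complex<float>(0.0f, 2 * (B_order - 1) * PI * k / NFFT));
  std::complex<float> den(0.0f, 0.0f);
  for (int j = 0; j < B_order - 1; j++) {
    den += bspline(j + 1.0f, B_order) * std::exp(std::complex<float>(0.0f, 2 * PI * k / NFFT * j));
  }
  return std::norm(num / den);  // squared modulus, as getb returns
}

int main() {
  for (int k = 0; k < 8; k++) printf("|b(%d)|^2 = %f\n", k, bspline_modulus(k, 8, 4));
  return 0;
}

Dividing by the spline's discrete Fourier sum and squaring is what cancels the interpolation error of the charge spreading in the reciprocal-space sum.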
@@ -13,12 +13,16 @@
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
/**
 *Note:
 * ConstrainForceCycleWithVirial. This is an experimental interface that is subject to change and/or deletion.
 */

#include "backend/kernel_compiler/gpu/sponge/simple_constrain/constrain_force_cycle_with_virial_kernel.h"

namespace mindspore {
namespace kernel {
MS_REG_GPU_KERNEL_THREE(ConstrainForceCycleWithVirial,
                        KernelAttr()
                          .AddInputAttr(kNumberTypeUInt32)
                          .AddInputAttr(kNumberTypeFloat32)
@@ -0,0 +1,41 @@
/**
 * Copyright 2021 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
/**
 *Note:
 * ConstrainForce. This is an experimental interface that is subject to change and/or deletion.
 */
#include "backend/kernel_compiler/gpu/sponge/simple_constrain/constrain_force_kernel.h"

namespace mindspore {
namespace kernel {
MS_REG_GPU_KERNEL_THREE(ConstrainForce,
                        KernelAttr()
                          .AddInputAttr(kNumberTypeFloat32)    // crd
                          .AddInputAttr(kNumberTypeFloat32)    // quarter_cof
                          .AddInputAttr(kNumberTypeFloat32)    // mass_inverse
                          .AddInputAttr(kNumberTypeFloat32)    // scaler
                          .AddInputAttr(kNumberTypeFloat32)    // pair_dr
                          .AddInputAttr(kNumberTypeInt32)      // atom_i_serials
                          .AddInputAttr(kNumberTypeInt32)      // atom_j_serials
                          .AddInputAttr(kNumberTypeFloat32)    // constant_rs
                          .AddInputAttr(kNumberTypeFloat32)    // constrain_ks
                          .AddOutputAttr(kNumberTypeUInt32)    // uint_crd
                          .AddOutputAttr(kNumberTypeFloat32)   // test_frc
                          .AddOutputAttr(kNumberTypeFloat32),  // atom_virial
                        ConstrainForceGpuKernel, float, int, unsigned int)

}  // namespace kernel
}  // namespace mindspore
@@ -0,0 +1,162 @@
/**
 * Copyright 2021 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
/**
 *Note:
 * ConstrainForce. This is an experimental interface that is subject to change and/or deletion.
 */

#ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_SPONG_SIMPLE_CONSTRAIN_CONSTRAIN_FORCE_KERNEL_H_
#define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_SPONG_SIMPLE_CONSTRAIN_CONSTRAIN_FORCE_KERNEL_H_

#include "backend/kernel_compiler/gpu/cuda_impl/sponge/simple_constrain/constrain_force_virial_impl.cuh"

#include <cuda_runtime_api.h>
#include <map>
#include <string>
#include <vector>

#include "backend/kernel_compiler/gpu/gpu_kernel.h"
#include "backend/kernel_compiler/gpu/gpu_kernel_factory.h"
#include "runtime/device/gpu/cuda_common.h"

namespace mindspore {
namespace kernel {
template <typename T, typename T1, typename T2>
class ConstrainForceGpuKernel : public GpuKernel {
 public:
  ConstrainForceGpuKernel() : ele_crd(1) {}
  ~ConstrainForceGpuKernel() override = default;

  bool Init(const CNodePtr &kernel_node) override {
    kernel_node_ = kernel_node;
    atom_numbers = static_cast<int>(GetAttr<int64_t>(kernel_node, "atom_numbers"));
    constrain_pair_numbers = static_cast<int>(GetAttr<int64_t>(kernel_node, "constrain_pair_numbers"));
    iteration_numbers = static_cast<int>(GetAttr<int64_t>(kernel_node, "iteration_numbers"));
    half_exp_gamma_plus_half = static_cast<float>(GetAttr<float>(kernel_node, "half_exp_gamma_plus_half"));

    auto shape_crd = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
    auto shape_quarter_cof = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 1);
    auto shape_mass_inverse = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 2);

    auto shape_scaler = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 3);
    auto shape_pair_dr = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 4);
    auto shape_atom_i_serials = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 5);
    auto shape_atom_j_serials = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 6);
    auto shape_constant_rs = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 7);
    auto shape_constrain_ks = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 8);

    // Element counts are the products of each input's inferred shape.
    for (size_t i = 0; i < shape_scaler.size(); i++) ele_scaler *= shape_scaler[i];
    for (size_t i = 0; i < shape_pair_dr.size(); i++) ele_pair_dr *= shape_pair_dr[i];
    for (size_t i = 0; i < shape_atom_i_serials.size(); i++) ele_atom_i_serials *= shape_atom_i_serials[i];
    for (size_t i = 0; i < shape_atom_j_serials.size(); i++) ele_atom_j_serials *= shape_atom_j_serials[i];
    for (size_t i = 0; i < shape_constant_rs.size(); i++) ele_constant_rs *= shape_constant_rs[i];
    for (size_t i = 0; i < shape_constrain_ks.size(); i++) ele_constrain_ks *= shape_constrain_ks[i];

    for (size_t i = 0; i < shape_crd.size(); i++) ele_crd *= shape_crd[i];
    for (size_t i = 0; i < shape_quarter_cof.size(); i++) ele_quarter_cof *= shape_quarter_cof[i];
    for (size_t i = 0; i < shape_mass_inverse.size(); i++) ele_mass_inverse *= shape_mass_inverse[i];

    InitSizeLists();
    return true;
  }

  const std::vector<size_t> &GetInputSizeList() const override { return input_size_list_; }
  const std::vector<size_t> &GetOutputSizeList() const override { return output_size_list_; }
  const std::vector<size_t> &GetWorkspaceSizeList() const override { return workspace_size_list_; }

  bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
              const std::vector<AddressPtr> &outputs, void *stream_ptr) override {
    auto crd = GetDeviceAddress<const T>(inputs, 0);
    auto quarter_cof = GetDeviceAddress<const T>(inputs, 1);
    auto mass_inverse = GetDeviceAddress<const T>(inputs, 2);

    auto scaler = GetDeviceAddress<const T>(inputs, 3);
    auto pair_dr = GetDeviceAddress<const T>(inputs, 4);
    auto atom_i_serials = GetDeviceAddress<const T1>(inputs, 5);
    auto atom_j_serials = GetDeviceAddress<const T1>(inputs, 6);
    auto constant_rs = GetDeviceAddress<const T>(inputs, 7);
    auto constrain_ks = GetDeviceAddress<const T>(inputs, 8);

    auto constrain_pair = GetDeviceAddress<T>(workspace, 0);

    auto uint_crd = GetDeviceAddress<T2>(outputs, 0);

    auto test_frc_f = GetDeviceAddress<T>(outputs, 1);
    auto d_atom_virial = GetDeviceAddress<T>(outputs, 2);

    // Start from zeroed constraint forces and virial, then iterate the correction cycle.
    set_zero_force_with_virial(atom_numbers, constrain_pair_numbers, test_frc_f, d_atom_virial,
                               reinterpret_cast<cudaStream_t>(stream_ptr));

    for (int i = 0; i < iteration_numbers; i++) {
      refresh_uint_crd_update(atom_numbers, half_exp_gamma_plus_half, crd, quarter_cof, test_frc_f, mass_inverse,
                              uint_crd, reinterpret_cast<cudaStream_t>(stream_ptr));

      constrain_force_cycle_update(atom_numbers, constrain_pair_numbers, uint_crd, scaler, constrain_pair, pair_dr,
                                   atom_i_serials, atom_j_serials, constant_rs, constrain_ks, test_frc_f,
                                   reinterpret_cast<cudaStream_t>(stream_ptr));
    }
    return true;
  }

 protected:
  void InitSizeLists() override {
    input_size_list_.push_back(ele_crd * sizeof(T));
    input_size_list_.push_back(ele_quarter_cof * sizeof(T));
    input_size_list_.push_back(ele_mass_inverse * sizeof(T));

    input_size_list_.push_back(ele_scaler * sizeof(T));
    input_size_list_.push_back(ele_pair_dr * sizeof(T));
    input_size_list_.push_back(ele_atom_i_serials * sizeof(T1));
    input_size_list_.push_back(ele_atom_j_serials * sizeof(T1));
    input_size_list_.push_back(ele_constant_rs * sizeof(T));
    input_size_list_.push_back(ele_constrain_ks * sizeof(T));

    workspace_size_list_.push_back(constrain_pair_numbers * sizeof(CONSTRAIN_PAIR));

    output_size_list_.push_back(3 * atom_numbers * sizeof(T2));
    output_size_list_.push_back(3 * atom_numbers * sizeof(T));
    output_size_list_.push_back(constrain_pair_numbers * sizeof(T));
  }

 private:
  size_t ele_scaler = 1;
  size_t ele_pair_dr = 1;
  size_t ele_atom_i_serials = 1;
  size_t ele_atom_j_serials = 1;
  size_t ele_constant_rs = 1;
  size_t ele_constrain_ks = 1;
  size_t ele_crd = 1;
  size_t ele_quarter_cof = 1;
  size_t ele_mass_inverse = 1;

  std::vector<size_t> input_size_list_;
  std::vector<size_t> output_size_list_;
  std::vector<size_t> workspace_size_list_;
  int atom_numbers;
  int constrain_pair_numbers;
  int iteration_numbers;
  int need_pressure;
  float half_exp_gamma_plus_half;
  struct CONSTRAIN_PAIR {
    int atom_i_serial;
    int atom_j_serial;
    float constant_r;
    float constrain_k;
  };
};
}  // namespace kernel
}  // namespace mindspore
#endif
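The shape of the algorithm is worth spelling out: ConstrainForce iterates a SHAKE-style correction, repeatedly mapping coordinates onto the unsigned-int grid and pushing each constrained pair back toward its reference length. A minimal host-side sketch of one correction sweep, with plain float coordinates in place of the kernels' unsigned-int grid and periodic scaler (the names and the exact force expression are illustrative assumptions, not the CUDA implementation):

#include <cmath>
#include <cstdio>
#include <vector>

struct Vec { float x, y, z; };
struct Pair { int i, j; float r0, k; };  // constrained pair: target length r0, per-pair weight k

// One correction sweep: each pair is nudged back toward its reference length r0.
// g vanishes when |r_ij| == r0, so repeated sweeps converge on satisfied constraints.
void constrain_cycle(const std::vector<Pair> &pairs, const std::vector<Vec> &crd, std::vector<Vec> &frc) {
  for (const Pair &cp : pairs) {
    Vec dr = {crd[cp.j].x - crd[cp.i].x, crd[cp.j].y - crd[cp.i].y, crd[cp.j].z - crd[cp.i].z};
    float r = std::sqrt(dr.x * dr.x + dr.y * dr.y + dr.z * dr.z);
    float g = cp.k * (1.0f - cp.r0 / r);  // pulls i toward j when the pair is stretched
    frc[cp.i].x += g * dr.x; frc[cp.i].y += g * dr.y; frc[cp.i].z += g * dr.z;
    frc[cp.j].x -= g * dr.x; frc[cp.j].y -= g * dr.y; frc[cp.j].z -= g * dr.z;
  }
}

int main() {
  std::vector<Vec> crd = {{0, 0, 0}, {1.2f, 0, 0}};  // one pair, stretched past r0 = 1.0
  std::vector<Vec> frc(2, {0, 0, 0});
  std::vector<Pair> pairs = {{0, 1, 1.0f, 0.5f}};
  constrain_cycle(pairs, crd, frc);
  printf("correction on atom 0: (%f, %f, %f)\n", frc[0].x, frc[0].y, frc[0].z);
  return 0;
}

Each sweep leaves satisfied pairs untouched, so iteration_numbers sweeps relax the whole coupled set of constraints together.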
@@ -0,0 +1,42 @@
/**
 * Copyright 2021 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
/**
 *Note:
 * ConstrainForceVirial. This is an experimental interface that is subject to change and/or deletion.
 */

#include "backend/kernel_compiler/gpu/sponge/simple_constrain/constrain_force_virial_kernel.h"

namespace mindspore {
namespace kernel {
MS_REG_GPU_KERNEL_THREE(ConstrainForceVirial,
                        KernelAttr()
                          .AddInputAttr(kNumberTypeFloat32)    // crd
                          .AddInputAttr(kNumberTypeFloat32)    // quarter_cof
                          .AddInputAttr(kNumberTypeFloat32)    // mass_inverse
                          .AddInputAttr(kNumberTypeFloat32)    // scaler
                          .AddInputAttr(kNumberTypeFloat32)    // pair_dr
                          .AddInputAttr(kNumberTypeInt32)      // atom_i_serials
                          .AddInputAttr(kNumberTypeInt32)      // atom_j_serials
                          .AddInputAttr(kNumberTypeFloat32)    // constant_rs
                          .AddInputAttr(kNumberTypeFloat32)    // constrain_ks
                          .AddOutputAttr(kNumberTypeUInt32)    // uint_crd
                          .AddOutputAttr(kNumberTypeFloat32)   // test_frc
                          .AddOutputAttr(kNumberTypeFloat32),  // atom_virial
                        ConstrainForceVirialGpuKernel, float, int, unsigned int)

}  // namespace kernel
}  // namespace mindspore
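For downstream pressure estimation, the per-pair quantity accumulated into atom_virial is presumably the usual pairwise virial contribution,

W = \sum_{(i,j)} \mathbf{f}_{ij} \cdot \mathbf{r}_{ij},

summed over the constrained pairs; comparing the two headers, the with-virial kernels differ from their plain counterparts only in carrying this accumulator.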
@@ -0,0 +1,159 @@
/**
 * Copyright 2021 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
/**
 *Note:
 * ConstrainForceVirial. This is an experimental interface that is subject to change and/or deletion.
 */
#ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_SPONG_SIMPLE_CONSTRAIN_CONSTRAIN_FORCE_VIRIAL_KERNEL_H_
#define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_SPONG_SIMPLE_CONSTRAIN_CONSTRAIN_FORCE_VIRIAL_KERNEL_H_

#include "backend/kernel_compiler/gpu/cuda_impl/sponge/simple_constrain/constrain_force_virial_impl.cuh"

#include <cuda_runtime_api.h>
#include <map>
#include <string>
#include <vector>

#include "backend/kernel_compiler/gpu/gpu_kernel.h"
#include "backend/kernel_compiler/gpu/gpu_kernel_factory.h"
#include "runtime/device/gpu/cuda_common.h"

namespace mindspore {
namespace kernel {
template <typename T, typename T1, typename T2>
class ConstrainForceVirialGpuKernel : public GpuKernel {
 public:
  ConstrainForceVirialGpuKernel() : ele_crd(1) {}
  ~ConstrainForceVirialGpuKernel() override = default;

  bool Init(const CNodePtr &kernel_node) override {
    kernel_node_ = kernel_node;
    atom_numbers = static_cast<int>(GetAttr<int64_t>(kernel_node, "atom_numbers"));
    constrain_pair_numbers = static_cast<int>(GetAttr<int64_t>(kernel_node, "constrain_pair_numbers"));
    iteration_numbers = static_cast<int>(GetAttr<int64_t>(kernel_node, "iteration_numbers"));
    half_exp_gamma_plus_half = static_cast<float>(GetAttr<float>(kernel_node, "half_exp_gamma_plus_half"));

    auto shape_crd = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
    auto shape_quarter_cof = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 1);
    auto shape_mass_inverse = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 2);

    auto shape_scaler = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 3);
    auto shape_pair_dr = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 4);
    auto shape_atom_i_serials = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 5);
    auto shape_atom_j_serials = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 6);
    auto shape_constant_rs = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 7);
    auto shape_constrain_ks = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 8);

    // Element counts are the products of each input's inferred shape.
    for (size_t i = 0; i < shape_scaler.size(); i++) ele_scaler *= shape_scaler[i];
    for (size_t i = 0; i < shape_pair_dr.size(); i++) ele_pair_dr *= shape_pair_dr[i];
    for (size_t i = 0; i < shape_atom_i_serials.size(); i++) ele_atom_i_serials *= shape_atom_i_serials[i];
    for (size_t i = 0; i < shape_atom_j_serials.size(); i++) ele_atom_j_serials *= shape_atom_j_serials[i];
    for (size_t i = 0; i < shape_constant_rs.size(); i++) ele_constant_rs *= shape_constant_rs[i];
    for (size_t i = 0; i < shape_constrain_ks.size(); i++) ele_constrain_ks *= shape_constrain_ks[i];

    for (size_t i = 0; i < shape_crd.size(); i++) ele_crd *= shape_crd[i];
    for (size_t i = 0; i < shape_quarter_cof.size(); i++) ele_quarter_cof *= shape_quarter_cof[i];
    for (size_t i = 0; i < shape_mass_inverse.size(); i++) ele_mass_inverse *= shape_mass_inverse[i];

    InitSizeLists();
    return true;
  }

  const std::vector<size_t> &GetInputSizeList() const override { return input_size_list_; }
  const std::vector<size_t> &GetOutputSizeList() const override { return output_size_list_; }
  const std::vector<size_t> &GetWorkspaceSizeList() const override { return workspace_size_list_; }

  bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
              const std::vector<AddressPtr> &outputs, void *stream_ptr) override {
    auto crd = GetDeviceAddress<const T>(inputs, 0);
    auto quarter_cof = GetDeviceAddress<const T>(inputs, 1);
    auto mass_inverse = GetDeviceAddress<const T>(inputs, 2);
    auto scaler = GetDeviceAddress<const T>(inputs, 3);
    auto pair_dr = GetDeviceAddress<const T>(inputs, 4);
    auto atom_i_serials = GetDeviceAddress<const T1>(inputs, 5);
    auto atom_j_serials = GetDeviceAddress<const T1>(inputs, 6);
    auto constant_rs = GetDeviceAddress<const T>(inputs, 7);
    auto constrain_ks = GetDeviceAddress<const T>(inputs, 8);
    auto constrain_pair = GetDeviceAddress<T>(workspace, 0);
    auto uint_crd = GetDeviceAddress<T2>(outputs, 0);
    auto test_frc_f = GetDeviceAddress<T>(outputs, 1);
    auto d_atom_virial = GetDeviceAddress<T>(outputs, 2);

    set_zero_force_with_virial(atom_numbers, constrain_pair_numbers, test_frc_f, d_atom_virial,
                               reinterpret_cast<cudaStream_t>(stream_ptr));

    // Same correction cycle as ConstrainForce, except the per-pair virial is always accumulated.
    for (int i = 0; i < iteration_numbers; i++) {
      refresh_uint_crd_update(atom_numbers, half_exp_gamma_plus_half, crd, quarter_cof, test_frc_f, mass_inverse,
                              uint_crd, reinterpret_cast<cudaStream_t>(stream_ptr));

      constrain_force_cycle_with_virial_update(atom_numbers, constrain_pair_numbers, uint_crd, scaler, constrain_pair,
                                               pair_dr, atom_i_serials, atom_j_serials, constant_rs, constrain_ks,
                                               test_frc_f, d_atom_virial, reinterpret_cast<cudaStream_t>(stream_ptr));
    }

    return true;
  }

 protected:
  void InitSizeLists() override {
    input_size_list_.push_back(ele_crd * sizeof(T));
    input_size_list_.push_back(ele_quarter_cof * sizeof(T));
    input_size_list_.push_back(ele_mass_inverse * sizeof(T));

    input_size_list_.push_back(ele_scaler * sizeof(T));
    input_size_list_.push_back(ele_pair_dr * sizeof(T));
    input_size_list_.push_back(ele_atom_i_serials * sizeof(T1));
    input_size_list_.push_back(ele_atom_j_serials * sizeof(T1));
    input_size_list_.push_back(ele_constant_rs * sizeof(T));
    input_size_list_.push_back(ele_constrain_ks * sizeof(T));

    workspace_size_list_.push_back(constrain_pair_numbers * sizeof(CONSTRAIN_PAIR));

    output_size_list_.push_back(3 * atom_numbers * sizeof(T2));

    output_size_list_.push_back(3 * atom_numbers * sizeof(T));
    output_size_list_.push_back(constrain_pair_numbers * sizeof(T));
  }

 private:
  size_t ele_scaler = 1;
  size_t ele_pair_dr = 1;
  size_t ele_atom_i_serials = 1;
  size_t ele_atom_j_serials = 1;
  size_t ele_constant_rs = 1;
  size_t ele_constrain_ks = 1;
  size_t ele_crd = 1;
  size_t ele_quarter_cof = 1;
  size_t ele_mass_inverse = 1;

  std::vector<size_t> input_size_list_;
  std::vector<size_t> output_size_list_;
  std::vector<size_t> workspace_size_list_;
  int atom_numbers;
  int constrain_pair_numbers;
  int iteration_numbers;
  int need_pressure;
  float half_exp_gamma_plus_half;
  struct CONSTRAIN_PAIR {
    int atom_i_serial;
    int atom_j_serial;
    float constant_r;
    float constrain_k;
  };
};
}  // namespace kernel
}  // namespace mindspore
#endif
@@ -0,0 +1,42 @@
/**
 * Copyright 2021 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
/**
 *Note:
 * Constrain. This is an experimental interface that is subject to change and/or deletion.
 */
#include "backend/kernel_compiler/gpu/sponge/simple_constrain/constrain_kernel.h"

namespace mindspore {
namespace kernel {
MS_REG_GPU_KERNEL_THREE(Constrain,
                        KernelAttr()
                          .AddInputAttr(kNumberTypeFloat32)    // crd
                          .AddInputAttr(kNumberTypeFloat32)    // quarter_cof
                          .AddInputAttr(kNumberTypeFloat32)    // mass_inverse
                          .AddInputAttr(kNumberTypeFloat32)    // scaler
                          .AddInputAttr(kNumberTypeFloat32)    // pair_dr
                          .AddInputAttr(kNumberTypeInt32)      // atom_i_serials
                          .AddInputAttr(kNumberTypeInt32)      // atom_j_serials
                          .AddInputAttr(kNumberTypeFloat32)    // constant_rs
                          .AddInputAttr(kNumberTypeFloat32)    // constrain_ks
                          .AddInputAttr(kNumberTypeInt32)      // need_pressure, selects the with-virial cycle
                          .AddOutputAttr(kNumberTypeUInt32)    // uint_crd
                          .AddOutputAttr(kNumberTypeFloat32)   // test_frc
                          .AddOutputAttr(kNumberTypeFloat32),  // atom_virial
                        ConstrainGpuKernel, float, int, unsigned int)

}  // namespace kernel
}  // namespace mindspore
@@ -0,0 +1,176 @@
/**
 * Copyright 2021 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
/**
 *Note:
 * Constrain. This is an experimental interface that is subject to change and/or deletion.
 */
#ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_SPONG_SIMPLE_CONSTRAIN_CONSTRAIN_KERNEL_H_
#define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_SPONG_SIMPLE_CONSTRAIN_CONSTRAIN_KERNEL_H_

#include "backend/kernel_compiler/gpu/cuda_impl/sponge/simple_constrain/constrain_force_virial_impl.cuh"

#include <cuda_runtime_api.h>
#include <map>
#include <string>
#include <vector>

#include "backend/kernel_compiler/gpu/gpu_kernel.h"
#include "backend/kernel_compiler/gpu/gpu_kernel_factory.h"
#include "runtime/device/gpu/cuda_common.h"

namespace mindspore {
namespace kernel {
template <typename T, typename T1, typename T2>
class ConstrainGpuKernel : public GpuKernel {
 public:
  ConstrainGpuKernel() : ele_crd(1), step(1) {}
  ~ConstrainGpuKernel() override = default;

  bool Init(const CNodePtr &kernel_node) override {
    // read the op attributes
    kernel_node_ = kernel_node;
    atom_numbers = static_cast<int>(GetAttr<int64_t>(kernel_node, "atom_numbers"));
    constrain_pair_numbers = static_cast<int>(GetAttr<int64_t>(kernel_node, "constrain_pair_numbers"));
    iteration_numbers = static_cast<int>(GetAttr<int64_t>(kernel_node, "iteration_numbers"));
    half_exp_gamma_plus_half = static_cast<float>(GetAttr<float>(kernel_node, "half_exp_gamma_plus_half"));

    auto shape_crd = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
    auto shape_quarter_cof = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 1);
    auto shape_mass_inverse = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 2);

    auto shape_scaler = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 3);
    auto shape_pair_dr = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 4);
    auto shape_atom_i_serials = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 5);
    auto shape_atom_j_serials = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 6);
    auto shape_constant_rs = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 7);
    auto shape_constrain_ks = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 8);

    // Element counts are the products of each input's inferred shape.
    for (size_t i = 0; i < shape_scaler.size(); i++) ele_scaler *= shape_scaler[i];
    for (size_t i = 0; i < shape_pair_dr.size(); i++) ele_pair_dr *= shape_pair_dr[i];
    for (size_t i = 0; i < shape_atom_i_serials.size(); i++) ele_atom_i_serials *= shape_atom_i_serials[i];
    for (size_t i = 0; i < shape_atom_j_serials.size(); i++) ele_atom_j_serials *= shape_atom_j_serials[i];
    for (size_t i = 0; i < shape_constant_rs.size(); i++) ele_constant_rs *= shape_constant_rs[i];
    for (size_t i = 0; i < shape_constrain_ks.size(); i++) ele_constrain_ks *= shape_constrain_ks[i];

    for (size_t i = 0; i < shape_crd.size(); i++) ele_crd *= shape_crd[i];
    for (size_t i = 0; i < shape_quarter_cof.size(); i++) ele_quarter_cof *= shape_quarter_cof[i];
    for (size_t i = 0; i < shape_mass_inverse.size(); i++) ele_mass_inverse *= shape_mass_inverse[i];

    InitSizeLists();
    return true;
  }

  const std::vector<size_t> &GetInputSizeList() const override { return input_size_list_; }
  const std::vector<size_t> &GetOutputSizeList() const override { return output_size_list_; }
  const std::vector<size_t> &GetWorkspaceSizeList() const override { return workspace_size_list_; }

  bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
              const std::vector<AddressPtr> &outputs, void *stream_ptr) override {
    auto crd = GetDeviceAddress<const T>(inputs, 0);
    auto quarter_cof = GetDeviceAddress<const T>(inputs, 1);
    auto mass_inverse = GetDeviceAddress<const T>(inputs, 2);
    auto scaler = GetDeviceAddress<const T>(inputs, 3);
    auto pair_dr = GetDeviceAddress<const T>(inputs, 4);
    auto atom_i_serials = GetDeviceAddress<const T1>(inputs, 5);
    auto atom_j_serials = GetDeviceAddress<const T1>(inputs, 6);
    auto constant_rs = GetDeviceAddress<const T>(inputs, 7);
    auto constrain_ks = GetDeviceAddress<const T>(inputs, 8);
    auto d_need_pressure = GetDeviceAddress<T1>(inputs, 9);

    auto constrain_pair = GetDeviceAddress<T>(workspace, 0);

    auto uint_crd = GetDeviceAddress<T2>(outputs, 0);

    auto test_frc_f = GetDeviceAddress<T>(outputs, 1);
    auto d_atom_virial = GetDeviceAddress<T>(outputs, 2);

    // The pressure flag lives on the device, so it is read back (with a blocking sync)
    // before choosing which constraint cycle to run this step.
    cudaMemcpyAsync(&need_pressure, d_need_pressure, sizeof(int), cudaMemcpyDeviceToHost,
                    reinterpret_cast<cudaStream_t>(stream_ptr));
    cudaStreamSynchronize(reinterpret_cast<cudaStream_t>(stream_ptr));

    set_zero_force_with_virial(atom_numbers, constrain_pair_numbers, test_frc_f, d_atom_virial,
                               reinterpret_cast<cudaStream_t>(stream_ptr));

    for (int i = 0; i < iteration_numbers; i++) {
      refresh_uint_crd_update(atom_numbers, half_exp_gamma_plus_half, crd, quarter_cof, test_frc_f, mass_inverse,
                              uint_crd, reinterpret_cast<cudaStream_t>(stream_ptr));

      if (need_pressure) {
        constrain_force_cycle_with_virial_update(atom_numbers, constrain_pair_numbers, uint_crd, scaler, constrain_pair,
                                                 pair_dr, atom_i_serials, atom_j_serials, constant_rs, constrain_ks,
                                                 test_frc_f, d_atom_virial, reinterpret_cast<cudaStream_t>(stream_ptr));
      } else {
        constrain_force_cycle_update(atom_numbers, constrain_pair_numbers, uint_crd, scaler, constrain_pair, pair_dr,
                                     atom_i_serials, atom_j_serials, constant_rs, constrain_ks, test_frc_f,
                                     reinterpret_cast<cudaStream_t>(stream_ptr));
      }
    }
    step++;

    return true;
  }

 protected:
  void InitSizeLists() override {
    input_size_list_.push_back(ele_crd * sizeof(T));
    input_size_list_.push_back(ele_quarter_cof * sizeof(T));
    input_size_list_.push_back(ele_mass_inverse * sizeof(T));

    input_size_list_.push_back(ele_scaler * sizeof(T));
    input_size_list_.push_back(ele_pair_dr * sizeof(T));
    input_size_list_.push_back(ele_atom_i_serials * sizeof(T1));
    input_size_list_.push_back(ele_atom_j_serials * sizeof(T1));
    input_size_list_.push_back(ele_constant_rs * sizeof(T));
    input_size_list_.push_back(ele_constrain_ks * sizeof(T));
    input_size_list_.push_back(sizeof(T1));  // need_pressure flag (input 9)

    workspace_size_list_.push_back(constrain_pair_numbers * sizeof(CONSTRAIN_PAIR));

    output_size_list_.push_back(3 * atom_numbers * sizeof(T2));

    output_size_list_.push_back(3 * atom_numbers * sizeof(T));
    output_size_list_.push_back(constrain_pair_numbers * sizeof(T));
  }

 private:
  size_t ele_scaler = 1;
  size_t ele_pair_dr = 1;
  size_t ele_atom_i_serials = 1;
  size_t ele_atom_j_serials = 1;
  size_t ele_constant_rs = 1;
  size_t ele_constrain_ks = 1;
  size_t ele_crd = 1;
  size_t ele_quarter_cof = 1;
  size_t ele_mass_inverse = 1;

  std::vector<size_t> input_size_list_;
  std::vector<size_t> output_size_list_;
  std::vector<size_t> workspace_size_list_;
  int atom_numbers;
  int constrain_pair_numbers;
  int iteration_numbers;
  int need_pressure;
  float half_exp_gamma_plus_half;
  int step;
  struct CONSTRAIN_PAIR {
    int atom_i_serial;
    int atom_j_serial;
    float constant_r;
    float constrain_k;
  };
};
}  // namespace kernel
}  // namespace mindspore
#endif
@@ -13,12 +13,15 @@
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/**
 *Note:
 * LastCrdToDr. This is an experimental interface that is subject to change and/or deletion.
 */
#include "backend/kernel_compiler/gpu/sponge/simple_constrain/last_crd_to_dr_kernel.h"

namespace mindspore {
namespace kernel {
MS_REG_GPU_KERNEL_TWO(LastCrdToDr,
                      KernelAttr()
                        .AddInputAttr(kNumberTypeFloat32)
                        .AddInputAttr(kNumberTypeFloat32)
@@ -13,12 +13,16 @@
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
/**
 *Note:
 * RefreshCrdVel. This is an experimental interface that is subject to change and/or deletion.
 */

#include "backend/kernel_compiler/gpu/sponge/simple_constrain/refresh_crd_vel_kernel.h"

namespace mindspore {
namespace kernel {
MS_REG_GPU_KERNEL_TWO(RefreshCrdVel,
                      KernelAttr()
                        .AddInputAttr(kNumberTypeFloat32)
                        .AddInputAttr(kNumberTypeFloat32)
@@ -13,12 +13,15 @@
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/**
 *Note:
 * RefreshUintCrd. This is an experimental interface that is subject to change and/or deletion.
 */
#include "backend/kernel_compiler/gpu/sponge/simple_constrain/refresh_uint_crd_kernel.h"

namespace mindspore {
namespace kernel {
MS_REG_GPU_KERNEL_TWO(RefreshUintCrd,
                      KernelAttr()
                        .AddInputAttr(kNumberTypeFloat32)
                        .AddInputAttr(kNumberTypeFloat32)