!44925 remove sponge ops

Merge pull request !44925 from mamba_ni/remove_sponge
i-robot 2022-11-02 01:12:56 +00:00 committed by Gitee
commit cf8e460295
No known key found for this signature in database
GPG Key ID: 173E9B9CA92EEF8F
336 changed files with 0 additions and 36415 deletions

plugin/device/gpu/kernel/cuda_impl/sponge/angle/angle_atom_energy_impl.cu

@@ -1,66 +0,0 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "plugin/device/gpu/kernel/cuda_impl/sponge/angle/angle_atom_energy_impl.cuh"
#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/util.cuh"
#include "plugin/device/gpu/kernel/cuda_impl/sponge/common_sponge.cuh"
__global__ void AngleAtomEnergyKernel(int angle_numbers, const UNSIGNED_INT_VECTOR *uint_crd, const VECTOR *scaler,
const int *atom_a, const int *atom_b, const int *atom_c, const float *angle_k,
const float *angle_theta0, float *atom_energy) {
int angle_i = blockDim.x * blockIdx.x + threadIdx.x;
if (angle_i < angle_numbers) {
int atom_i = atom_a[angle_i];
int atom_j = atom_b[angle_i];
int atom_k = atom_c[angle_i];
float theta0 = angle_theta0[angle_i];
float k = angle_k[angle_i];
VECTOR drij = Get_Periodic_Displacement(uint_crd[atom_i], uint_crd[atom_j], scaler[0]);
VECTOR drkj = Get_Periodic_Displacement(uint_crd[atom_k], uint_crd[atom_j], scaler[0]);
float rij_2 = 1. / (drij * drij);
float rkj_2 = 1. / (drkj * drkj);
float rij_1_rkj_1 = sqrtf(rij_2 * rkj_2);
float costheta = drij * drkj * rij_1_rkj_1;
costheta = fmaxf(-0.999999, fminf(costheta, 0.999999));
float theta = acosf(costheta);
float dtheta = theta - theta0;
atomicAdd(&atom_energy[atom_i], k * dtheta * dtheta);
}
}
void AngleAtomEnergy(int angle_numbers, int atom_numbers, const int *uint_crd_f, const float *scaler_f,
const int *atom_a, const int *atom_b, const int *atom_c, const float *angle_k,
const float *angle_theta0, float *ene, cudaStream_t stream) {
Reset_List<<<ceilf(static_cast<float>(atom_numbers) / 128), 128, 0, stream>>>(atom_numbers, ene, 0.);
size_t thread_per_block = 128;
size_t block_per_grid = ceilf(static_cast<float>(angle_numbers) / 128);
UNSIGNED_INT_VECTOR *uint_crd =
const_cast<UNSIGNED_INT_VECTOR *>(reinterpret_cast<const UNSIGNED_INT_VECTOR *>(uint_crd_f));
VECTOR *scaler = const_cast<VECTOR *>(reinterpret_cast<const VECTOR *>(scaler_f));
AngleAtomEnergyKernel<<<block_per_grid, thread_per_block, 0, stream>>>(angle_numbers, uint_crd, scaler, atom_a,
atom_b, atom_c, angle_k, angle_theta0, ene);
return;
}
void AngleAtomEnergy(int angle_numbers, int atom_numbers, const int *uint_crd_f, const float *scaler_f,
const int *atom_a, const int *atom_b, const int *atom_c, const float *angle_k,
const float *angle_theta0, float *ene, cudaStream_t stream);
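For reference, the removed kernel accumulates a standard harmonic angle term onto the first atom of each angle; a sketch of the math it implements, in the code's own symbols:

E_i \mathrel{+}= k\,(\theta - \theta_0)^2,
\qquad
\cos\theta = \frac{\vec r_{ij} \cdot \vec r_{kj}}{\lVert \vec r_{ij} \rVert\, \lVert \vec r_{kj} \rVert}

The clamp of costheta to [-0.999999, 0.999999] keeps acosf away from the endpoints of its domain, where the derivative (used by the force kernels below) diverges.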

plugin/device/gpu/kernel/cuda_impl/sponge/angle/angle_atom_energy_impl.cuh

@@ -1,27 +0,0 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_ANGLE_ANGLE_ATOM_ENERGY_IMPL_H_
#define MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_ANGLE_ANGLE_ATOM_ENERGY_IMPL_H_
#include <curand_kernel.h>
#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/cuda_common.h"
#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/cuda_device_info.h"
CUDA_LIB_EXPORT void AngleAtomEnergy(int angle_numbers, int atom_numbers, const int *uint_crd_f, const float *scaler_f,
const int *atom_a, const int *atom_b, const int *atom_c, const float *angle_k,
const float *angle_theta0, float *ene, cudaStream_t stream);
#endif
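A minimal host-side sketch of how this entry point could be driven (a hypothetical driver, not part of the removed sources; RunAngleAtomEnergy and all buffer names are mine; error handling elided):

#include <cuda_runtime.h>
#include "plugin/device/gpu/kernel/cuda_impl/sponge/angle/angle_atom_energy_impl.cuh"

// Hypothetical usage: copy host arrays to the device, run the op on a
// dedicated stream, and copy the per-atom energies back.
void RunAngleAtomEnergy(int angle_numbers, int atom_numbers, const int *h_uint_crd,
                        const float *h_scaler, const int *h_a, const int *h_b, const int *h_c,
                        const float *h_k, const float *h_theta0, float *h_ene) {
  int *d_uint_crd, *d_a, *d_b, *d_c;
  float *d_scaler, *d_k, *d_theta0, *d_ene;
  cudaMalloc(&d_uint_crd, sizeof(int) * 3 * atom_numbers);  // packed UNSIGNED_INT_VECTORs
  cudaMalloc(&d_scaler, sizeof(float) * 3);                 // one VECTOR of scale factors
  cudaMalloc(&d_a, sizeof(int) * angle_numbers);
  cudaMalloc(&d_b, sizeof(int) * angle_numbers);
  cudaMalloc(&d_c, sizeof(int) * angle_numbers);
  cudaMalloc(&d_k, sizeof(float) * angle_numbers);
  cudaMalloc(&d_theta0, sizeof(float) * angle_numbers);
  cudaMalloc(&d_ene, sizeof(float) * atom_numbers);
  cudaMemcpy(d_uint_crd, h_uint_crd, sizeof(int) * 3 * atom_numbers, cudaMemcpyHostToDevice);
  cudaMemcpy(d_scaler, h_scaler, sizeof(float) * 3, cudaMemcpyHostToDevice);
  cudaMemcpy(d_a, h_a, sizeof(int) * angle_numbers, cudaMemcpyHostToDevice);
  cudaMemcpy(d_b, h_b, sizeof(int) * angle_numbers, cudaMemcpyHostToDevice);
  cudaMemcpy(d_c, h_c, sizeof(int) * angle_numbers, cudaMemcpyHostToDevice);
  cudaMemcpy(d_k, h_k, sizeof(float) * angle_numbers, cudaMemcpyHostToDevice);
  cudaMemcpy(d_theta0, h_theta0, sizeof(float) * angle_numbers, cudaMemcpyHostToDevice);
  cudaStream_t stream;
  cudaStreamCreate(&stream);
  // Zeroes d_ene internally (Reset_List), then accumulates the angle terms.
  AngleAtomEnergy(angle_numbers, atom_numbers, d_uint_crd, d_scaler, d_a, d_b, d_c, d_k,
                  d_theta0, d_ene, stream);
  cudaMemcpyAsync(h_ene, d_ene, sizeof(float) * atom_numbers, cudaMemcpyDeviceToHost, stream);
  cudaStreamSynchronize(stream);
  cudaStreamDestroy(stream);
  cudaFree(d_uint_crd); cudaFree(d_scaler); cudaFree(d_a); cudaFree(d_b); cudaFree(d_c);
  cudaFree(d_k); cudaFree(d_theta0); cudaFree(d_ene);
}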

plugin/device/gpu/kernel/cuda_impl/sponge/angle/angle_energy_impl.cu

@@ -1,63 +0,0 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "plugin/device/gpu/kernel/cuda_impl/sponge/angle/angle_energy_impl.cuh"
#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/util.cuh"
#include "plugin/device/gpu/kernel/cuda_impl/sponge/common_sponge.cuh"
__global__ void AngleEnergyKernel(int angle_numbers, const UNSIGNED_INT_VECTOR *uint_crd, const VECTOR *scaler,
const int *atom_a, const int *atom_b, const int *atom_c, const float *angle_k,
const float *angle_theta0, float *angle_energy) {
int angle_i = blockDim.x * blockIdx.x + threadIdx.x;
if (angle_i < angle_numbers) {
int atom_i = atom_a[angle_i];
int atom_j = atom_b[angle_i];
int atom_k = atom_c[angle_i];
float theta0 = angle_theta0[angle_i];
float k = angle_k[angle_i];
VECTOR drij = Get_Periodic_Displacement(uint_crd[atom_i], uint_crd[atom_j], scaler[0]);
VECTOR drkj = Get_Periodic_Displacement(uint_crd[atom_k], uint_crd[atom_j], scaler[0]);
float rij_2 = 1. / (drij * drij);
float rkj_2 = 1. / (drkj * drkj);
float rij_1_rkj_1 = sqrtf(rij_2 * rkj_2);
float costheta = drij * drkj * rij_1_rkj_1;
costheta = fmaxf(-0.999999, fminf(costheta, 0.999999));
float theta = acosf(costheta);
float dtheta = theta - theta0;
angle_energy[angle_i] = k * dtheta * dtheta;
}
}
void AngleEnergy(int angle_numbers, const int *uint_crd_f, const float *scaler_f, const int *atom_a, const int *atom_b,
const int *atom_c, const float *angle_k, const float *angle_theta0, float *ene, cudaStream_t stream) {
size_t thread_per_block = 128;
size_t block_per_grid = ceilf(static_cast<float>(angle_numbers) / 128);
UNSIGNED_INT_VECTOR *uint_crd =
const_cast<UNSIGNED_INT_VECTOR *>(reinterpret_cast<const UNSIGNED_INT_VECTOR *>(uint_crd_f));
VECTOR *scaler = const_cast<VECTOR *>(reinterpret_cast<const VECTOR *>(scaler_f));
AngleEnergyKernel<<<block_per_grid, thread_per_block, 0, stream>>>(angle_numbers, uint_crd, scaler, atom_a, atom_b,
atom_c, angle_k, angle_theta0, ene);
return;
}
void AngleEnergy(int angle_numbers, const int *uint_crd_f, const float *scaler_f, const int *atom_a, const int *atom_b,
const int *atom_c, const float *angle_k, const float *angle_theta0, float *ene, cudaStream_t stream);

plugin/device/gpu/kernel/cuda_impl/sponge/angle/angle_energy_impl.cuh

@@ -1,27 +0,0 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_ANGLE_ANGLE_ENERGY_IMPL_H_
#define MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_ANGLE_ANGLE_ENERGY_IMPL_H_
#include <curand_kernel.h>
#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/cuda_common.h"
#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/cuda_device_info.h"
CUDA_LIB_EXPORT void AngleEnergy(int angle_numbers, const int *uint_crd_f, const float *scaler_f, const int *atom_a,
const int *atom_b, const int *atom_c, const float *angle_k, const float *angle_theta0,
float *ene, cudaStream_t stream);
#endif

plugin/device/gpu/kernel/cuda_impl/sponge/angle/angle_force_impl.cu

@@ -1,86 +0,0 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "plugin/device/gpu/kernel/cuda_impl/sponge/angle/angle_force_impl.cuh"
#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/util.cuh"
#include "plugin/device/gpu/kernel/cuda_impl/sponge/common_sponge.cuh"
__global__ void AngleForceKernel(int angle_numbers, const UNSIGNED_INT_VECTOR *uint_crd, const VECTOR *scaler,
const int *atom_a, const int *atom_b, const int *atom_c, const float *angle_k,
const float *angle_theta0, VECTOR *frc) {
int angle_i = blockDim.x * blockIdx.x + threadIdx.x;
if (angle_i < angle_numbers) {
int atom_i = atom_a[angle_i];
int atom_j = atom_b[angle_i];
int atom_k = atom_c[angle_i];
float theta0 = angle_theta0[angle_i];
float k = angle_k[angle_i];
VECTOR drij = Get_Periodic_Displacement(uint_crd[atom_i], uint_crd[atom_j], scaler[0]);
VECTOR drkj = Get_Periodic_Displacement(uint_crd[atom_k], uint_crd[atom_j], scaler[0]);
float rij_2 = 1. / (drij * drij);
float rkj_2 = 1. / (drkj * drkj);
float rij_1_rkj_1 = sqrtf(rij_2 * rkj_2);
float costheta = drij * drkj * rij_1_rkj_1;
costheta = fmaxf(-0.999999, fminf(costheta, 0.999999));
float theta = acosf(costheta);
float dtheta = theta - theta0;
k = -2 * k * dtheta / sinf(theta);
float common_factor_cross = k * rij_1_rkj_1;
float common_factor_self = k * costheta;
VECTOR fi = common_factor_self * rij_2 * drij - common_factor_cross * drkj;
VECTOR fk = common_factor_self * rkj_2 * drkj - common_factor_cross * drij;
atomicAdd(&frc[atom_i].x, fi.x);
atomicAdd(&frc[atom_i].y, fi.y);
atomicAdd(&frc[atom_i].z, fi.z);
atomicAdd(&frc[atom_k].x, fk.x);
atomicAdd(&frc[atom_k].y, fk.y);
atomicAdd(&frc[atom_k].z, fk.z);
fi = -fi - fk;
atomicAdd(&frc[atom_j].x, fi.x);
atomicAdd(&frc[atom_j].y, fi.y);
atomicAdd(&frc[atom_j].z, fi.z);
}
}
void AngleForce(int angle_numbers, int atom_numbers, const int *uint_crd_f, const float *scaler_f, const int *atom_a,
const int *atom_b, const int *atom_c, const float *angle_k, const float *angle_theta0, float *frc_f,
cudaStream_t stream) {
Reset_List<<<ceilf(static_cast<float>(3. * atom_numbers) / 128), 128, 0, stream>>>(3 * atom_numbers, frc_f, 0.);
size_t thread_per_block = 128;
size_t block_per_grid = ceilf(static_cast<float>(angle_numbers) / 128);
UNSIGNED_INT_VECTOR *uint_crd =
const_cast<UNSIGNED_INT_VECTOR *>(reinterpret_cast<const UNSIGNED_INT_VECTOR *>(uint_crd_f));
VECTOR *frc = const_cast<VECTOR *>(reinterpret_cast<const VECTOR *>(frc_f));
VECTOR *scaler = const_cast<VECTOR *>(reinterpret_cast<const VECTOR *>(scaler_f));
AngleForceKernel<<<block_per_grid, thread_per_block, 0, stream>>>(angle_numbers, uint_crd, scaler, atom_a, atom_b,
atom_c, angle_k, angle_theta0, frc);
return;
}
void AngleForce(int angle_numbers, int atom_numbers, const int *uint_crd_f, const float *scaler_f, const int *atom_a,
const int *atom_b, const int *atom_c, const float *angle_k, const float *angle_theta0, float *frc_f,
cudaStream_t stream);
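The common_factor_self / common_factor_cross split in AngleForceKernel is the gradient of the harmonic angle energy; a sketch of that step, under the same notation (note the kernel folds the leading minus sign into its reassignment of k):

\vec F_i = -\nabla_i E
= \frac{2k(\theta-\theta_0)}{\sin\theta}\,\nabla_i \cos\theta
= \frac{2k(\theta-\theta_0)}{\sin\theta}
  \left(\frac{\vec r_{kj}}{\lVert\vec r_{ij}\rVert\,\lVert\vec r_{kj}\rVert}
        - \cos\theta\,\frac{\vec r_{ij}}{\lVert\vec r_{ij}\rVert^{2}}\right)

\vec F_k follows by swapping i and k, and \vec F_j = -(\vec F_i + \vec F_k) by Newton's third law, which is exactly the final three atomicAdds on atom_j.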

plugin/device/gpu/kernel/cuda_impl/sponge/angle/angle_force_impl.cuh

@@ -1,27 +0,0 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_ANGLE_ANGLE_FORCE_IMPL_H_
#define MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_ANGLE_ANGLE_FORCE_IMPL_H_
#include <curand_kernel.h>
#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/cuda_common.h"
#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/cuda_device_info.h"
CUDA_LIB_EXPORT void AngleForce(int angle_numbers, int atom_numbers, const int *uint_crd_f, const float *scaler_f,
const int *atom_a, const int *atom_b, const int *atom_c, const float *angle_k,
const float *angle_theta0, float *frc_f, cudaStream_t stream);
#endif

plugin/device/gpu/kernel/cuda_impl/sponge/angle/angle_force_with_atom_energy_impl.cu

@@ -1,90 +0,0 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "plugin/device/gpu/kernel/cuda_impl/sponge/angle/angle_force_with_atom_energy_impl.cuh"
#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/util.cuh"
#include "plugin/device/gpu/kernel/cuda_impl/sponge/common_sponge.cuh"
__global__ void AngleForceWithAtomEnergyKernel(int angle_numbers, const UNSIGNED_INT_VECTOR *uint_crd,
const VECTOR *scaler, const int *atom_a, const int *atom_b,
const int *atom_c, const float *angle_k, const float *angle_theta0,
VECTOR *frc, float *atom_energy) {
int angle_i = blockDim.x * blockIdx.x + threadIdx.x;
if (angle_i < angle_numbers) {
int atom_i = atom_a[angle_i];
int atom_j = atom_b[angle_i];
int atom_k = atom_c[angle_i];
float theta0 = angle_theta0[angle_i];
float k = angle_k[angle_i];
float k2 = k;
VECTOR drij = Get_Periodic_Displacement(uint_crd[atom_i], uint_crd[atom_j], scaler[0]);
VECTOR drkj = Get_Periodic_Displacement(uint_crd[atom_k], uint_crd[atom_j], scaler[0]);
float rij_2 = 1. / (drij * drij);
float rkj_2 = 1. / (drkj * drkj);
float rij_1_rkj_1 = sqrtf(rij_2 * rkj_2);
float costheta = drij * drkj * rij_1_rkj_1;
costheta = fmaxf(-0.999999, fminf(costheta, 0.999999));
float theta = acosf(costheta);
float dtheta = theta - theta0;
k = -2 * k * dtheta / sinf(theta);
float common_factor_cross = k * rij_1_rkj_1;
float common_factor_self = k * costheta;
VECTOR fi = common_factor_self * rij_2 * drij - common_factor_cross * drkj;
VECTOR fk = common_factor_self * rkj_2 * drkj - common_factor_cross * drij;
atomicAdd(&frc[atom_i].x, fi.x);
atomicAdd(&frc[atom_i].y, fi.y);
atomicAdd(&frc[atom_i].z, fi.z);
atomicAdd(&frc[atom_k].x, fk.x);
atomicAdd(&frc[atom_k].y, fk.y);
atomicAdd(&frc[atom_k].z, fk.z);
fi = -fi - fk;
atomicAdd(&frc[atom_j].x, fi.x);
atomicAdd(&frc[atom_j].y, fi.y);
atomicAdd(&frc[atom_j].z, fi.z);
atomicAdd(&atom_energy[atom_i], k2 * dtheta * dtheta);
}
}
void AngleForceWithAtomEnergy(int angle_numbers, int atom_numbers, const int *uint_crd_f, const float *scaler_f,
const int *atom_a, const int *atom_b, const int *atom_c, const float *angle_k,
const float *angle_theta0, float *frc_f, float *ene, cudaStream_t stream) {
Reset_List<<<ceilf(static_cast<float>(3. * atom_numbers) / 128), 128, 0, stream>>>(3 * atom_numbers, frc_f, 0.);
size_t thread_per_block = 128;
size_t block_per_grid = ceilf(static_cast<float>(angle_numbers) / 128);
UNSIGNED_INT_VECTOR *uint_crd =
const_cast<UNSIGNED_INT_VECTOR *>(reinterpret_cast<const UNSIGNED_INT_VECTOR *>(uint_crd_f));
VECTOR *frc = const_cast<VECTOR *>(reinterpret_cast<const VECTOR *>(frc_f));
VECTOR *scaler = const_cast<VECTOR *>(reinterpret_cast<const VECTOR *>(scaler_f));
AngleForceWithAtomEnergyKernel<<<block_per_grid, thread_per_block, 0, stream>>>(
angle_numbers, uint_crd, scaler, atom_a, atom_b, atom_c, angle_k, angle_theta0, frc, ene);
return;
}
void AngleForceWithAtomEnergy(int angle_numbers, int atom_numbers, const int *uint_crd_f, const float *scaler_f,
const int *atom_a, const int *atom_b, const int *atom_c, const float *angle_k,
const float *angle_theta0, float *frc_f, float *ene, cudaStream_t stream);

plugin/device/gpu/kernel/cuda_impl/sponge/angle/angle_force_with_atom_energy_impl.cuh

@@ -1,28 +0,0 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_ANGLE_ANGLE_FORCE_WITH_ATOM_ENERGY_IMPL_H_
#define MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_ANGLE_ANGLE_FORCE_WITH_ATOM_ENERGY_IMPL_H_
#include <curand_kernel.h>
#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/cuda_common.h"
#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/cuda_device_info.h"
CUDA_LIB_EXPORT void AngleForceWithAtomEnergy(int angle_numbers, int atom_numbers, const int *uint_crd_f,
const float *scaler_f, const int *atom_a, const int *atom_b,
const int *atom_c, const float *angle_k, const float *angle_theta0,
float *frc_f, float *ene, cudaStream_t stream);
#endif

plugin/device/gpu/kernel/cuda_impl/sponge/bond/bond_atom_energy_cuda_gpu_impl.cu

@@ -1,57 +0,0 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "plugin/device/gpu/kernel/cuda_impl/sponge/bond/bond_atom_energy_cuda_gpu_impl.cuh"
#include "plugin/device/gpu/kernel/cuda_impl/sponge/common_sponge.cuh"
#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/util.cuh"
__global__ void BondAtomEnergyCudaKernel(const int bond_numbers, const UNSIGNED_INT_VECTOR *uint_crd,
const VECTOR *scaler, const int *atom_a, const int *atom_b,
const float *bond_k, const float *bond_r0, float *atom_ene) {
int bond_i = blockDim.x * blockIdx.x + threadIdx.x;
if (bond_i < bond_numbers) {
int atom_i = atom_a[bond_i];
int atom_j = atom_b[bond_i];
float k = bond_k[bond_i];
float r0 = bond_r0[bond_i];
VECTOR dr = Get_Periodic_Displacement(uint_crd[atom_i], uint_crd[atom_j], scaler[0]);
float r1 = norm3df(dr.x, dr.y, dr.z);
float tempf = r1 - r0;
atomicAdd(&atom_ene[atom_i], k * tempf * tempf);
}
}
void BondAtomEnergy(int bond_numbers, int atom_numbers, const int *uint_crd_f, const float *scaler_f, const int *atom_a,
const int *atom_b, const float *bond_k, const float *bond_r0, float *atom_ene,
cudaStream_t stream) {
Reset_List<<<ceilf(static_cast<float>(atom_numbers) / 128), 128, 0, stream>>>(atom_numbers, atom_ene, 0.);
size_t thread_per_block = 128;
size_t block_per_grid = ceilf(static_cast<float>(bond_numbers) / 128);
UNSIGNED_INT_VECTOR *uint_crd =
const_cast<UNSIGNED_INT_VECTOR *>(reinterpret_cast<const UNSIGNED_INT_VECTOR *>(uint_crd_f));
VECTOR *scaler = const_cast<VECTOR *>(reinterpret_cast<const VECTOR *>(scaler_f));
BondAtomEnergyCudaKernel<<<block_per_grid, thread_per_block, 0, stream>>>(bond_numbers, uint_crd, scaler, atom_a,
atom_b, bond_k, bond_r0, atom_ene);
return;
}
void BondAtomEnergy(int bond_numbers, int atom_numbers, const int *uint_crd_f, const float *scaler_f, const int *atom_a,
const int *atom_b, const float *bond_k, const float *bond_r0, float *atom_ene, cudaStream_t stream);
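The removed bond kernels all start from the same harmonic pair term, with the distance taken under the periodic minimum image:

E_{\text{bond}} = k\,(r - r_0)^2, \qquad r = \lVert \vec r_{ij} \rVert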

plugin/device/gpu/kernel/cuda_impl/sponge/bond/bond_atom_energy_cuda_gpu_impl.cuh

@@ -1,28 +0,0 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_BOND_BOND_ATOM_ENERGY_CUDA_GPU_IMPL_H_
#define MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_BOND_BOND_ATOM_ENERGY_CUDA_GPU_IMPL_H_
#include <curand_kernel.h>
#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/cuda_common.h"
#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/cuda_device_info.h"
CUDA_LIB_EXPORT void BondAtomEnergy(int bond_numbers, int atom_numbers, const int *uint_crd_f, const float *scaler_f,
const int *atom_a, const int *atom_b, const float *bond_k, const float *bond_r0,
float *atom_ene, cudaStream_t stream);
#endif  // MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_BOND_BOND_ATOM_ENERGY_CUDA_GPU_IMPL_H_

plugin/device/gpu/kernel/cuda_impl/sponge/bond/bond_energy_cuda_gpu_impl.cu

@@ -1,61 +0,0 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "plugin/device/gpu/kernel/cuda_impl/sponge/bond/bond_energy_cuda_gpu_impl.cuh"
#include "plugin/device/gpu/kernel/cuda_impl/sponge/common_sponge.cuh"
#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/util.cuh"
__global__ void BondEnergyCudaKernel(const int bond_numbers, const UNSIGNED_INT_VECTOR *uint_crd, const VECTOR *scaler,
const int *atom_a, const int *atom_b, const float *bond_k, const float *bond_r0,
float *bond_ene) {
int bond_i = blockDim.x * blockIdx.x + threadIdx.x;
if (bond_i < bond_numbers) {
int atom_i = atom_a[bond_i];
int atom_j = atom_b[bond_i];
float k = bond_k[bond_i];
float r0 = bond_r0[bond_i];
VECTOR dr = Get_Periodic_Displacement(uint_crd[atom_i], uint_crd[atom_j], scaler[0]);
float r1 = norm3df(dr.x, dr.y, dr.z);
float tempf = r1 - r0;
float temp = k * tempf * tempf;
bond_ene[bond_i] = temp;
}
}
void BondEnergy(int bond_numbers, int atom_numbers, const unsigned int *uint_crd_f, const float *scaler_f,
const int *atom_a, const int *atom_b, const float *bond_k, const float *bond_r0, float *bond_ene,
cudaStream_t stream) {
size_t thread_per_block = 128;
size_t block_per_grid = ceilf(static_cast<float>(bond_numbers) / 128);
UNSIGNED_INT_VECTOR *uint_crd =
const_cast<UNSIGNED_INT_VECTOR *>(reinterpret_cast<const UNSIGNED_INT_VECTOR *>(uint_crd_f));
VECTOR *scaler = const_cast<VECTOR *>(reinterpret_cast<const VECTOR *>(scaler_f));
Reset_List<<<(unsigned int)ceilf(static_cast<float>(bond_numbers) / 128), 128, 0, stream>>>(bond_numbers,
bond_ene, 0.);
BondEnergyCudaKernel<<<block_per_grid, thread_per_block, 0, stream>>>(bond_numbers, uint_crd, scaler,
atom_a, atom_b, bond_k, bond_r0,
bond_ene);
return;
}
void BondEnergy(int bond_numbers, int atom_numbers, const unsigned int *uint_crd_f, const float *scaler_f,
const int *atom_a, const int *atom_b, const float *bond_k, const float *bond_r0,
float *bond_ene, cudaStream_t stream);

plugin/device/gpu/kernel/cuda_impl/sponge/bond/bond_energy_cuda_gpu_impl.cuh

@@ -1,28 +0,0 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_BOND_BOND_ENERGY_CUDA_GPU_IMPL_H_
#define MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_BOND_BOND_ENERGY_CUDA_GPU_IMPL_H_
#include <curand_kernel.h>
#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/cuda_common.h"
#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/cuda_device_info.h"
CUDA_LIB_EXPORT void BondEnergy(int bond_numbers, int atom_numbers, const unsigned int *uint_crd_f,
const float *scaler_f, const int *atom_a, const int *atom_b, const float *bond_k,
const float *bond_r0, float *bond_ene, cudaStream_t stream);
#endif  // MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_BOND_BOND_ENERGY_CUDA_GPU_IMPL_H_

plugin/device/gpu/kernel/cuda_impl/sponge/bond/bond_force_cuda_gpu_impl.cu

@@ -1,63 +0,0 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "plugin/device/gpu/kernel/cuda_impl/sponge/bond/bond_force_cuda_gpu_impl.cuh"
#include "plugin/device/gpu/kernel/cuda_impl/sponge/common_sponge.cuh"
#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/util.cuh"
__global__ void BondForceCudaKernel(int bond_numbers, const UNSIGNED_INT_VECTOR *uint_crd, const VECTOR *scaler,
const int *atom_a, const int *atom_b, const float *bond_k, const float *bond_r0,
VECTOR *frc) {
int bond_i = blockDim.x * blockIdx.x + threadIdx.x;
if (bond_i < bond_numbers) {
int atom_i = atom_a[bond_i];
int atom_j = atom_b[bond_i];
float k = bond_k[bond_i];
float r0 = bond_r0[bond_i];
VECTOR dr = Get_Periodic_Displacement(uint_crd[atom_i], uint_crd[atom_j], scaler[0]);
float r_1 = rnorm3df(dr.x, dr.y, dr.z);
float tempf = 1.0 - r0 * r_1;
VECTOR f = 2 * tempf * k * dr;
atomicAdd(&frc[atom_i].x, -f.x);
atomicAdd(&frc[atom_i].y, -f.y);
atomicAdd(&frc[atom_i].z, -f.z);
atomicAdd(&frc[atom_j].x, f.x);
atomicAdd(&frc[atom_j].y, f.y);
atomicAdd(&frc[atom_j].z, f.z);
}
}
void BondForce(int bond_numbers, int atom_numbers, const unsigned int *uint_crd_f, const float *scaler_f,
const int *atom_a, const int *atom_b, const float *bond_k, const float *bond_r0, float *frc_f,
cudaStream_t stream) {
Reset_List<<<ceilf(static_cast<float>(3. * atom_numbers) / 128), 128, 0, stream>>>(3 * atom_numbers, frc_f, 0.);
size_t thread_per_block = 128;
size_t block_per_grid = ceilf(static_cast<float>(bond_numbers) / 128);
UNSIGNED_INT_VECTOR *uint_crd =
const_cast<UNSIGNED_INT_VECTOR *>(reinterpret_cast<const UNSIGNED_INT_VECTOR *>(uint_crd_f));
VECTOR *scaler = const_cast<VECTOR *>(reinterpret_cast<const VECTOR *>(scaler_f));
VECTOR *frc = const_cast<VECTOR *>(reinterpret_cast<const VECTOR *>(frc_f));
BondForceCudaKernel<<<block_per_grid, thread_per_block, 0, stream>>>(bond_numbers, uint_crd, scaler, atom_a, atom_b,
bond_k, bond_r0, frc);
return;
}
void BondForce(int bond_numbers, int atom_numbers, const unsigned int *uint_crd_f, const float *scaler_f,
const int *atom_a, const int *atom_b, const float *bond_k, const float *bond_r0,
float *frc_f, cudaStream_t stream);
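BondForceCudaKernel folds the unit vector into the prefactor so that a single rnorm3df suffices; the identity it relies on (a sketch, same symbols, with \hat r_{ij} pointing from atom j to atom i):

\vec F_j = 2k\,(r - r_0)\,\hat r_{ij} = 2k\left(1 - \frac{r_0}{r}\right)\vec r_{ij}

which is exactly 2 * tempf * k * dr with tempf = 1 - r0 * r_1; atom_i receives the opposite sign.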

plugin/device/gpu/kernel/cuda_impl/sponge/bond/bond_force_cuda_gpu_impl.cuh

@@ -1,27 +0,0 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_BOND_BOND_FORCE_CUDA_GPU_IMPL_H_
#define MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_BOND_BOND_FORCE_CUDA_GPU_IMPL_H_
#include <curand_kernel.h>
#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/cuda_common.h"
#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/cuda_device_info.h"
CUDA_LIB_EXPORT void BondForce(int bond_numbers, int atom_numbers, const unsigned int *uint_crd_f,
const float *scaler_f, const int *atom_a, const int *atom_b, const float *bond_k,
const float *bond_r0, float *frc_f, cudaStream_t stream);
#endif  // MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_BOND_BOND_FORCE_CUDA_GPU_IMPL_H_

plugin/device/gpu/kernel/cuda_impl/sponge/bond/bond_force_with_atom_energy_and_virial_impl.cu

@@ -1,75 +0,0 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "plugin/device/gpu/kernel/cuda_impl/sponge/bond/bond_force_with_atom_energy_and_virial_impl.cuh"
#include "plugin/device/gpu/kernel/cuda_impl/sponge/common_sponge.cuh"
#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/util.cuh"
__global__ void BondForceWithAtomEnergyAndVirialKernel(const int bond_numbers, const UNSIGNED_INT_VECTOR *uint_crd,
const VECTOR *scaler, const int *atom_a, const int *atom_b,
const float *bond_k, const float *bond_r0, VECTOR *frc,
float *atom_energy, float *atom_virial) {
int bond_i = blockDim.x * blockIdx.x + threadIdx.x;
if (bond_i < bond_numbers) {
int atom_i = atom_a[bond_i];
int atom_j = atom_b[bond_i];
float k = bond_k[bond_i];
float r0 = bond_r0[bond_i];
VECTOR dr = Get_Periodic_Displacement(uint_crd[atom_i], uint_crd[atom_j], scaler[0]);
float abs_r = norm3df(dr.x, dr.y, dr.z);
float r_1 = 1. / abs_r;
float tempf2 = abs_r - r0;
float tempf = 2 * tempf2 * k;
VECTOR f = tempf * r_1 * dr;
atomicAdd(&frc[atom_i].x, -f.x);
atomicAdd(&frc[atom_i].y, -f.y);
atomicAdd(&frc[atom_i].z, -f.z);
atomicAdd(&frc[atom_j].x, f.x);
atomicAdd(&frc[atom_j].y, f.y);
atomicAdd(&frc[atom_j].z, f.z);
atomicAdd(&atom_virial[atom_i], -tempf * abs_r);
atomicAdd(&atom_energy[atom_i], k * tempf2 * tempf2);
}
}
void BondForceWithAtomEnergyAndVirial(int bond_numbers, int atom_numbers, const unsigned int *uint_crd_f,
const float *scaler_f, const int *atom_a, const int *atom_b, const float *bond_k,
const float *bond_r0, float *frc_f, float *atom_energy, float *atom_v,
cudaStream_t stream) {
Reset_List<<<ceilf(static_cast<float>(3. * atom_numbers) / 128), 128, 0, stream>>>(3 * atom_numbers, frc_f, 0.);
Reset_List<<<ceilf(static_cast<float>(atom_numbers) / 128), 128, 0, stream>>>(atom_numbers, atom_v, 0.);
Reset_List<<<ceilf(static_cast<float>(atom_numbers) / 128), 128, 0, stream>>>(atom_numbers, atom_energy, 0.);
size_t thread_per_block = 128;
size_t block_per_grid = ceilf(static_cast<float>(bond_numbers) / 128);
UNSIGNED_INT_VECTOR *uint_crd =
const_cast<UNSIGNED_INT_VECTOR *>(reinterpret_cast<const UNSIGNED_INT_VECTOR *>(uint_crd_f));
VECTOR *frc = const_cast<VECTOR *>(reinterpret_cast<const VECTOR *>(frc_f));
VECTOR *scaler = const_cast<VECTOR *>(reinterpret_cast<const VECTOR *>(scaler_f));
BondForceWithAtomEnergyAndVirialKernel<<<block_per_grid, thread_per_block, 0, stream>>>(
bond_numbers, uint_crd, scaler, atom_a, atom_b, bond_k, bond_r0, frc, atom_energy, atom_v);
return;
}
void BondForceWithAtomEnergyAndVirial(int bond_numbers, int atom_numbers, const unsigned int *uint_crd_f,
const float *scaler_f, const int *atom_a, const int *atom_b, const float *bond_k,
const float *bond_r0, float *frc_f, float *atom_energy, float *atom_v,
cudaStream_t stream);
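The quantity accumulated into atom_virial is the per-bond virial contribution; writing it out (a sketch, with tempf = 2k(r - r_0) as in the kernel) shows it equals the -tempf * abs_r the code adds:

W_{ij} = \vec r_{ij}\cdot\vec F_i = -\,2k\,(r - r_0)\,r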

plugin/device/gpu/kernel/cuda_impl/sponge/bond/bond_force_with_atom_energy_and_virial_impl.cuh

@@ -1,29 +0,0 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_BOND_BOND_FORCE_WITH_ATOM_ENERGY_AND_VIRIAL_IMPL_H_
#define MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_BOND_BOND_FORCE_WITH_ATOM_ENERGY_AND_VIRIAL_IMPL_H_
#include <curand_kernel.h>
#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/cuda_common.h"
#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/cuda_device_info.h"
CUDA_LIB_EXPORT void BondForceWithAtomEnergyAndVirial(int bond_numbers, int atom_numbers,
const unsigned int *uint_crd_f, const float *scaler_f,
const int *atom_a, const int *atom_b, const float *bond_k,
const float *bond_r0, float *frc_f, float *atom_energy,
float *atom_v, cudaStream_t stream);
#endif

plugin/device/gpu/kernel/cuda_impl/sponge/bond/bond_force_with_atom_energy_impl.cu

@@ -1,69 +0,0 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "plugin/device/gpu/kernel/cuda_impl/sponge/bond/bond_force_with_atom_energy_impl.cuh"
#include "plugin/device/gpu/kernel/cuda_impl/sponge/common_sponge.cuh"
#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/util.cuh"
__global__ void BondForceWithAtomEnergyKernel(int bond_numbers, const UNSIGNED_INT_VECTOR *uint_crd,
const VECTOR *scaler, const int *atom_a, const int *atom_b,
const float *bond_k, const float *bond_r0, VECTOR *frc,
float *atom_energy) {
int bond_i = blockDim.x * blockIdx.x + threadIdx.x;
if (bond_i < bond_numbers) {
int atom_i = atom_a[bond_i];
int atom_j = atom_b[bond_i];
float k = bond_k[bond_i];
float r0 = bond_r0[bond_i];
VECTOR dr = Get_Periodic_Displacement(uint_crd[atom_i], uint_crd[atom_j], scaler[0]);
float abs_r = norm3df(dr.x, dr.y, dr.z);
float r_1 = 1. / abs_r;
float tempf = abs_r - r0;
VECTOR f = 2 * tempf * r_1 * k * dr;
atomicAdd(&frc[atom_i].x, -f.x);
atomicAdd(&frc[atom_i].y, -f.y);
atomicAdd(&frc[atom_i].z, -f.z);
atomicAdd(&frc[atom_j].x, f.x);
atomicAdd(&frc[atom_j].y, f.y);
atomicAdd(&frc[atom_j].z, f.z);
atomicAdd(&atom_energy[atom_i], k * tempf * tempf);
}
}
void BondForceWithAtomEnergy(int bond_numbers, int atom_numbers, const int *uint_crd_f, const float *scaler_f,
const int *atom_a, const int *atom_b, const float *bond_k, const float *bond_r0,
float *frc_f, float *atom_e, cudaStream_t stream) {
Reset_List<<<ceilf(static_cast<float>(3. * atom_numbers) / 128), 128, 0, stream>>>(3 * atom_numbers, frc_f, 0.);
Reset_List<<<ceilf(static_cast<float>(atom_numbers) / 128), 128, 0, stream>>>(atom_numbers, atom_e, 0.);
size_t thread_per_block = 128;
size_t block_per_grid = ceilf(static_cast<float>(bond_numbers) / 128);
UNSIGNED_INT_VECTOR *uint_crd =
const_cast<UNSIGNED_INT_VECTOR *>(reinterpret_cast<const UNSIGNED_INT_VECTOR *>(uint_crd_f));
VECTOR *frc = const_cast<VECTOR *>(reinterpret_cast<const VECTOR *>(frc_f));
VECTOR *scaler = const_cast<VECTOR *>(reinterpret_cast<const VECTOR *>(scaler_f));
BondForceWithAtomEnergyKernel<<<block_per_grid, thread_per_block, 0, stream>>>(bond_numbers, uint_crd, scaler, atom_a,
atom_b, bond_k, bond_r0, frc, atom_e);
return;
}
void BondForceWithAtomEnergy(int bond_numbers, int atom_numbers, const int *uint_crd_f, const float *scaler_f,
const int *atom_a, const int *atom_b, const float *bond_k, const float *bond_r0,
float *frc_f, float *atom_e, cudaStream_t stream);

plugin/device/gpu/kernel/cuda_impl/sponge/bond/bond_force_with_atom_energy_impl.cuh

@@ -1,28 +0,0 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_BOND_BOND_FORCE_WITH_ATOM_ENERGY_IMPL_H_
#define MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_BOND_BOND_FORCE_WITH_ATOM_ENERGY_IMPL_H_
#include <curand_kernel.h>
#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/cuda_common.h"
#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/cuda_device_info.h"
CUDA_LIB_EXPORT void BondForceWithAtomEnergy(int bond_numbers, int atom_numbers, const int *uint_crd_f,
const float *scaler_f, const int *atom_a, const int *atom_b,
const float *bond_k, const float *bond_r0, float *frc_f, float *atom_e,
cudaStream_t stream);
#endif

plugin/device/gpu/kernel/cuda_impl/sponge/bond/bond_force_with_atom_virial_impl.cu

@@ -1,69 +0,0 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "plugin/device/gpu/kernel/cuda_impl/sponge/bond/bond_force_with_atom_virial_impl.cuh"
#include "plugin/device/gpu/kernel/cuda_impl/sponge/common_sponge.cuh"
#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/util.cuh"
__global__ void BondForceWithAtomVirialKernel(int bond_numbers, const UNSIGNED_INT_VECTOR *uint_crd,
const VECTOR *scaler, const int *atom_a, const int *atom_b,
const float *bond_k, const float *bond_r0, VECTOR *frc,
float *atom_virial) {
int bond_i = blockDim.x * blockIdx.x + threadIdx.x;
if (bond_i < bond_numbers) {
int atom_i = atom_a[bond_i];
int atom_j = atom_b[bond_i];
float k = bond_k[bond_i];
float r0 = bond_r0[bond_i];
VECTOR dr = Get_Periodic_Displacement(uint_crd[atom_i], uint_crd[atom_j], scaler[0]);
float abs_r = norm3df(dr.x, dr.y, dr.z);
float r_1 = 1. / abs_r;
float tempf = (abs_r - r0) * k;
VECTOR f = 2 * tempf * r_1 * dr;
atomicAdd(&frc[atom_i].x, -f.x);
atomicAdd(&frc[atom_i].y, -f.y);
atomicAdd(&frc[atom_i].z, -f.z);
atomicAdd(&frc[atom_j].x, f.x);
atomicAdd(&frc[atom_j].y, f.y);
atomicAdd(&frc[atom_j].z, f.z);
atomicAdd(&atom_virial[atom_i], abs_r * tempf);
}
}
void BondForceWithAtomVirial(int bond_numbers, int atom_numbers, const int *uint_crd_f, const float *scaler_f,
const int *atom_a, const int *atom_b, const float *bond_k, const float *bond_r0,
float *frc_f, float *atom_v, cudaStream_t stream) {
Reset_List<<<ceilf(static_cast<float>(3. * atom_numbers) / 128), 128, 0, stream>>>(3 * atom_numbers, frc_f, 0.);
Reset_List<<<ceilf(static_cast<float>(atom_numbers) / 128), 128, 0, stream>>>(atom_numbers, atom_v, 0.);
size_t thread_per_block = 128;
size_t block_per_grid = ceilf(static_cast<float>(bond_numbers) / 128);
UNSIGNED_INT_VECTOR *uint_crd =
const_cast<UNSIGNED_INT_VECTOR *>(reinterpret_cast<const UNSIGNED_INT_VECTOR *>(uint_crd_f));
VECTOR *frc = const_cast<VECTOR *>(reinterpret_cast<const VECTOR *>(frc_f));
VECTOR *scaler = const_cast<VECTOR *>(reinterpret_cast<const VECTOR *>(scaler_f));
BondForceWithAtomVirialKernel<<<block_per_grid, thread_per_block, 0, stream>>>(bond_numbers, uint_crd, scaler, atom_a,
atom_b, bond_k, bond_r0, frc, atom_v);
return;
}
void BondForceWithAtomVirial(int bond_numbers, int atom_numbers, const int *uint_crd_f, const float *scaler_f,
const int *atom_a, const int *atom_b, const float *bond_k, const float *bond_r0,
float *frc_f, float *atom_v, cudaStream_t stream);

plugin/device/gpu/kernel/cuda_impl/sponge/bond/bond_force_with_atom_virial_impl.cuh

@@ -1,28 +0,0 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_BOND_BOND_FORCE_WITH_ATOM_VIRIAL_IMPL_H_
#define MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_BOND_BOND_FORCE_WITH_ATOM_VIRIAL_IMPL_H_
#include <curand_kernel.h>
#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/cuda_common.h"
#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/cuda_device_info.h"
CUDA_LIB_EXPORT void BondForceWithAtomVirial(int bond_numbers, int atom_numbers, const int *uint_crd_f,
const float *scaler_f, const int *atom_a, const int *atom_b,
const float *bond_k, const float *bond_r0, float *frc_f, float *atom_v,
cudaStream_t stream);
#endif

plugin/device/gpu/kernel/cuda_impl/sponge/common/atomcrdtocv_impl.cu

@@ -1,123 +0,0 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "plugin/device/gpu/kernel/cuda_impl/sponge/common/atomcrdtocv_impl.cuh"
#include "plugin/device/gpu/kernel/cuda_impl/sponge/common_sponge.cuh"
#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/util.cuh"
__device__ __host__ float fc(float Rij) {
const float PI = 3.141592654;
const float Rc = 1000.0;
return 0.5 * cosf(PI / Rc * Rij) + 0.5;
}
__global__ void Record_Box_Map_Times(int atom_numbers, const float *crd, const float *old_crd, float *box,
int *box_map_times) {
float half_box[3] = {0.5F * box[0], 0.5F * box[1], 0.5F * box[2]};
int i = blockDim.x * blockIdx.x + threadIdx.x;
if (i < atom_numbers) {
if (crd[3 * i + 0] - old_crd[3 * i + 0] > half_box[0]) {
box_map_times[3 * i + 0] = box_map_times[3 * i + 0] - 1;
} else if (crd[3 * i + 0] - old_crd[3 * i + 0] < -half_box[0]) {
box_map_times[3 * i + 0] = box_map_times[3 * i + 0] + 1;
}
if (crd[3 * i + 1] - old_crd[3 * i + 1] > half_box[1]) {
box_map_times[3 * i + 1] = box_map_times[3 * i + 1] - 1;
} else if (crd[3 * i + 1] - old_crd[3 * i + 1] < -half_box[1]) {
box_map_times[3 * i + 1] = box_map_times[3 * i + 1] + 1;
}
if (crd[3 * i + 2] - old_crd[3 * i + 2] > half_box[2]) {
box_map_times[3 * i + 2] = box_map_times[3 * i + 2] - 1;
} else if (crd[3 * i + 2] - old_crd[3 * i + 2] < -half_box[2]) {
box_map_times[3 * i + 2] = box_map_times[3 * i + 2] + 1;
}
}
}
__global__ void gen_nowarp_crd(int atom_numbers, const float *crd, float *box, int *box_map_times, float *nowarp_crd) {
int i = blockDim.x * blockIdx.x + threadIdx.x;
if (i < atom_numbers) {
nowarp_crd[3 * i + 0] = static_cast<float>(box_map_times[3 * i + 0]) * box[0] + crd[3 * i + 0];
nowarp_crd[3 * i + 1] = static_cast<float>(box_map_times[3 * i + 1]) * box[1] + crd[3 * i + 1];
nowarp_crd[3 * i + 2] = static_cast<float>(box_map_times[3 * i + 2]) * box[2] + crd[3 * i + 2];
}
}
__global__ void G_Radial(const int start_serial, const int end_serial, const float *crd, float *g_radial) {
const float Rs = 0.5, Eta = 0.5;
int i = blockDim.x * blockIdx.x + threadIdx.x;
if (i >= start_serial && i < end_serial) {
float rij;
float g_radial_lin = 0.;
for (int j = start_serial; j < end_serial; j = j + 1) {
if (j != i) {
// rij = sqrtf((crd[3*i+0] - crd[j]) * (crd[i] - crd[j]));
rij = sqrtf(normfloat(crd, crd, i, j));
g_radial_lin = g_radial_lin + expf(-Eta * (rij - Rs) * (rij - Rs)) * fc(rij);
} else {
continue;
}
}
g_radial[i] = g_radial_lin;
}
}
__global__ void G_Angular(const int start_serial, const int end_serial, const float *crd, float *g_angular) {
const float Rs = 0.5, Thetas = 3.14, Eta = 0.5, Zeta = 2.0;
int i = blockDim.x * blockIdx.x + threadIdx.x;
if (i >= start_serial && i < end_serial) {
float rij, rik, rjk, theta_jik;
float g_angular_lin = 0.;
for (int j = start_serial; j < end_serial; j = j + 1) {
if (j != i) {
rij = sqrtf(normfloat(crd, crd, i, j));
for (int k = j + 1; k < end_serial; k = k + 1) {
if (k != i) {
rik = sqrtf(normfloat(crd, crd, i, k));
rjk = sqrtf(normfloat(crd, crd, j, k));
theta_jik =
acosf(fmaxf(fminf((rij * rij + rik * rik - rjk * rjk) / (2. * rij * rik), 0.999999), -0.999999));
g_angular_lin = g_angular_lin + powf(1. + cosf(theta_jik - Thetas), Zeta) *
expf(-Eta * powf(0.5 * (rij + rik) - Rs, 2.)) * fc(rij) * fc(rik);
} else {
continue;
}
}
} else {
continue;
}
}
g_angular[i] = powf(2., 1. - Zeta) * g_angular_lin;
}
}
void AtomCrdToCV(int atom_numbers, int start_serial, int end_serial, int number, const float *crd_f,
const float *old_crd, float *nowarp_crd, int *box_map_times, float *box, float *g_radial,
float *g_angular, cudaStream_t stream) {
Reset_List<<<ceilf(static_cast<float>(3. * atom_numbers) / 128), 128, 0, stream>>>(3 * atom_numbers, box_map_times,
0);
Record_Box_Map_Times<<<ceilf(static_cast<float>(3. * atom_numbers) / 128), 128, 0, stream>>>(
atom_numbers, crd_f, old_crd, box, box_map_times);
gen_nowarp_crd<<<ceilf(static_cast<float>(3. * atom_numbers) / 128), 128, 0, stream>>>(atom_numbers, crd_f, box,
box_map_times, nowarp_crd);
G_Radial<<<1, number, 0, stream>>>(start_serial, end_serial, nowarp_crd, g_radial);
G_Angular<<<1, number, 0, stream>>>(start_serial, end_serial, nowarp_crd, g_angular);
return;
}
void AtomCrdToCV(int atom_numbers, int start_serial, int end_serial, int number, const float *crd_f,
const float *old_crd, float *nowarp_crd, int *box_map_times, float *box, float *g_radial,
float *g_angular, cudaStream_t stream);
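The device functions above implement Behler–Parrinello-style collective variables (my characterization; the sources do not name them): a cosine cutoff, a radial symmetry function, and an angular one, with the constants hard-coded in the kernels (R_c = 1000, R_s = 0.5, \eta = 0.5, \theta_s = 3.14, \zeta = 2):

f_c(r) = \tfrac{1}{2}\cos\!\left(\frac{\pi r}{R_c}\right) + \tfrac{1}{2}

G^{\mathrm{rad}}_i = \sum_{j \ne i} e^{-\eta (r_{ij} - R_s)^2}\, f_c(r_{ij})

G^{\mathrm{ang}}_i = 2^{1-\zeta} \sum_{j \ne i}\ \sum_{k > j,\ k \ne i}
\bigl(1 + \cos(\theta_{jik} - \theta_s)\bigr)^{\zeta}\,
e^{-\eta\left(\frac{r_{ij}+r_{ik}}{2} - R_s\right)^{2}} f_c(r_{ij})\, f_c(r_{ik})

Record_Box_Map_Times and gen_nowarp_crd first unwrap the coordinates across periodic images so that the distances entering these sums are continuous in time.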

plugin/device/gpu/kernel/cuda_impl/sponge/common/atomcrdtocv_impl.cuh

@@ -1,27 +0,0 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_COMMON_ATOMCRDTOCV_IMPL_H_
#define MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_COMMON_ATOMCRDTOCV_IMPL_H_
#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/cuda_common.h"
#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/cuda_device_info.h"
CUDA_LIB_EXPORT void AtomCrdToCV(int atom_numbers, int start_serial, int end_serial, int number, const float *crd_f,
const float *old_crd, float *nowarp_crd, int *box_map_times, float *box,
float *g_radial, float *g_angular, cudaStream_t stream);
#endif // MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_COMMON_ATOMCRDTOCV_IMPL_H_

plugin/device/gpu/kernel/cuda_impl/sponge/common/crd_to_uint_crd_impl.cu

@@ -1,51 +0,0 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "plugin/device/gpu/kernel/cuda_impl/sponge/common_sponge.cuh"
#include "plugin/device/gpu/kernel/cuda_impl/sponge/common/crd_to_uint_crd_impl.cuh"
__global__ void Crd_To_Uint_Crd(const int atom_numbers, const VECTOR *scale_factor, const VECTOR *crd,
UNSIGNED_INT_VECTOR *uint_crd) {
int atom_i = blockDim.x * blockIdx.x + threadIdx.x;
if (atom_i < atom_numbers) {
uint_crd[atom_i].uint_x = crd[atom_i].x * scale_factor[0].x;
uint_crd[atom_i].uint_y = crd[atom_i].y * scale_factor[0].y;
uint_crd[atom_i].uint_z = crd[atom_i].z * scale_factor[0].z;
/*uint_crd[atom_i].uint_x = 2 * uint_crd[atom_i].uint_x;
uint_crd[atom_i].uint_y = 2 * uint_crd[atom_i].uint_y;
uint_crd[atom_i].uint_z = 2 * uint_crd[atom_i].uint_z;*/
uint_crd[atom_i].uint_x = uint_crd[atom_i].uint_x << 1;
uint_crd[atom_i].uint_y = uint_crd[atom_i].uint_y << 1;
uint_crd[atom_i].uint_z = uint_crd[atom_i].uint_z << 1;
}
}
void CrdToUintCrd(const int atom_numbers, const float *crd_to_uint_crd_cof_f, const float *crd_f,
unsigned int *uint_crd_f, cudaStream_t stream) {
VECTOR *crd = const_cast<VECTOR *>(reinterpret_cast<const VECTOR *>(crd_f));
VECTOR *crd_to_uint_crd_cof = const_cast<VECTOR *>(reinterpret_cast<const VECTOR *>(crd_to_uint_crd_cof_f));
UNSIGNED_INT_VECTOR *uint_crd =
const_cast<UNSIGNED_INT_VECTOR *>(reinterpret_cast<const UNSIGNED_INT_VECTOR *>(uint_crd_f));
Crd_To_Uint_Crd<<<ceilf(static_cast<float>(atom_numbers) / 128.0), 128, 0, stream>>>(
atom_numbers, crd_to_uint_crd_cof, crd, uint_crd);
return;
}
void CrdToUintCrd(const int atom_numbers, const float *crd_to_uint_crd_cof_f, const float *crd_f,
unsigned int *uint_crd_f, cudaStream_t stream);

plugin/device/gpu/kernel/cuda_impl/sponge/common/crd_to_uint_crd_impl.cuh

@@ -1,27 +0,0 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_CRD_TO_UINT_CRD_IMPL_H_
#define MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_CRD_TO_UINT_CRD_IMPL_H_
#include <curand_kernel.h>
#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/cuda_common.h"
#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/cuda_device_info.h"
CUDA_LIB_EXPORT void CrdToUintCrd(const int atom_numbers, const float *crd_to_uint_crd_cof_f, const float *crd_f,
unsigned int *uint_crd_f, cudaStream_t stream);
#endif // MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_CRD_TO_UINT_CRD_IMPL_H_

plugin/device/gpu/kernel/cuda_impl/sponge/common/crd_to_uint_crd_quarter_impl.cu

@@ -1,54 +0,0 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "plugin/device/gpu/kernel/cuda_impl/sponge/common/crd_to_uint_crd_quarter_impl.cuh"
#include "plugin/device/gpu/kernel/cuda_impl/sponge/common_sponge.cuh"
__global__ void Crd_To_Uint_Crd_Quarter(const int atom_numbers, const VECTOR *scale_factor, const VECTOR *crd,
UNSIGNED_INT_VECTOR *uint_crd) {
int atom_i = blockDim.x * blockIdx.x + threadIdx.x;
if (atom_i < atom_numbers) {
INT_VECTOR tempi;
VECTOR temp = crd[atom_i];
temp.x *= scale_factor[0].x;
temp.y *= scale_factor[0].y;
temp.z *= scale_factor[0].z;
tempi.int_x = temp.x;
tempi.int_y = temp.y;
tempi.int_z = temp.z;
uint_crd[atom_i].uint_x = (tempi.int_x << 2);
uint_crd[atom_i].uint_y = (tempi.int_y << 2);
uint_crd[atom_i].uint_z = (tempi.int_z << 2);
}
}
void CrdToUintCrdQuarter(const int atom_numbers, const float *crd_to_uint_crd_cof_f, const float *crd_f,
unsigned int *uint_crd_f, cudaStream_t stream) {
VECTOR *crd = const_cast<VECTOR *>(reinterpret_cast<const VECTOR *>(crd_f));
VECTOR *crd_to_uint_crd_cof = const_cast<VECTOR *>(reinterpret_cast<const VECTOR *>(crd_to_uint_crd_cof_f));
UNSIGNED_INT_VECTOR *uint_crd =
const_cast<UNSIGNED_INT_VECTOR *>(reinterpret_cast<const UNSIGNED_INT_VECTOR *>(uint_crd_f));
Crd_To_Uint_Crd_Quarter<<<ceilf(static_cast<float>(atom_numbers) / 128.0), 128, 0, stream>>>(
atom_numbers, crd_to_uint_crd_cof, crd, uint_crd);
return;
}
void CrdToUintCrdQuarter(const int atom_numbers, const float *crd_to_uint_crd_cof_f, const float *crd_f,
unsigned int *uint_crd_f, cudaStream_t stream);
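Both Crd_To_Uint_Crd above and this quarter variant quantize coordinates onto the 32-bit unsigned ring; my reading of the scheme (the magnitude of the caller-supplied scale factor is an assumption) is that periodic wrap-around becomes plain unsigned overflow:

u = \bigl(\lfloor x \cdot s \rfloor \ll n\bigr) \bmod 2^{32}, \qquad
\Delta \vec x_{ij} = \mathrm{scaler}\cdot\bigl((u_i - u_j) \bmod 2^{32}\bigr)

with n = 1 for the full version and n = 2 for the quarter version. Provided s maps one box length onto the ring, the modular wrap of u_i - u_j inside Get_Periodic_Displacement yields the minimum-image difference without any branch.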

plugin/device/gpu/kernel/cuda_impl/sponge/common/crd_to_uint_crd_quarter_impl.cuh

@@ -1,27 +0,0 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_CRD_TO_UINT_CRD_QUARTER_IMPL_H_
#define MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_CRD_TO_UINT_CRD_QUARTER_IMPL_H_
#include <curand_kernel.h>
#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/cuda_common.h"
#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/cuda_device_info.h"
CUDA_LIB_EXPORT void CrdToUintCrdQuarter(const int atom_numbers, const float *crd_to_uint_crd_cof_f, const float *crd_f,
unsigned int *uint_crd_f, cudaStream_t stream);
#endif // MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_CRD_TO_UINT_CRD_QUARTER_IMPL_H_

plugin/device/gpu/kernel/cuda_impl/sponge/common/get_center_of_mass_impl.cu

@@ -1,44 +0,0 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "plugin/device/gpu/kernel/cuda_impl/sponge/common_sponge.cuh"
#include "plugin/device/gpu/kernel/cuda_impl/sponge/common/get_center_of_mass_impl.cuh"
__global__ void Get_Center_Of_Mass(int residue_numbers, int *start, int *end, VECTOR *crd, float *atom_mass,
float *residue_mass_inverse, VECTOR *center_of_mass) {
for (int residue_i = blockDim.x * blockIdx.x + threadIdx.x; residue_i < residue_numbers;
residue_i += gridDim.x * blockDim.x) {
VECTOR com_lin = {0.0f, 0.0f, 0.0f};
for (int atom_i = start[residue_i]; atom_i < end[residue_i]; atom_i += 1) {
com_lin = com_lin + atom_mass[atom_i] * crd[atom_i];
}
center_of_mass[residue_i] = residue_mass_inverse[residue_i] * com_lin;
}
}
void GetCenterOfMass(int residue_numbers, int *start, int *end, float *crd_f, float *atom_mass,
float *residue_mass_inverse, float *center_of_mass_f, cudaStream_t stream) {
Reset_List<<<ceilf(static_cast<float>(3. * residue_numbers) / 128), 128, 0, stream>>>(3 * residue_numbers,
center_of_mass_f, 0.);
VECTOR *crd = reinterpret_cast<VECTOR *>(crd_f);
VECTOR *center_of_mass = reinterpret_cast<VECTOR *>(center_of_mass_f);
Get_Center_Of_Mass<<<20, 32, 0, stream>>>(residue_numbers, start, end, crd, atom_mass, residue_mass_inverse,
center_of_mass);
return;
}
void GetCenterOfMass(int residue_numbers, int *start, int *end, float *crd_f, float *atom_mass,
float *residue_mass_inverse, float *center_of_mass_f, cudaStream_t stream);
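In equation form, Get_Center_Of_Mass accumulates, per residue j (taking residue_mass_inverse to be 1/M_j, as the name suggests):
\[ \mathbf{R}_j = \frac{1}{M_j} \sum_{i=\mathrm{start}_j}^{\mathrm{end}_j - 1} m_i \, \mathbf{r}_i, \qquad M_j = \sum_i m_i . \]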

View File

@ -1,27 +0,0 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_COMMON_GETCENTEROFMASS_IMPL_H_
#define MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_COMMON_GETCENTEROFMASS_IMPL_H_
#include <curand_kernel.h>
#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/cuda_common.h"
#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/cuda_device_info.h"
CUDA_LIB_EXPORT void GetCenterOfMass(int residue_numbers, int *start, int *end, float *crd_f, float *atom_mass,
float *residue_mass_inverse, float *center_of_mass_f, cudaStream_t stream);
#endif // MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_COMMON_GETCENTEROFMASS_IMPL_H_

View File

@ -1,43 +0,0 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "plugin/device/gpu/kernel/cuda_impl/sponge/common_sponge.cuh"
#include "plugin/device/gpu/kernel/cuda_impl/sponge/common/getcenter_impl.cuh"
__global__ void GetCenterOfGeometryKernel(const int center_numbers, float center_numbers_inverse,
const int *center_atoms, const VECTOR *crd, VECTOR *center_of_geometry) {
int i = blockDim.x * blockIdx.x + threadIdx.x;
if (i < center_numbers) {
int atom_i = center_atoms[i];
VECTOR temp = center_numbers_inverse * crd[atom_i];
atomicAdd(&center_of_geometry[0].x, temp.x);
atomicAdd(&center_of_geometry[0].y, temp.y);
atomicAdd(&center_of_geometry[0].z, temp.z);
}
}
void GetCenterOfGeometry(const int center_numbers, float center_numbers_inverse, const int *center_atoms,
const float *crd_f, float *center_of_geometry_f, cudaStream_t stream) {
VECTOR *crd = const_cast<VECTOR *>(reinterpret_cast<const VECTOR *>(crd_f));
VECTOR *center_of_geometry = const_cast<VECTOR *>(reinterpret_cast<const VECTOR *>(center_of_geometry_f));
GetCenterOfGeometryKernel<<<ceilf(static_cast<float>(center_numbers) / 32), 32, 0, stream>>>(
center_numbers, center_numbers_inverse, center_atoms, crd, center_of_geometry);
return;
}
void GetCenterOfGeometry(const int center_numbers, float center_numbers_inverse, const int *center_atoms,
const float *crd_f, float *center_of_geometry_f, cudaStream_t stream);

View File

@ -1,28 +0,0 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_COMMON_GETCENTER_IMPL_H_
#define MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_COMMON_GETCENTER_IMPL_H_
#include <curand_kernel.h>
#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/cuda_common.h"
#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/cuda_device_info.h"
CUDA_LIB_EXPORT void GetCenterOfGeometry(const int center_numbers, float center_numbers_inverse,
const int *center_atoms, const float *crd_f, float *center_of_geometry_f,
cudaStream_t stream);
#endif // MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_COMMON_GETCENTER_IMPL_H_

View File

@ -1,51 +0,0 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "plugin/device/gpu/kernel/cuda_impl/sponge/common_sponge.cuh"
#include "plugin/device/gpu/kernel/cuda_impl/sponge/common/map_center_of_mass_impl.cuh"
__global__ void Map_Center_Of_Mass(int residue_numbers, int *start, int *end,
float *scaler, VECTOR *center_of_mass, VECTOR *box_length, VECTOR *no_wrap_crd, VECTOR *crd) {
VECTOR trans_vec;
VECTOR com;
for (int residue_i = blockDim.x * blockIdx.x + threadIdx.x; residue_i < residue_numbers;
residue_i += gridDim.x * blockDim.x) {
com = center_of_mass[residue_i];
trans_vec.x = com.x - floorf(com.x / box_length[0].x) * box_length[0].x;
trans_vec.y = com.y - floorf(com.y / box_length[0].y) * box_length[0].y;
trans_vec.z = com.z - floorf(com.z / box_length[0].z) * box_length[0].z;
trans_vec = scaler[0] * trans_vec - com;
for (int atom_i = start[residue_i] + threadIdx.y; atom_i < end[residue_i]; atom_i += blockDim.y) {
crd[atom_i] = no_wrap_crd[atom_i] + trans_vec;
}
}
}
void MapCenterOfMass(int residue_numbers, int *start, int *end, float *center_of_mass_f,
float *box_length_f, float *no_wrap_crd_f, float *crd_f, float *scaler, cudaStream_t stream) {
VECTOR *crd = reinterpret_cast<VECTOR *>(crd_f);
VECTOR *no_wrap_crd = reinterpret_cast<VECTOR *>(no_wrap_crd_f);
VECTOR *box_length = reinterpret_cast<VECTOR *>(box_length_f);
VECTOR *center_of_mass = reinterpret_cast<VECTOR *>(center_of_mass_f);
Map_Center_Of_Mass<<<20, {32, 4}, 0, stream>>>(residue_numbers, start, end, scaler, center_of_mass, box_length,
no_wrap_crd, crd);
return;
}
void MapCenterOfMass(int residue_numbers, int *start, int *end, float *center_of_mass_f,
float *box_length_f, float *no_wrap_crd_f, float *crd_f, float *scaler, cudaStream_t stream);
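The translation applied above wraps each residue's center of mass back into the (optionally scaled) box and drags the residue's no-wrap coordinates along, componentwise:
\[ \mathbf{t}_j = \mathbf{s} \circ \bigl( \mathbf{R}_j - \lfloor \mathbf{R}_j / \mathbf{L} \rfloor \, \mathbf{L} \bigr) - \mathbf{R}_j, \qquad \mathbf{r}_i = \mathbf{r}_i^{\mathrm{nowrap}} + \mathbf{t}_j \quad (i \in \text{residue } j). \]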

View File

@ -1,28 +0,0 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_COMMON_MAPCENTEROFMASS_IMPL_H_
#define MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_COMMON_MAPCENTEROFMASS_IMPL_H_
#include <curand_kernel.h>
#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/cuda_common.h"
#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/cuda_device_info.h"
CUDA_LIB_EXPORT void MapCenterOfMass(int residue_numbers, int *start, int *end, float *center_of_mass_f,
float *box_length_f, float *no_wrap_crd_f, float *crd_f, float *scaler,
cudaStream_t stream);
#endif // MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_COMMON_MAPCENTEROFMASS_IMPL_H_

View File

@ -1,49 +0,0 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "plugin/device/gpu/kernel/cuda_impl/sponge/common_sponge.cuh"
#include "plugin/device/gpu/kernel/cuda_impl/sponge/common/mdtemperature_impl.cuh"
__global__ void MDTemperatureKernel(const int residue_numbers, const int *start, const int *end, const VECTOR *atom_vel,
const float *atom_mass, float *ek) {
int residue_i = blockDim.x * blockIdx.x + threadIdx.x;
if (residue_i < residue_numbers) {
VECTOR momentum = {0., 0., 0.};
float res_mass = 0.;
int s = start[residue_i];
int e = end[residue_i];
float mass_lin;
for (int atom_i = s; atom_i < e; atom_i = atom_i + 1) {
mass_lin = atom_mass[atom_i];
momentum.x = momentum.x + mass_lin * atom_vel[atom_i].x;
momentum.y = momentum.y + mass_lin * atom_vel[atom_i].y;
momentum.z = momentum.z + mass_lin * atom_vel[atom_i].z;
res_mass = res_mass + mass_lin;
}
ek[residue_i] = 0.5 * (momentum.x * momentum.x + momentum.y * momentum.y + momentum.z * momentum.z) / res_mass *
2. / 3. / CONSTANT_kB / residue_numbers;
}
}
void MDTemperature(const int residue_numbers, const int *start, const int *end, const float *atom_vel_f,
const float *atom_mass, float *ek, cudaStream_t stream) {
VECTOR *atom_vel = const_cast<VECTOR *>(reinterpret_cast<const VECTOR *>(atom_vel_f));
MDTemperatureKernel<<<ceilf(static_cast<float>(residue_numbers) / 32), 32, 0, stream>>>(residue_numbers, start, end,
atom_vel, atom_mass, ek);
return;
}
void MDTemperature(const int residue_numbers, const int *start, const int *end, const float *atom_vel_f,
const float *atom_mass, float *ek, cudaStream_t stream);
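The per-residue value computed above is a translational (rigid center-of-mass) temperature contribution, already divided by the residue count, so summing ek over residues yields the temperature:
\[ T = \sum_{j=1}^{N} \frac{|\mathbf{p}_j|^2}{3 N k_B M_j}, \qquad \mathbf{p}_j = \sum_{i \in j} m_i \mathbf{v}_i, \quad M_j = \sum_{i \in j} m_i . \]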

View File

@ -1,26 +0,0 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_COMMON_MDTEMPERATURE_IMPL_H_
#define MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_COMMON_MDTEMPERATURE_IMPL_H_
#include <curand_kernel.h>
#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/cuda_common.h"
#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/cuda_device_info.h"
CUDA_LIB_EXPORT void MDTemperature(const int residue_numbers, const int *start, const int *end, const float *atom_vel_f,
const float *atom_mass, float *ek, cudaStream_t stream);
#endif // MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_COMMON_MDTEMPERATURE_IMPL_H_

View File

@ -1,48 +0,0 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "plugin/device/gpu/kernel/cuda_impl/sponge/common/total_c6_get_impl.cuh"
#include "plugin/device/gpu/kernel/cuda_impl/sponge/common_sponge.cuh"
__global__ void Total_C6_Get(int atom_numbers, int *atom_lj_type, float *d_lj_b, float *d_factor) {
int i, j;
float temp_sum = 0;
int x, y;
int itype, jtype, atom_pair_LJ_type;
for (i = blockIdx.x * blockDim.x + threadIdx.x; i < atom_numbers; i += gridDim.x * blockDim.x) {
itype = atom_lj_type[i];
for (j = blockIdx.y * blockDim.y + threadIdx.y; j < atom_numbers; j += gridDim.y * blockDim.y) {
jtype = atom_lj_type[j];
y = (jtype - itype);
x = y >> 31;                // sign mask of the difference
y = (y ^ x) - x;            // y = |jtype - itype|
x = jtype + itype;
jtype = (x + y) >> 1;       // max(itype, jtype)
x = (x - y) >> 1;           // min(itype, jtype)
atom_pair_LJ_type = (jtype * (jtype + 1) >> 1) + x;  // upper-triangular pair index
temp_sum += d_lj_b[atom_pair_LJ_type];
}
}
atomicAdd(d_factor, temp_sum);
}
void total_c6_get(int atom_numbers, int *atom_lj_type, float *d_lj_b, float *d_factor, cudaStream_t stream) {
// zero the accumulator once on the stream; zeroing inside the kernel raced with atomicAdd across blocks
cudaMemsetAsync(d_factor, 0, sizeof(float), stream);
Total_C6_Get<<<{4, 4}, {32, 32}, 0, stream>>>(atom_numbers, atom_lj_type, d_lj_b, d_factor);
return;
}
void total_c6_get(int atom_numbers, int *atom_lj_type, float *d_lj_b, float *d_factor, cudaStream_t stream);
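The shift/mask sequence in the kernel is a branchless form of upper-triangular pair indexing; a plain C++ rendering of the same arithmetic (illustrative only, not part of the removed files):
// Hedged sketch of the pair-type indexing: for LJ types a and b the parameter
// table stores the upper triangle of a symmetric matrix, row-major.
#include <cstdio>
#include <algorithm>
int pair_index(int a, int b) {
  int hi = std::max(a, b);
  int lo = std::min(a, b);
  return (hi * (hi + 1)) / 2 + lo;  // same value the shift/mask trick produces
}
int main() {
  // e.g. types 2 and 5 -> index 5*6/2 + 2 = 17, regardless of argument order
  printf("%d %d\n", pair_index(2, 5), pair_index(5, 2));
  return 0;
}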

View File

@ -1,26 +0,0 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_COMMON_TOTAL_C6_GET_IMPL_H_
#define MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_COMMON_TOTAL_C6_GET_IMPL_H_
#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/cuda_common.h"
#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/cuda_device_info.h"
CUDA_LIB_EXPORT void total_c6_get(int atom_numbers, int *atom_lj_type, float *d_lj_b, float *d_factor,
cudaStream_t stream);
#endif // MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_COMMON_TOTAL_C6_GET_IMPL_H_

View File

@ -1,366 +0,0 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_COMMON_SPONGE_H_
#define MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_COMMON_SPONGE_H_
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include <time.h>
#include <curand_kernel.h>
#include <cuda_runtime.h>
#include <device_launch_parameters.h>
#include <cufft.h>
#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/cuda_common.h"
#define TWO_DIVIDED_BY_SQRT_PI 1.1283791670218446
#define CONSTANT_kB 0.00198716
#define CONSTANT_Pi 3.1415926535897932f
static dim3 thread_LJ(8, 32);
__constant__ float XRD3D_Ma[4] = {1.0 / 6.0, -0.5, 0.5, -1.0 / 6.0};
__constant__ float XRD3D_Mb[4] = {0, 0.5, -1, 0.5};
__constant__ float XRD3D_Mc[4] = {0, 0.5, 0, -0.5};
__constant__ float XRD3D_Md[4] = {0, 1.0 / 6.0, 4.0 / 6.0, 1.0 / 6.0};
__constant__ float XRD3D_dMa[4] = {0.5, -1.5, 1.5, -0.5};
__constant__ float XRD3D_dMb[4] = {0, 1, -2, 1};
__constant__ float XRD3D_dMc[4] = {0, 0.5, 0, -0.5};
struct VECTOR {
float x;
float y;
float z;
};
struct INT_VECTOR {
int int_x;
int int_y;
int int_z;
};
struct UNSIGNED_INT_VECTOR {
unsigned int uint_x;
unsigned int uint_y;
unsigned int uint_z;
};
struct NEIGHBOR_LIST {
int atom_numbers;
int *atom_serial;
};
struct UINT_VECTOR_LJ_TYPE {
unsigned int uint_x;
unsigned int uint_y;
unsigned int uint_z;
int LJ_type;
float charge;
};
struct ATOM_NEAR {
int *atom_serial;
};
struct GRID_BUCKET {
int *atom_serial;
};
struct GRID_POINTER {
int *grid_serial;
};
struct VIRTUAL_TYPE_0 {
float virtual_atom;
float from_1;
float h_double;
};
struct VIRTUAL_TYPE_1 {
float virtual_atom;
float from_1;
float from_2;
float a;
};
struct VIRTUAL_TYPE_2 {
float virtual_atom;
float from_1;
float from_2;
float from_3;
float a;
float b;
};
struct VIRTUAL_TYPE_3 {
float virtual_atom;
float from_1;
float from_2;
float from_3;
float d;
float k;
};
struct CONSTRAIN_PAIR {
int atom_i_serial;
int atom_j_serial;
float constant_r;
float constrain_k;
};
__device__ __host__ static inline VECTOR operator-(const VECTOR &veca, const VECTOR &vecb) {
VECTOR vec;
vec.x = veca.x - vecb.x;
vec.y = veca.y - vecb.y;
vec.z = veca.z - vecb.z;
return vec;
}
__device__ __host__ static inline VECTOR Get_Periodic_Displacement(const UNSIGNED_INT_VECTOR uvec_a,
const UNSIGNED_INT_VECTOR uvec_b,
const VECTOR scaler) {
VECTOR dr;
dr.x = (static_cast<int>(uvec_a.uint_x - uvec_b.uint_x)) * scaler.x;
dr.y = (static_cast<int>(uvec_a.uint_y - uvec_b.uint_y)) * scaler.y;
dr.z = (static_cast<int>(uvec_a.uint_z - uvec_b.uint_z)) * scaler.z;
return dr;
}
__device__ __host__ static inline VECTOR Get_Periodic_Displacement(const UINT_VECTOR_LJ_TYPE uvec_a,
const UINT_VECTOR_LJ_TYPE uvec_b,
const VECTOR scaler) {
VECTOR dr;
dr.x = (static_cast<int>(uvec_a.uint_x - uvec_b.uint_x)) * scaler.x;
dr.y = (static_cast<int>(uvec_a.uint_y - uvec_b.uint_y)) * scaler.y;
dr.z = (static_cast<int>(uvec_a.uint_z - uvec_b.uint_z)) * scaler.z;
return dr;
}
__device__ __host__ static inline VECTOR Get_Periodic_Displacement(const VECTOR vec_a, const VECTOR vec_b,
const VECTOR box_length) {
VECTOR dr;
dr = vec_a - vec_b;
dr.x = dr.x - floorf(dr.x / box_length.x + 0.5) * box_length.x;
dr.y = dr.y - floorf(dr.y / box_length.y + 0.5) * box_length.y;
dr.z = dr.z - floorf(dr.z / box_length.z + 0.5) * box_length.z;
return dr;
}
__device__ __host__ static inline VECTOR Get_Periodic_Displacement(const VECTOR vec_a, const VECTOR vec_b,
const VECTOR box_length,
const VECTOR box_length_inverse) {
VECTOR dr;
dr = vec_a - vec_b;
dr.x = dr.x - floorf(dr.x * box_length_inverse.x + 0.5) * box_length.x;
dr.y = dr.y - floorf(dr.y * box_length_inverse.y + 0.5) * box_length.y;
dr.z = dr.z - floorf(dr.z * box_length_inverse.z + 0.5) * box_length.z;
return dr;
}
__device__ __host__ static inline VECTOR operator+(const VECTOR &veca, const VECTOR &vecb) {
VECTOR vec;
vec.x = veca.x + vecb.x;
vec.y = veca.y + vecb.y;
vec.z = veca.z + vecb.z;
return vec;
}
__device__ __host__ static inline float operator*(const VECTOR &veca, const VECTOR &vecb) {
return veca.x * vecb.x + veca.y * vecb.y + veca.z * vecb.z;
}
__device__ __host__ static inline VECTOR operator*(const float &a, const VECTOR &vecb) {
VECTOR vec;
vec.x = a * vecb.x;
vec.y = a * vecb.y;
vec.z = a * vecb.z;
return vec;
}
__device__ __host__ static inline VECTOR operator-(const VECTOR &vecb) {
VECTOR vec;
vec.x = -vecb.x;
vec.y = -vecb.y;
vec.z = -vecb.z;
return vec;
}
__device__ __host__ static inline VECTOR operator^(const VECTOR &veca, const VECTOR &vecb) {
VECTOR vec;
vec.x = veca.y * vecb.z - veca.z * vecb.y;
vec.y = veca.z * vecb.x - veca.x * vecb.z;
vec.z = veca.x * vecb.y - veca.y * vecb.x;
return vec;
}
// note: despite the name, this returns the squared distance between point i of x and point j of y
__device__ __host__ static inline float normfloat(const float *x, const float *y, int i, int j) {
float s = 0;
s += (x[3 * i + 0] - y[3 * j + 0]) * (x[3 * i + 0] - y[3 * j + 0]);
s += (x[3 * i + 1] - y[3 * j + 1]) * (x[3 * i + 1] - y[3 * j + 1]);
s += (x[3 * i + 2] - y[3 * j + 2]) * (x[3 * i + 2] - y[3 * j + 2]);
return s;
}
__global__ static void construct_neighbor_list_kernel(int atom_numbers, int max_neighbor_numbers, int *nl_atom_numbers,
int *nl_atom_serial, NEIGHBOR_LIST *nl) {
for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < atom_numbers; i += gridDim.x * blockDim.x) {
nl[i].atom_numbers = nl_atom_numbers[i];
nl[i].atom_serial = nl_atom_serial + i * max_neighbor_numbers;
}
}
__global__ static void construct_atom_near(int atom_numbers, int near_numbers, int *atom_serial, ATOM_NEAR *an) {
for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < atom_numbers; i += gridDim.x * blockDim.x) {
an[i].atom_serial = atom_serial + i * near_numbers;
}
}
static inline bool Malloc_Safely(void **address, size_t size) {
address[0] = NULL;
address[0] = reinterpret_cast<void *>(malloc(size));
if (address[0] != NULL) {
return true;
} else {
printf("malloc failed!\n");
getchar();
return false;
}
}
static inline bool Cuda_Malloc_Safely(void **address, size_t size) {
cudaError_t cuda_error = cudaMalloc(&address[0], size);
if (cuda_error == 0) {
return true;
} else {
printf("cudaMalloc failed! error %d\n", cuda_error);
getchar();
return false;
}
}
__global__ static void construct_constrain_pair(int constrain_pair_numbers, const int *atom_i_serials,
const int *atom_j_serials, const float *constant_rs,
const float *constrain_ks, CONSTRAIN_PAIR *constrain_pair) {
int atom_i = blockDim.x * blockIdx.x + threadIdx.x;
if (atom_i < constrain_pair_numbers) {
constrain_pair[atom_i].atom_i_serial = atom_i_serials[atom_i];
constrain_pair[atom_i].atom_j_serial = atom_j_serials[atom_i];
constrain_pair[atom_i].constant_r = constant_rs[atom_i];
constrain_pair[atom_i].constrain_k = constrain_ks[atom_i];
}
}
__global__ static void Copy_Crd_To_New_Crd_Start(const int atom_numbers, const UNSIGNED_INT_VECTOR *crd,
UINT_VECTOR_LJ_TYPE *new_crd, const int *LJ_type,
const float *charge) {
int atom_i = blockDim.x * blockIdx.x + threadIdx.x;
if (atom_i < atom_numbers) {
new_crd[atom_i].uint_x = crd[atom_i].uint_x;
new_crd[atom_i].uint_y = crd[atom_i].uint_y;
new_crd[atom_i].uint_z = crd[atom_i].uint_z;
new_crd[atom_i].LJ_type = LJ_type[atom_i];
new_crd[atom_i].charge = charge[atom_i];
}
}
// void Constrain_Force_Cycle_With_Virial(int atom_numbers, int constrain_pair_numbers,
//                                        const unsigned int *uint_crd_f, const float *scaler_f,
//                                        float *constrain_pair_f, const float *pair_dr_f,
//                                        const int *atom_i_serials, const int *atom_j_serials,
//                                        const float *constant_rs, const float *constrain_ks,
//                                        float *test_frc_f, float *d_atom_virial, cudaStream_t stream);
__global__ static void Rand_Normal(const int float4_numbers, curandStatePhilox4_32_10_t *rand_state,
float4 *rand_float4) {
int i = blockDim.x * blockIdx.x + threadIdx.x;
if (i < float4_numbers) {
rand_float4[i] = curand_normal4(&rand_state[i]);
}
}
__global__ static void Setup_Rand_Normal_Kernel(const int float4_numbers, curandStatePhilox4_32_10_t *rand_state,
const int seed) {
int id = threadIdx.x + blockIdx.x * blockDim.x;
/* Each thread gets same seed, a different sequence
number, no offset */
if (id < float4_numbers) {
curand_init(seed, id, 0, &rand_state[id]);
}
}
__global__ static void Reset_List(const int element_numbers, int *list, const int replace_element) {
int i = blockDim.x * blockIdx.x + threadIdx.x;
if (i < element_numbers) {
list[i] = replace_element;
}
}
__global__ static void Reset_List(const int element_numbers, float *list, const float replace_element) {
int i = blockDim.x * blockIdx.x + threadIdx.x;
if (i < element_numbers) {
list[i] = replace_element;
}
}
__global__ static void Sum_Of_List(const int element_numbers, const float *list, float *sum) {
if (threadIdx.x == 0) {
sum[0] = 0.;
}
__syncthreads();
float lin = 0.;
for (int i = threadIdx.x; i < element_numbers; i = i + blockDim.x) {
lin = lin + list[i];
}
atomicAdd(sum, lin);
}
__global__ static void Scale_List(const int element_numbers, float *list, float scaler) {
int i = blockDim.x * blockIdx.x + threadIdx.x;
if (i < element_numbers) {
list[i] = list[i] * scaler;
}
}
__global__ static void Copy_List(const int element_numbers, const int *origin_list, int *list) {
int i = blockDim.x * blockIdx.x + threadIdx.x;
if (i < element_numbers) {
list[i] = origin_list[i];
}
}
__global__ static void Copy_List(const int element_numbers, const float *origin_list, float *list) {
int i = blockDim.x * blockIdx.x + threadIdx.x;
if (i < element_numbers) {
list[i] = origin_list[i];
}
}
__global__ static void Print(const size_t size, const float *input_x) {
for (size_t i = 0; i < size; i++) {
printf("%f\n", input_x[i]);
}
return;
}
__global__ static void Print(const size_t size, const int *input_x) {
for (size_t i = 0; i < size; i++) {
printf("%d\n", input_x[i]);
}
return;
}
__device__ static VECTOR Make_Vector_Not_Exceed_Value(VECTOR vector, const float value) {
return fminf(1.0, value * rnorm3df(vector.x, vector.y, vector.z)) * vector;
}
#endif // MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_COMMON_SPONGE_H_
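A standalone sketch of the minimum-image fold performed by the float overloads of Get_Periodic_Displacement above (one component, made-up numbers):
// Hedged sketch, not part of the removed header: fold a displacement into
// (-box/2, box/2], as the VECTOR overloads do per component.
#include <cstdio>
#include <cmath>
float min_image(float xa, float xb, float box) {
  float dr = xa - xb;
  return dr - std::floor(dr / box + 0.5f) * box;
}
int main() {
  printf("%f\n", min_image(1.0f, 59.0f, 60.0f));  // -58 folds to +2
  return 0;
}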

View File

@ -1,46 +0,0 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "plugin/device/gpu/kernel/cuda_impl/sponge/crdmcmap/cal_no_wrap_crd_impl.cuh"
#include "plugin/device/gpu/kernel/cuda_impl/sponge/common_sponge.cuh"
#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/util.cuh"
__global__ void Calculate_No_Wrap_Crd(int atom_numbers, INT_VECTOR *box_map_times, VECTOR *box, VECTOR *crd,
VECTOR *nowrap_crd) {
// grid-stride loop; the original strided by blockDim.x only, so all 20 launched blocks repeated the same work
for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < atom_numbers; i += gridDim.x * blockDim.x) {
nowrap_crd[i].x = static_cast<float>(box_map_times[i].int_x) * box[0].x + crd[i].x;
nowrap_crd[i].y = static_cast<float>(box_map_times[i].int_y) * box[0].y + crd[i].y;
nowrap_crd[i].z = static_cast<float>(box_map_times[i].int_z) * box[0].z + crd[i].z;
}
}
void calculatenowrapcrd(int atom_numbers, int *box_map_times_f, float *box_f, float *crd_f, float *nowrap_crd_f,
cudaStream_t stream) {
Reset_List<<<ceilf(static_cast<float>(3. * atom_numbers) / 128), 128, 0, stream>>>(3 * atom_numbers, nowrap_crd_f,
0.);
INT_VECTOR *box_map_times = reinterpret_cast<INT_VECTOR *>(box_map_times_f);
VECTOR *box = reinterpret_cast<VECTOR *>(box_f);
VECTOR *crd = reinterpret_cast<VECTOR *>(crd_f);
VECTOR *nowrap_crd = reinterpret_cast<VECTOR *>(nowrap_crd_f);
Calculate_No_Wrap_Crd<<<20, 256, 0, stream>>>(atom_numbers, box_map_times, box, crd,
nowrap_crd);
return;
}
void calculatenowrapcrd(int atom_numbers, int *box_map_times_f, float *box_f, float *crd_f, float *nowrap_crd_f,
cudaStream_t stream);

View File

@ -1,28 +0,0 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_CRDMCMAP_CAL_NO_WRAP_CRD_IMPL_H_
#define MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_CRDMCMAP_CAL_NO_WRAP_CRD_IMPL_H_
#include <curand_kernel.h>
#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/cuda_common.h"
#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/cuda_device_info.h"
CUDA_LIB_EXPORT void calculatenowrapcrd(int atom_numbers, int *box_map_times_f, float *box_f, float *crd_f,
float *nowrap_crd_f, cudaStream_t stream);
#endif // MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_CRDMCMAP_CAL_NO_WRAP_CRD_IMPL_H_

View File

@ -1,47 +0,0 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "plugin/device/gpu/kernel/cuda_impl/sponge/crdmcmap/refresh_boxmaptimes_impl.cuh"
#include "plugin/device/gpu/kernel/cuda_impl/sponge/common_sponge.cuh"
__global__ void Refresh_BoxMapTimes_CUDA(int atom_numbers, VECTOR *box_length_inverse, VECTOR *crd,
INT_VECTOR *box_map_times, VECTOR *old_crd) {
VECTOR crd_i, old_crd_i;
for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < atom_numbers; i += gridDim.x * blockDim.x) {
crd_i = crd[i];
old_crd_i = old_crd[i];
box_map_times[i].int_x += static_cast<int>(floorf((old_crd_i.x - crd_i.x) * box_length_inverse[0].x + 0.5f));
box_map_times[i].int_y += static_cast<int>(floorf((old_crd_i.y - crd_i.y) * box_length_inverse[0].y + 0.5f));
box_map_times[i].int_z += static_cast<int>(floorf((old_crd_i.z - crd_i.z) * box_length_inverse[0].z + 0.5f));
old_crd[i] = crd_i;
}
}
void refresh_boxmaptimes(int atom_numbers, float *box_length_inverse_f, float *crd_f, float *old_crd_f,
int *box_map_times_f, cudaStream_t stream) {
INT_VECTOR *box_map_times = reinterpret_cast<INT_VECTOR *>(box_map_times_f);
VECTOR *box_length_inverse = reinterpret_cast<VECTOR *>(box_length_inverse_f);
VECTOR *crd = reinterpret_cast<VECTOR *>(crd_f);
VECTOR *old_crd = reinterpret_cast<VECTOR *>(old_crd_f);
Refresh_BoxMapTimes_CUDA<<<1, 256, 0, stream>>>(atom_numbers, box_length_inverse, crd,
box_map_times, old_crd);
return;
}
void refresh_boxmaptimes(int atom_numbers, float *box_length_inverse_f, float *crd_f, float *old_crd_f,
int *box_map_times_f, cudaStream_t stream);
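Together with cal_no_wrap_crd_impl above, the bookkeeping is: each refresh accumulates how many box lengths an atom jumped since the last call, and the unwrapped coordinate is recovered from that count, componentwise:
\[ n_i \mathrel{+}= \Bigl\lfloor \frac{\mathbf{r}_i^{\mathrm{old}} - \mathbf{r}_i}{\mathbf{L}} + \tfrac{1}{2} \Bigr\rfloor, \qquad \mathbf{r}_i^{\mathrm{nowrap}} = n_i \, \mathbf{L} + \mathbf{r}_i . \]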

View File

@ -1,28 +0,0 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_CRDMCMAP_REFRESH_BOXMAPTIMES_IMPL_H_
#define MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_CRDMCMAP_REFRESH_BOXMAPTIMES_IMPL_H_
#include <curand_kernel.h>
#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/cuda_common.h"
#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/cuda_device_info.h"
CUDA_LIB_EXPORT void refresh_boxmaptimes(int atom_numbers, float *box_length_inverse, float *crd_f, float *old_crd_f,
int *box_map_times_f, cudaStream_t stream);
#endif // MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_CRDMCMAP_REFRESH_BOXMAPTIMES_IMPL_H_

View File

@ -1,84 +0,0 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "plugin/device/gpu/kernel/cuda_impl/sponge/dihedral/dihedral_atom_energy_impl.cuh"
#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/util.cuh"
#include "plugin/device/gpu/kernel/cuda_impl/sponge/common_sponge.cuh"
__global__ void DihedralAtomEnergyKernel(int dihedral_numbers, const UNSIGNED_INT_VECTOR *uint_crd,
const VECTOR *scaler, const int *atom_a, const int *atom_b, const int *atom_c,
const int *atom_d, const int *ipn, const float *pk, const float *gamc,
const float *gams, const float *pn, float *ene) {
int dihedral_i = blockDim.x * blockIdx.x + threadIdx.x;
if (dihedral_i < dihedral_numbers) {
int atom_i = atom_a[dihedral_i];
int atom_j = atom_b[dihedral_i];
int atom_k = atom_c[dihedral_i];
int atom_l = atom_d[dihedral_i];
float temp_pk = pk[dihedral_i];
float temp_pn = pn[dihedral_i];
float temp_gamc = gamc[dihedral_i];
float temp_gams = gams[dihedral_i];
VECTOR drij = Get_Periodic_Displacement(uint_crd[atom_i], uint_crd[atom_j], scaler[0]);
VECTOR drkj = Get_Periodic_Displacement(uint_crd[atom_k], uint_crd[atom_j], scaler[0]);
VECTOR drkl = Get_Periodic_Displacement(uint_crd[atom_k], uint_crd[atom_l], scaler[0]);
VECTOR r1 = drij ^ drkj;
VECTOR r2 = drkl ^ drkj;
float r1_1 = rnorm3df(r1.x, r1.y, r1.z);
float r2_1 = rnorm3df(r2.x, r2.y, r2.z);
float r1_1_r2_1 = r1_1 * r2_1;
float phi = r1 * r2 * r1_1_r2_1;
phi = fmaxf(-0.999999, fminf(phi, 0.999999));
phi = acosf(phi);
float sign = (r2 ^ r1) * drkj;
phi = copysignf(phi, sign);
phi = CONSTANT_Pi - phi;
float nphi = temp_pn * phi;
float cos_nphi = cosf(nphi);
float sin_nphi = sinf(nphi);
atomicAdd(&ene[atom_i], (temp_pk + cos_nphi * temp_gamc + sin_nphi * temp_gams));
}
}
void DihedralAtomEnergy(int dihedral_numbers, int atom_numbers, const int *uint_crd_f, const float *scaler_f,
const int *atom_a, const int *atom_b, const int *atom_c, const int *atom_d, const int *ipn,
const float *pk, const float *gamc, const float *gams, const float *pn, float *ene,
cudaStream_t stream) {
Reset_List<<<ceilf(static_cast<float>(atom_numbers) / 128), 128, 0, stream>>>(atom_numbers, ene, 0.);
size_t thread_per_block = 128;
size_t block_per_grid = ceilf(static_cast<float>(dihedral_numbers) / 128);
UNSIGNED_INT_VECTOR *uint_crd =
const_cast<UNSIGNED_INT_VECTOR *>(reinterpret_cast<const UNSIGNED_INT_VECTOR *>(uint_crd_f));
VECTOR *scaler = const_cast<VECTOR *>(reinterpret_cast<const VECTOR *>(scaler_f));
DihedralAtomEnergyKernel<<<block_per_grid, thread_per_block, 0, stream>>>(
dihedral_numbers, uint_crd, scaler, atom_a, atom_b, atom_c, atom_d, ipn, pk, gamc, gams, pn, ene);
return;
}
void DihedralAtomEnergy(int dihedral_numbers, int atom_numbers, const int *uint_crd_f, const float *scaler_f,
const int *atom_a, const int *atom_b, const int *atom_c, const int *atom_d, const int *ipn,
const float *pk, const float *gamc, const float *gams, const float *pn, float *ene,
cudaStream_t stream);
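The energy accumulated per atom is the standard periodic torsion term; if gamc = pk cos(gamma) and gams = pk sin(gamma), as the parameter names suggest, it reduces to the familiar AMBER form:
\[ E(\phi) = p_k + \mathrm{gamc} \cos(n\phi) + \mathrm{gams} \sin(n\phi) = p_k \bigl( 1 + \cos(n\phi - \gamma) \bigr). \]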

View File

@ -1,28 +0,0 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_DIHEDRAL_DIHEDRAL_ATOM_ENERGY_IMPL_H_
#define MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_DIHEDRAL_DIHEDRAL_ATOM_ENERGY_IMPL_H_
#include <curand_kernel.h>
#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/cuda_common.h"
#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/cuda_device_info.h"
CUDA_LIB_EXPORT void DihedralAtomEnergy(int dihedral_numbers, int atom_numbers, const int *uint_crd_f,
const float *scaler_f, const int *atom_a, const int *atom_b, const int *atom_c,
const int *atom_d, const int *ipn, const float *pk, const float *gamc,
const float *gams, const float *pn, float *ene, cudaStream_t stream);
#endif // MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_DIHEDRAL_DIHEDRAL_ATOM_ENERGY_IMPL_H_

View File

@ -1,81 +0,0 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "plugin/device/gpu/kernel/cuda_impl/sponge/dihedral/dihedral_energy_impl.cuh"
#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/util.cuh"
#include "plugin/device/gpu/kernel/cuda_impl/sponge/common_sponge.cuh"
__global__ void DihedralEnergyKernel(int dihedral_numbers, const UNSIGNED_INT_VECTOR *uint_crd, const VECTOR *scaler,
const int *atom_a, const int *atom_b, const int *atom_c, const int *atom_d,
const int *ipn, const float *pk, const float *gamc, const float *gams,
const float *pn, float *ene) {
int dihedral_i = blockDim.x * blockIdx.x + threadIdx.x;
if (dihedral_i < dihedral_numbers) {
int atom_i = atom_a[dihedral_i];
int atom_j = atom_b[dihedral_i];
int atom_k = atom_c[dihedral_i];
int atom_l = atom_d[dihedral_i];
float temp_pk = pk[dihedral_i];
float temp_pn = pn[dihedral_i];
float temp_gamc = gamc[dihedral_i];
float temp_gams = gams[dihedral_i];
VECTOR drij = Get_Periodic_Displacement(uint_crd[atom_i], uint_crd[atom_j], scaler[0]);
VECTOR drkj = Get_Periodic_Displacement(uint_crd[atom_k], uint_crd[atom_j], scaler[0]);
VECTOR drkl = Get_Periodic_Displacement(uint_crd[atom_k], uint_crd[atom_l], scaler[0]);
VECTOR r1 = drij ^ drkj;
VECTOR r2 = drkl ^ drkj;
float r1_1 = rnorm3df(r1.x, r1.y, r1.z);
float r2_1 = rnorm3df(r2.x, r2.y, r2.z);
float r1_1_r2_1 = r1_1 * r2_1;
float phi = r1 * r2 * r1_1_r2_1;
phi = fmaxf(-0.999999, fminf(phi, 0.999999));
phi = acosf(phi);
float sign = (r2 ^ r1) * drkj;
phi = copysignf(phi, sign);
phi = CONSTANT_Pi - phi;
float nphi = temp_pn * phi;
float cos_nphi = cosf(nphi);
float sin_nphi = sinf(nphi);
ene[dihedral_i] = (temp_pk + cos_nphi * temp_gamc + sin_nphi * temp_gams);
}
}
void DihedralEnergy(int dihedral_numbers, const int *uint_crd_f, const float *scaler_f, const int *atom_a,
const int *atom_b, const int *atom_c, const int *atom_d, const int *ipn, const float *pk,
const float *gamc, const float *gams, const float *pn, float *ene, cudaStream_t stream) {
size_t thread_per_block = 128;
size_t block_per_grid = ceilf(static_cast<float>(dihedral_numbers) / 128);
UNSIGNED_INT_VECTOR *uint_crd =
const_cast<UNSIGNED_INT_VECTOR *>(reinterpret_cast<const UNSIGNED_INT_VECTOR *>(uint_crd_f));
VECTOR *scaler = const_cast<VECTOR *>(reinterpret_cast<const VECTOR *>(scaler_f));
DihedralEnergyKernel<<<block_per_grid, thread_per_block, 0, stream>>>(
dihedral_numbers, uint_crd, scaler, atom_a, atom_b, atom_c, atom_d, ipn, pk, gamc, gams, pn, ene);
return;
}
void DihedralEnergy(int dihedral_numbers, const int *uint_crd_f, const float *scaler_f, const int *atom_a,
const int *atom_b, const int *atom_c, const int *atom_d, const int *ipn, const float *pk,
const float *gamc, const float *gams, const float *pn, float *ene, cudaStream_t stream);

View File

@ -1,28 +0,0 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_DIHEDRAL_DIHEDRAL_ENERGY_IMPL_H_
#define MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_DIHEDRAL_DIHEDRAL_ENERGY_IMPL_H_
#include <curand_kernel.h>
#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/cuda_common.h"
#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/cuda_device_info.h"
CUDA_LIB_EXPORT void DihedralEnergy(int dihedral_numbers, const int *uint_crd_f, const float *scaler_f,
const int *atom_a, const int *atom_b, const int *atom_c, const int *atom_d,
const int *ipn, const float *pk, const float *gamc, const float *gams,
const float *pn, float *ene, cudaStream_t stream);
#endif // MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_DIHEDRAL_DIHEDRAL_ENERGY_IMPL_H_

View File

@ -1,121 +0,0 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "plugin/device/gpu/kernel/cuda_impl/sponge/dihedral/dihedral_force_impl.cuh"
#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/util.cuh"
#include "plugin/device/gpu/kernel/cuda_impl/sponge/common_sponge.cuh"
__global__ void DihedralForceKernel(int dihedral_numbers, const UNSIGNED_INT_VECTOR *uint_crd, const VECTOR *scaler,
const int *atom_a, const int *atom_b, const int *atom_c, const int *atom_d,
const int *ipn, const float *pk, const float *gamc, const float *gams,
const float *pn, VECTOR *frc) {
int dihedral_i = blockDim.x * blockIdx.x + threadIdx.x;
if (dihedral_i < dihedral_numbers) {
int atom_i = atom_a[dihedral_i];
int atom_j = atom_b[dihedral_i];
int atom_k = atom_c[dihedral_i];
int atom_l = atom_d[dihedral_i];
int temp_ipn = ipn[dihedral_i];
float temp_pn = pn[dihedral_i];
float temp_gamc = gamc[dihedral_i];
float temp_gams = gams[dihedral_i];
VECTOR drij = Get_Periodic_Displacement(uint_crd[atom_i], uint_crd[atom_j], scaler[0]);
VECTOR drkj = Get_Periodic_Displacement(uint_crd[atom_k], uint_crd[atom_j], scaler[0]);
VECTOR drkl = Get_Periodic_Displacement(uint_crd[atom_k], uint_crd[atom_l], scaler[0]);
VECTOR r1 = drij ^ drkj;
VECTOR r2 = drkl ^ drkj;
float r1_1 = rnorm3df(r1.x, r1.y, r1.z);
float r2_1 = rnorm3df(r2.x, r2.y, r2.z);
float r1_2 = r1_1 * r1_1;
float r2_2 = r2_1 * r2_1;
float r1_1_r2_1 = r1_1 * r2_1;
float phi = r1 * r2 * r1_1_r2_1;
phi = fmaxf(-0.999999, fminf(phi, 0.999999));
phi = acosf(phi);
float sign = (r2 ^ r1) * drkj;
phi = copysignf(phi, sign);
phi = CONSTANT_Pi - phi;
float nphi = temp_pn * phi;
float cos_phi = cosf(phi);
float sin_phi = sinf(phi);
float cos_nphi = cosf(nphi);
float sin_nphi = sinf(nphi);
float dE_dphi;
if (fabsf(sin_phi) < 1e-6) {
temp_ipn *= temp_ipn % 2;  // zero out even periodicities; equivalent to (((temp_ipn - 1) & 1) ^ 1)
dE_dphi = temp_gamc * (temp_pn - temp_ipn + temp_ipn * cos_phi);
} else {
dE_dphi = temp_pn * (temp_gamc * sin_nphi - temp_gams * cos_nphi) / sin_phi;
}
VECTOR dphi_dr1 = r1_1_r2_1 * r2 + cos_phi * r1_2 * r1;
VECTOR dphi_dr2 = r1_1_r2_1 * r1 + cos_phi * r2_2 * r2;
VECTOR dE_dri = dE_dphi * drkj ^ dphi_dr1;
VECTOR dE_drl = dE_dphi * dphi_dr2 ^ drkj;
VECTOR dE_drj_part = dE_dphi * ((drij ^ dphi_dr1) + (drkl ^ dphi_dr2));
VECTOR fi = dE_dri;
VECTOR fj = dE_drj_part - dE_dri;
VECTOR fk = -dE_drl - dE_drj_part;
VECTOR fl = dE_drl;
atomicAdd(&frc[atom_i].x, fi.x);
atomicAdd(&frc[atom_i].y, fi.y);
atomicAdd(&frc[atom_i].z, fi.z);
atomicAdd(&frc[atom_j].x, fj.x);
atomicAdd(&frc[atom_j].y, fj.y);
atomicAdd(&frc[atom_j].z, fj.z);
atomicAdd(&frc[atom_k].x, fk.x);
atomicAdd(&frc[atom_k].y, fk.y);
atomicAdd(&frc[atom_k].z, fk.z);
atomicAdd(&frc[atom_l].x, fl.x);
atomicAdd(&frc[atom_l].y, fl.y);
atomicAdd(&frc[atom_l].z, fl.z);
}
}
void DihedralForce(int dihedral_numbers, int atom_numbers, const int *uint_crd_f, const float *scaler_f,
const int *atom_a, const int *atom_b, const int *atom_c, const int *atom_d, const int *ipn,
const float *pk, const float *gamc, const float *gams, const float *pn, float *frc_f,
cudaStream_t stream) {
Reset_List<<<ceilf(static_cast<float>(3. * atom_numbers) / 128), 128, 0, stream>>>(3 * atom_numbers, frc_f, 0.);
size_t thread_per_block = 128;
size_t block_per_grid = ceilf(static_cast<float>(dihedral_numbers) / 128);
UNSIGNED_INT_VECTOR *uint_crd =
const_cast<UNSIGNED_INT_VECTOR *>(reinterpret_cast<const UNSIGNED_INT_VECTOR *>(uint_crd_f));
VECTOR *frc = const_cast<VECTOR *>(reinterpret_cast<const VECTOR *>(frc_f));
VECTOR *scaler = const_cast<VECTOR *>(reinterpret_cast<const VECTOR *>(scaler_f));
DihedralForceKernel<<<block_per_grid, thread_per_block, 0, stream>>>(
dihedral_numbers, uint_crd, scaler, atom_a, atom_b, atom_c, atom_d, ipn, pk, gamc, gams, pn, frc);
return;
}
void DihedralForce(int dihedral_numbers, int atom_numbers, const int *uint_crd_f, const float *scaler_f,
const int *atom_a, const int *atom_b, const int *atom_c, const int *atom_d, const int *ipn,
const float *pk, const float *gamc, const float *gams, const float *pn, float *frc_f,
cudaStream_t stream);
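The quantity dE_dphi in the force kernel is the torsion derivative divided by sin(phi); the geometric factors folded into the cross products supply the remaining sine:
\[ \mathtt{dE\_dphi} = \frac{n \bigl( \mathrm{gamc} \sin(n\phi) - \mathrm{gams} \cos(n\phi) \bigr)}{\sin\phi} = -\frac{1}{\sin\phi} \frac{\partial E}{\partial \phi}. \]
Near sin(phi) = 0, where gams vanishes for the physical phases gamma = 0 or pi, the branch substitutes the finite form gamc (n - n' + n' cos(phi)), with n' zeroed for even periodicities; this reading follows the code above rather than any accompanying documentation.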

View File

@ -1,28 +0,0 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_DIHEDRAL_DIHEDRAL_FORCE_IMPL_H_
#define MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_DIHEDRAL_DIHEDRAL_FORCE_IMPL_H_
#include <curand_kernel.h>
#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/cuda_common.h"
#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/cuda_device_info.h"
CUDA_LIB_EXPORT void DihedralForce(int dihedral_numbers, int atom_numbers, const int *uint_crd_f, const float *scaler_f,
const int *atom_a, const int *atom_b, const int *atom_c, const int *atom_d,
const int *ipn, const float *pk, const float *gamc, const float *gams,
const float *pn, float *frc_f, cudaStream_t stream);
#endif // MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_DIHEDRAL_DIHEDRAL_FORCE_IMPL_H_

View File

@ -1,125 +0,0 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "plugin/device/gpu/kernel/cuda_impl/sponge/dihedral/dihedral_force_with_atom_energy_impl.cuh"
#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/util.cuh"
#include "plugin/device/gpu/kernel/cuda_impl/sponge/common_sponge.cuh"
__global__ void DihedralForceWithAtomEnergyKernel(int dihedral_numbers, const UNSIGNED_INT_VECTOR *uint_crd,
const VECTOR *scaler, const int *atom_a, const int *atom_b,
const int *atom_c, const int *atom_d, const int *ipn, const float *pk,
const float *gamc, const float *gams, const float *pn, VECTOR *frc,
float *ene) {
int dihedral_i = blockDim.x * blockIdx.x + threadIdx.x;
if (dihedral_i < dihedral_numbers) {
int atom_i = atom_a[dihedral_i];
int atom_j = atom_b[dihedral_i];
int atom_k = atom_c[dihedral_i];
int atom_l = atom_d[dihedral_i];
int temp_ipn = ipn[dihedral_i];
float temp_pk = pk[dihedral_i];
float temp_pn = pn[dihedral_i];
float temp_gamc = gamc[dihedral_i];
float temp_gams = gams[dihedral_i];
VECTOR drij = Get_Periodic_Displacement(uint_crd[atom_i], uint_crd[atom_j], scaler[0]);
VECTOR drkj = Get_Periodic_Displacement(uint_crd[atom_k], uint_crd[atom_j], scaler[0]);
VECTOR drkl = Get_Periodic_Displacement(uint_crd[atom_k], uint_crd[atom_l], scaler[0]);
VECTOR r1 = drij ^ drkj;
VECTOR r2 = drkl ^ drkj;
float r1_1 = rnorm3df(r1.x, r1.y, r1.z);
float r2_1 = rnorm3df(r2.x, r2.y, r2.z);
float r1_2 = r1_1 * r1_1;
float r2_2 = r2_1 * r2_1;
float r1_1_r2_1 = r1_1 * r2_1;
float phi = r1 * r2 * r1_1_r2_1;
phi = fmaxf(-0.999999, fminf(phi, 0.999999));
phi = acosf(phi);
float sign = (r2 ^ r1) * drkj;
phi = copysignf(phi, sign);
phi = CONSTANT_Pi - phi;
float nphi = temp_pn * phi;
float cos_phi = cosf(phi);
float sin_phi = sinf(phi);
float cos_nphi = cosf(nphi);
float sin_nphi = sinf(nphi);
float dE_dphi;
if (fabsf(sin_phi) < 1e-6) {
temp_ipn *= (((temp_ipn - 1) & 1) ^ 1);  // zero out even periodicities
dE_dphi = temp_gamc * (temp_pn - temp_ipn + temp_ipn * cos_phi);
} else {
dE_dphi = temp_pn * (temp_gamc * sin_nphi - temp_gams * cos_nphi) / sin_phi;
}
VECTOR dphi_dr1 = r1_1_r2_1 * r2 + cos_phi * r1_2 * r1;
VECTOR dphi_dr2 = r1_1_r2_1 * r1 + cos_phi * r2_2 * r2;
VECTOR dE_dri = dE_dphi * drkj ^ dphi_dr1;
VECTOR dE_drl = dE_dphi * dphi_dr2 ^ drkj;
VECTOR dE_drj_part = dE_dphi * ((drij ^ dphi_dr1) + (drkl ^ dphi_dr2));
VECTOR fi = dE_dri;
VECTOR fj = dE_drj_part - dE_dri;
VECTOR fk = -dE_drl - dE_drj_part;
VECTOR fl = dE_drl;
atomicAdd(&frc[atom_i].x, fi.x);
atomicAdd(&frc[atom_i].y, fi.y);
atomicAdd(&frc[atom_i].z, fi.z);
atomicAdd(&frc[atom_j].x, fj.x);
atomicAdd(&frc[atom_j].y, fj.y);
atomicAdd(&frc[atom_j].z, fj.z);
atomicAdd(&frc[atom_k].x, fk.x);
atomicAdd(&frc[atom_k].y, fk.y);
atomicAdd(&frc[atom_k].z, fk.z);
atomicAdd(&frc[atom_l].x, fl.x);
atomicAdd(&frc[atom_l].y, fl.y);
atomicAdd(&frc[atom_l].z, fl.z);
atomicAdd(&ene[atom_i], (temp_pk + cos_nphi * temp_gamc + sin_nphi * temp_gams));
}
}
void DihedralForceWithAtomEnergy(int dihedral_numbers, int atom_numbers, const int *uint_crd_f, const float *scaler_f,
const int *atom_a, const int *atom_b, const int *atom_c, const int *atom_d,
const int *ipn, const float *pk, const float *gamc, const float *gams, const float *pn,
float *frc_f, float *ene, cudaStream_t stream) {
Reset_List<<<ceilf(static_cast<float>(3. * atom_numbers) / 128), 128, 0, stream>>>(3 * atom_numbers, frc_f, 0.);
size_t thread_per_block = 128;
size_t block_per_grid = ceilf(static_cast<float>(dihedral_numbers) / 128);
UNSIGNED_INT_VECTOR *uint_crd =
const_cast<UNSIGNED_INT_VECTOR *>(reinterpret_cast<const UNSIGNED_INT_VECTOR *>(uint_crd_f));
VECTOR *frc = const_cast<VECTOR *>(reinterpret_cast<const VECTOR *>(frc_f));
VECTOR *scaler = const_cast<VECTOR *>(reinterpret_cast<const VECTOR *>(scaler_f));
DihedralForceWithAtomEnergyKernel<<<block_per_grid, thread_per_block, 0, stream>>>(
dihedral_numbers, uint_crd, scaler, atom_a, atom_b, atom_c, atom_d, ipn, pk, gamc, gams, pn, frc, ene);
return;
}
void DihedralForceWithAtomEnergy(int dihedral_numbers, int atom_numbers, const int *uint_crd_f, const float *scaler_f,
const int *atom_a, const int *atom_b, const int *atom_c, const int *atom_d,
const int *ipn, const float *pk, const float *gamc, const float *gams, const float *pn,
float *frc_f, float *ene, cudaStream_t stream);

View File

@ -1,29 +0,0 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_DIHEDRAL_DIHEDRAL_FORCE_WITH_ATOM_ENERGY_IMPL_H_
#define MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_DIHEDRAL_DIHEDRAL_FORCE_WITH_ATOM_ENERGY_IMPL_H_
#include <curand_kernel.h>
#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/cuda_common.h"
#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/cuda_device_info.h"
CUDA_LIB_EXPORT void DihedralForceWithAtomEnergy(int dihedral_numbers, int atom_numbers, const int *uint_crd_f,
const float *scaler_f, const int *atom_a, const int *atom_b,
const int *atom_c, const int *atom_d, const int *ipn, const float *pk,
const float *gamc, const float *gams, const float *pn, float *frc_f,
float *ene, cudaStream_t stream);
#endif // MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_DIHEDRAL_DIHEDRAL_FORCE_WITH_ATOM_ENERGY_IMPL_H_

View File

@ -1,144 +0,0 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
* Note:
* LJForce. This is an experimental interface that is subject to change and/or deletion.
*/
#include "plugin/device/gpu/kernel/cuda_impl/sponge/lj/lj_direct_cf_force_with_lj_virial_direct_cf_energy_impl.cuh"
#include "plugin/device/gpu/kernel/cuda_impl/sponge/common_sponge.cuh"
__global__ void LJ_Direct_CF_Force_With_LJ_Virial_Direct_CF_Energy_CUDA(
const int atom_numbers, const NEIGHBOR_LIST *nl, const UINT_VECTOR_LJ_TYPE *uint_crd, const VECTOR *boxlength,
const float *LJ_type_A, const float *LJ_type_B, const float cutoff, VECTOR *frc, const float pme_beta,
const float sqrt_pi, float *atom_lj_virial, float *atom_direct_cf_energy) {
int atom_i = blockDim.x * blockIdx.x + threadIdx.x;
if (atom_i < atom_numbers) {
NEIGHBOR_LIST nl_i = nl[atom_i];
int N = nl_i.atom_numbers;
int atom_j;
int int_x;
int int_y;
int int_z;
UINT_VECTOR_LJ_TYPE r1 = uint_crd[atom_i], r2;
VECTOR dr;
float dr_2;
float dr_4;
float dr_8;
float dr_6;
float frc_abs = 0.;
VECTOR frc_lin;
VECTOR frc_record = {0., 0., 0.};
float charge_i = r1.charge;
float charge_j;
float dr_abs;
float dr_1;
float beta_dr;
float frc_cf_abs;
float virial_lin = 0.;
float energy_lin = 0.;
int x, y;
int atom_pair_LJ_type;
for (int j = threadIdx.y; j < N; j = j + blockDim.y) {
atom_j = nl_i.atom_serial[j];
r2 = uint_crd[atom_j];
charge_j = r2.charge;
int_x = r2.uint_x - r1.uint_x;
int_y = r2.uint_y - r1.uint_y;
int_z = r2.uint_z - r1.uint_z;
dr.x = boxlength[0].x * int_x;
dr.y = boxlength[0].y * int_y;
dr.z = boxlength[0].z * int_z;
dr_abs = norm3df(dr.x, dr.y, dr.z);
if (dr_abs < cutoff) {
dr_1 = 1. / dr_abs;
dr_2 = dr_1 * dr_1;
dr_4 = dr_2 * dr_2;
dr_8 = dr_4 * dr_4;
dr_6 = dr_4 * dr_2;
y = (r2.LJ_type - r1.LJ_type);
x = y >> 31;                      // sign mask of the difference
y = (y ^ x) - x;                  // y = |LJ_type_j - LJ_type_i|
x = r2.LJ_type + r1.LJ_type;
r2.LJ_type = (x + y) >> 1;        // max of the two types
x = (x - y) >> 1;                 // min of the two types
atom_pair_LJ_type = (r2.LJ_type * (r2.LJ_type + 1) >> 1) + x;  // upper-triangular pair index
frc_abs = (-LJ_type_A[atom_pair_LJ_type] * dr_6 + LJ_type_B[atom_pair_LJ_type]) * dr_8;
beta_dr = pme_beta * dr_abs;
frc_cf_abs = beta_dr * sqrt_pi * expf(-beta_dr * beta_dr) + erfcf(beta_dr);
frc_cf_abs = frc_cf_abs * dr_2 * dr_1;
frc_cf_abs = charge_i * charge_j * frc_cf_abs;
energy_lin = energy_lin + charge_i * charge_j * erfcf(beta_dr) * dr_1;
virial_lin = virial_lin - frc_abs * dr_abs * dr_abs;
frc_abs = frc_abs - frc_cf_abs;
frc_lin.x = frc_abs * dr.x;
frc_lin.y = frc_abs * dr.y;
frc_lin.z = frc_abs * dr.z;
frc_record.x = frc_record.x + frc_lin.x;
frc_record.y = frc_record.y + frc_lin.y;
frc_record.z = frc_record.z + frc_lin.z;
atomicAdd(&frc[atom_j].x, -frc_lin.x);
atomicAdd(&frc[atom_j].y, -frc_lin.y);
atomicAdd(&frc[atom_j].z, -frc_lin.z);
}
}
atomicAdd(&frc[atom_i].x, frc_record.x);
atomicAdd(&frc[atom_i].y, frc_record.y);
atomicAdd(&frc[atom_i].z, frc_record.z);
atomicAdd(&atom_direct_cf_energy[atom_i], energy_lin);
atomicAdd(&atom_lj_virial[atom_i], virial_lin);
}
}
void LJ_Direct_CF_Force_With_LJ_Virial_Direct_CF_Energy(
const int atom_numbers, const float cutoff, const float pme_beta, const unsigned int *uint_crd_f, const int *LJtype,
const float *charge, const float *scaler_f, float *uint_crd_with_LJ, int *nl_atom_numbers, int *nl_atom_serial,
int *nl, const float *d_LJ_A, const float *d_LJ_B, float *frc_f, float *atom_lj_virial, float *atom_energy,
int max_neighbor_numbers, cudaStream_t stream) {
Reset_List<<<ceilf(static_cast<float>(3. * atom_numbers) / 128), 128, 0, stream>>>(3 * atom_numbers, frc_f, 0.);
Reset_List<<<ceilf(static_cast<float>(atom_numbers) / 128), 128, 0, stream>>>(atom_numbers, atom_energy, 0.);
Reset_List<<<ceilf(static_cast<float>(atom_numbers) / 128), 128, 0, stream>>>(atom_numbers, atom_lj_virial, 0.);
VECTOR *frc = reinterpret_cast<VECTOR *>(frc_f);
VECTOR *scaler = const_cast<VECTOR *>(reinterpret_cast<const VECTOR *>(scaler_f));
NEIGHBOR_LIST *nl_a = reinterpret_cast<NEIGHBOR_LIST *>(nl);
construct_neighbor_list_kernel<<<ceilf(static_cast<float>(atom_numbers) / 128), 128, 0, stream>>>(
atom_numbers, max_neighbor_numbers, nl_atom_numbers, nl_atom_serial, nl_a);
UINT_VECTOR_LJ_TYPE *uint_crd_with_LJ_a = reinterpret_cast<UINT_VECTOR_LJ_TYPE *>(uint_crd_with_LJ);
UNSIGNED_INT_VECTOR *uint_crd =
const_cast<UNSIGNED_INT_VECTOR *>(reinterpret_cast<const UNSIGNED_INT_VECTOR *>(uint_crd_f));
Copy_Crd_To_New_Crd_Start<<<ceilf(static_cast<float>(atom_numbers) / 32), 32, 0, stream>>>(
atom_numbers, uint_crd, uint_crd_with_LJ_a, LJtype, charge);
LJ_Direct_CF_Force_With_LJ_Virial_Direct_CF_Energy_CUDA<<<ceilf(static_cast<float>(atom_numbers) / 8), thread_LJ, 0,
stream>>>(
atom_numbers, nl_a, uint_crd_with_LJ_a, scaler, d_LJ_A, d_LJ_B, cutoff, frc, pme_beta, TWO_DIVIDED_BY_SQRT_PI,
atom_lj_virial, atom_energy);
return;
}
void LJ_Direct_CF_Force_With_LJ_Virial_Direct_CF_Energy(
const int atom_numbers, const float cutoff, const float pme_beta, const unsigned int *uint_crd_f, const int *LJtype,
const float *charge, const float *scaler_f, float *uint_crd_with_LJ, int *nl_atom_numbers, int *nl_atom_serial,
int *nl, const float *d_LJ_A, const float *d_LJ_B, float *frc_f, float *atom_lj_virial, float *atom_energy,
int max_neighbor_numbers, cudaStream_t stream);
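The branchless index computation shared by this and the other LJ kernels in this commit (the `y >> 31` shift pair) maps an unordered LJ type pair onto a flat upper-triangular coefficient table. A minimal host-side C++ restatement for checking the arithmetic; the name PairIndex and the test values are ours, not part of the deleted sources:

#include <cassert>

// Same branchless arithmetic as the kernels: assumes 32-bit int and an
// arithmetic right shift, which CUDA and mainstream host compilers provide.
int PairIndex(int a, int b) {
  int y = b - a;
  int x = y >> 31;                   // -1 if y < 0, else 0
  y = (y ^ x) - x;                   // |b - a|
  x = a + b;
  int hi = (x + y) >> 1;             // max(a, b)
  int lo = (x - y) >> 1;             // min(a, b)
  return (hi * (hi + 1) >> 1) + lo;  // triangular base of row hi, plus column lo
}

int main() {
  assert(PairIndex(2, 5) == PairIndex(5, 2));  // symmetric in the two types
  assert(PairIndex(0, 0) == 0);
  assert(PairIndex(3, 3) == 3 * 4 / 2 + 3);    // diagonal entry of row 3
  return 0;
}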

View File

@ -1,34 +0,0 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
* Note:
* LJForce. This is an experimental interface that is subject to change and/or deletion.
*/
#ifndef MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_LJ_LJ_DIRECT_CF_FORCE_WITH_LJ_VIRIAL_DIRECT_CF_ENERGY_IMPL_H_
#define MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_LJ_LJ_DIRECT_CF_FORCE_WITH_LJ_VIRIAL_DIRECT_CF_ENERGY_IMPL_H_
#include <curand_kernel.h>
#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/cuda_common.h"
#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/cuda_device_info.h"
CUDA_LIB_EXPORT void LJ_Direct_CF_Force_With_LJ_Virial_Direct_CF_Energy(
const int atom_numbers, const float cutoff, const float pme_beta, const unsigned int *uint_crd_f, const int *LJtype,
const float *charge, const float *scaler_f, float *uint_crd_with_LJ, int *nl_atom_numbers, int *nl_atom_serial,
int *nl, const float *d_LJ_A, const float *d_LJ_B, float *frc_f, float *atom_lj_virial, float *atom_energy,
int max_neighbor_numbers, cudaStream_t stream);
#endif

View File

@ -1,102 +0,0 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "plugin/device/gpu/kernel/cuda_impl/sponge/lj/lj_energy_impl.cuh"
#include "plugin/device/gpu/kernel/cuda_impl/sponge/common_sponge.cuh"
__global__ void LJ_Energy_CUDA(const int atom_numbers, const NEIGHBOR_LIST *nl, const UINT_VECTOR_LJ_TYPE *uint_crd,
const VECTOR *boxlength, const float *LJ_type_A, const float *LJ_type_B,
const float cutoff_square, float *lj_ene) {
int atom_i = blockDim.x * blockIdx.x + threadIdx.x;
if (atom_i < atom_numbers) {
NEIGHBOR_LIST nl_i = nl[atom_i];
int N = nl_i.atom_numbers;
int atom_j;
int int_x;
int int_y;
int int_z;
UINT_VECTOR_LJ_TYPE r1 = uint_crd[atom_i], r2;
VECTOR dr;
float dr2;
float dr_2;
float dr_4;
float dr_6;
float ene_lin = 0.;
int x, y;
int atom_pair_LJ_type;
for (int j = threadIdx.y; j < N; j = j + blockDim.y) {
atom_j = nl_i.atom_serial[j];
r2 = uint_crd[atom_j];
int_x = r2.uint_x - r1.uint_x;
int_y = r2.uint_y - r1.uint_y;
int_z = r2.uint_z - r1.uint_z;
dr.x = boxlength[0].x * int_x;
dr.y = boxlength[0].y * int_y;
dr.z = boxlength[0].z * int_z;
dr2 = dr.x * dr.x + dr.y * dr.y + dr.z * dr.z;
if (dr2 < cutoff_square) {
dr_2 = 1. / dr2;
dr_4 = dr_2 * dr_2;
dr_6 = dr_4 * dr_2;
y = (r2.LJ_type - r1.LJ_type);
x = y >> 31;
y = (y ^ x) - x;
x = r2.LJ_type + r1.LJ_type;
r2.LJ_type = (x + y) >> 1;
x = (x - y) >> 1;
atom_pair_LJ_type = (r2.LJ_type * (r2.LJ_type + 1) >> 1) + x;
dr_2 = (0.083333333 * LJ_type_A[atom_pair_LJ_type] * dr_6 - 0.166666666 * LJ_type_B[atom_pair_LJ_type]) * dr_6;  // dr_2 is reused here as the pair energy; A and B are stored premultiplied by 12 and 6
ene_lin = ene_lin + dr_2;
}
}
atomicAdd(&lj_ene[atom_i], ene_lin);
}
}
void LJEnergy(const int atom_numbers, const float cutoff_square, const int *uint_crd_f, const int *LJtype,
const float *charge, const float *scaler_f, float *uint_crd_with_LJ, int *nl_atom_numbers,
int *nl_atom_serial, int *nl, const float *d_LJ_A, const float *d_LJ_B, float *d_LJ_energy_atom,
cudaStream_t stream) {
VECTOR *scaler = const_cast<VECTOR *>(reinterpret_cast<const VECTOR *>(scaler_f));
int max_neighbor_numbers = 800;
NEIGHBOR_LIST *nl_a = reinterpret_cast<NEIGHBOR_LIST *>(nl);
construct_neighbor_list_kernel<<<ceilf(static_cast<float>(atom_numbers) / 128), 128, 0, stream>>>(
atom_numbers, max_neighbor_numbers, nl_atom_numbers, nl_atom_serial, nl_a);
UINT_VECTOR_LJ_TYPE *uint_crd_with_LJ_a = reinterpret_cast<UINT_VECTOR_LJ_TYPE *>(uint_crd_with_LJ);
UNSIGNED_INT_VECTOR *uint_crd =
const_cast<UNSIGNED_INT_VECTOR *>(reinterpret_cast<const UNSIGNED_INT_VECTOR *>(uint_crd_f));
Copy_Crd_To_New_Crd_Start<<<ceilf(static_cast<float>(atom_numbers) / 32), 32, 0, stream>>>(
atom_numbers, uint_crd, uint_crd_with_LJ_a, LJtype, charge);
Reset_List<<<ceilf(static_cast<float>(atom_numbers) / 32), 32, 0, stream>>>(atom_numbers, d_LJ_energy_atom, 0.);
LJ_Energy_CUDA<<<ceilf(static_cast<float>(atom_numbers) / 8), thread_LJ, 0, stream>>>(
atom_numbers, nl_a, uint_crd_with_LJ_a, scaler, d_LJ_A, d_LJ_B, cutoff_square, d_LJ_energy_atom);
return;
}
void LJEnergy(const int atom_numbers, const float cutoff_square, const int *uint_crd_f, const int *LJtype,
const float *charge, const float *scaler_f, float *uint_crd_with_LJ, int *nl_atom_numbers,
int *nl_atom_serial, int *nl, const float *d_LJ_A, const float *d_LJ_B, float *d_LJ_energy_atom,
cudaStream_t stream);
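Read together with the 0.083333333 = 1/12 and 0.166666666 = 1/6 prefactors, the per-neighbor term accumulated above is the standard 12-6 Lennard-Jones energy under the convention (confirmed by comments in the NB14 kernels below) that the stored A and B are premultiplied by 12 and 6:

  E_{\mathrm{LJ}}(r) = \frac{A}{12}\,r^{-12} - \frac{B}{6}\,r^{-6}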

View File

@ -1,28 +0,0 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_LJ_LJ_ENERGY_IMPL_H_
#define MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_LJ_LJ_ENERGY_IMPL_H_
#include <curand_kernel.h>
#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/cuda_common.h"
#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/cuda_device_info.h"
CUDA_LIB_EXPORT void LJEnergy(const int atom_numbers, const float cutoff_square, const int *uint_crd_f,
const int *LJtype, const float *charge, const float *scaler_f, float *uint_crd_with_LJ,
int *nl_atom_numbers, int *nl_atom_serial, int *nl, const float *d_LJ_A,
const float *d_LJ_B, float *d_LJ_energy_atom, cudaStream_t stream);
#endif

View File

@ -1,117 +0,0 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "plugin/device/gpu/kernel/cuda_impl/sponge/lj/lj_force_impl.cuh"
#include "plugin/device/gpu/kernel/cuda_impl/sponge/common_sponge.cuh"
__global__ void LJ_Force_CUDA(const int atom_numbers, const NEIGHBOR_LIST *nl, const UINT_VECTOR_LJ_TYPE *uint_crd,
const VECTOR *boxlength, const float *LJ_type_A, const float *LJ_type_B,
const float cutoff_square, VECTOR *frc) {
int atom_i = blockDim.x * blockIdx.x + threadIdx.x;
if (atom_i < atom_numbers) {
NEIGHBOR_LIST nl_i = nl[atom_i];
int N = nl_i.atom_numbers;
int B = ceilf(static_cast<float>(N) / blockDim.y);
int atom_j;
int int_x;
int int_y;
int int_z;
UINT_VECTOR_LJ_TYPE r1 = uint_crd[atom_i], r2;
VECTOR dr;
float dr2;
float dr_2;
float dr_4;
float dr_8;
float dr_14;
float frc_abs = 0.;
VECTOR frc_lin;
VECTOR frc_record = {0., 0., 0.};
int x, y;
int atom_pair_LJ_type;
for (int j = threadIdx.y * B; j < (threadIdx.y + 1) * B; j = j + 1) {
if (j < N) {
atom_j = nl_i.atom_serial[j];
r2 = uint_crd[atom_j];
int_x = r2.uint_x - r1.uint_x;
int_y = r2.uint_y - r1.uint_y;
int_z = r2.uint_z - r1.uint_z;
dr.x = boxlength[0].x * int_x;
dr.y = boxlength[0].y * int_y;
dr.z = boxlength[0].z * int_z;
dr2 = dr.x * dr.x + dr.y * dr.y + dr.z * dr.z;
if (dr2 < cutoff_square) {
dr_2 = 1. / dr2;
dr_4 = dr_2 * dr_2;
dr_8 = dr_4 * dr_4;
dr_14 = dr_8 * dr_4 * dr_2;
y = (r2.LJ_type - r1.LJ_type);
x = y >> 31;
y = (y ^ x) - x;
x = r2.LJ_type + r1.LJ_type;
r2.LJ_type = (x + y) >> 1;
x = (x - y) >> 1;
atom_pair_LJ_type = (r2.LJ_type * (r2.LJ_type + 1) >> 1) + x;
frc_abs = -LJ_type_A[atom_pair_LJ_type] * dr_14 + LJ_type_B[atom_pair_LJ_type] * dr_8;
frc_lin.x = frc_abs * dr.x;
frc_lin.y = frc_abs * dr.y;
frc_lin.z = frc_abs * dr.z;
frc_record.x = frc_record.x + frc_lin.x;
frc_record.y = frc_record.y + frc_lin.y;
frc_record.z = frc_record.z + frc_lin.z;
atomicAdd(&frc[atom_j].x, -frc_lin.x);
atomicAdd(&frc[atom_j].y, -frc_lin.y);
atomicAdd(&frc[atom_j].z, -frc_lin.z);
}
}
}
atomicAdd(&frc[atom_i].x, frc_record.x);
atomicAdd(&frc[atom_i].y, frc_record.y);
atomicAdd(&frc[atom_i].z, frc_record.z);
}
}
void LJForce(const int atom_numbers, const float cutoff_square, const int *uint_crd_f, const int *LJtype,
const float *charge, const float *scaler_f, float *uint_crd_with_LJ, int *nl_atom_numbers,
int *nl_atom_serial, int *nl, const float *d_LJ_A, const float *d_LJ_B, float *frc_f,
cudaStream_t stream) {
Reset_List<<<ceilf(static_cast<float>(3. * atom_numbers) / 128), 128, 0, stream>>>(3 * atom_numbers, frc_f, 0.);
VECTOR *frc = reinterpret_cast<VECTOR *>(frc_f);
VECTOR *scaler = const_cast<VECTOR *>(reinterpret_cast<const VECTOR *>(scaler_f));
int max_neighbor_numbers = 800;
NEIGHBOR_LIST *nl_a = reinterpret_cast<NEIGHBOR_LIST *>(nl);
construct_neighbor_list_kernel<<<ceilf(static_cast<float>(atom_numbers) / 128), 128, 0, stream>>>(
atom_numbers, max_neighbor_numbers, nl_atom_numbers, nl_atom_serial, nl_a);
UINT_VECTOR_LJ_TYPE *uint_crd_with_LJ_a = reinterpret_cast<UINT_VECTOR_LJ_TYPE *>(uint_crd_with_LJ);
UNSIGNED_INT_VECTOR *uint_crd =
const_cast<UNSIGNED_INT_VECTOR *>(reinterpret_cast<const UNSIGNED_INT_VECTOR *>(uint_crd_f));
Copy_Crd_To_New_Crd_Start<<<ceilf(static_cast<float>(atom_numbers) / 32), 32, 0, stream>>>(
atom_numbers, uint_crd, uint_crd_with_LJ_a, LJtype, charge);
LJ_Force_CUDA<<<ceilf(static_cast<float>(atom_numbers) / 8), thread_LJ, 0, stream>>>(
atom_numbers, nl_a, uint_crd_with_LJ_a, scaler, d_LJ_A, d_LJ_B, cutoff_square, frc);
return;
}
void LJForce(const int atom_numbers, const float cutoff_square, const int *uint_crd_f, const int *LJtype,
const float *charge, const float *scaler_f, float *uint_crd_with_LJ, int *nl_atom_numbers,
int *nl_atom_serial, int *nl, const float *d_LJ_A, const float *d_LJ_B, float *frc_f, cudaStream_t stream);
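The frc_abs accumulated in this kernel is consistent with the radial derivative of that premultiplied 12-6 energy, divided once more by r so that it can scale the displacement components directly:

  \frac{1}{r}\,\frac{\mathrm{d}E_{\mathrm{LJ}}}{\mathrm{d}r} = -A\,r^{-14} + B\,r^{-8}

The sign convention is then carried by the atomicAdd pattern: the scaled displacement is subtracted from atom j and added back to atom i.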

View File

@ -1,29 +0,0 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_LJ_LJ_FORCE_IMPL_H_
#define MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_LJ_LJ_FORCE_IMPL_H_
#include <curand_kernel.h>
#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/cuda_common.h"
#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/cuda_device_info.h"
CUDA_LIB_EXPORT void LJForce(const int atom_numbers, const float cutoff_square, const int *uint_crd_f,
const int *LJtype, const float *charge, const float *scaler_f, float *uint_crd_with_LJ,
int *nl_atom_numbers, int *nl_atom_serial, int *nl, const float *d_LJ_A,
const float *d_LJ_B, float *frc_f, cudaStream_t stream);
#endif

View File

@ -1,133 +0,0 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "plugin/device/gpu/kernel/cuda_impl/sponge/lj/lj_force_with_pme_direct_force_impl.cuh"
#include "plugin/device/gpu/kernel/cuda_impl/sponge/common_sponge.cuh"
__global__ void LJ_Force_With_Direct_CF_CUDA(const int atom_numbers, const NEIGHBOR_LIST *nl,
const UINT_VECTOR_LJ_TYPE *uint_crd, const VECTOR *boxlength,
const float *LJ_type_A, const float *LJ_type_B, const float cutoff,
VECTOR *frc, const float pme_beta, const float sqrt_pi) {
int atom_i = blockDim.x * blockIdx.x + threadIdx.x;
if (atom_i < atom_numbers) {
NEIGHBOR_LIST nl_i = nl[atom_i];
int N = nl_i.atom_numbers;
int atom_j;
int int_x;
int int_y;
int int_z;
UINT_VECTOR_LJ_TYPE r1 = uint_crd[atom_i], r2;
VECTOR dr;
float dr_2;
float dr_4;
float dr_8;
float dr_6;
float frc_abs = 0.;
VECTOR frc_lin;
VECTOR frc_record = {0., 0., 0.};
float charge_i = r1.charge;
float charge_j;
float dr_abs;
float dr_1;
float beta_dr;
float frc_cf_abs;
int x, y;
int atom_pair_LJ_type;
for (int j = threadIdx.y; j < N; j = j + blockDim.y) {
atom_j = nl_i.atom_serial[j];
r2 = uint_crd[atom_j];
charge_j = r2.charge;
int_x = r2.uint_x - r1.uint_x;
int_y = r2.uint_y - r1.uint_y;
int_z = r2.uint_z - r1.uint_z;
dr.x = boxlength[0].x * int_x;
dr.y = boxlength[0].y * int_y;
dr.z = boxlength[0].z * int_z;
dr_abs = norm3df(dr.x, dr.y, dr.z);
if (dr_abs < cutoff) {
dr_1 = 1. / dr_abs;
dr_2 = dr_1 * dr_1;
dr_4 = dr_2 * dr_2;
dr_8 = dr_4 * dr_4;
dr_6 = dr_4 * dr_2;
y = (r2.LJ_type - r1.LJ_type);
x = y >> 31;
y = (y ^ x) - x;
x = r2.LJ_type + r1.LJ_type;
r2.LJ_type = (x + y) >> 1;
x = (x - y) >> 1;
atom_pair_LJ_type = (r2.LJ_type * (r2.LJ_type + 1) >> 1) + x;
frc_abs = (-LJ_type_A[atom_pair_LJ_type] * dr_6 + LJ_type_B[atom_pair_LJ_type]) * dr_8;
beta_dr = pme_beta * dr_abs;
frc_cf_abs = beta_dr * sqrt_pi * expf(-beta_dr * beta_dr) + erfcf(beta_dr);
frc_cf_abs = frc_cf_abs * dr_2 * dr_1;
frc_cf_abs = charge_i * charge_j * frc_cf_abs;
frc_abs = frc_abs - frc_cf_abs;
frc_lin.x = frc_abs * dr.x;
frc_lin.y = frc_abs * dr.y;
frc_lin.z = frc_abs * dr.z;
frc_record.x = frc_record.x + frc_lin.x;
frc_record.y = frc_record.y + frc_lin.y;
frc_record.z = frc_record.z + frc_lin.z;
atomicAdd(&frc[atom_j].x, -frc_lin.x);
atomicAdd(&frc[atom_j].y, -frc_lin.y);
atomicAdd(&frc[atom_j].z, -frc_lin.z);
}
}
atomicAdd(&frc[atom_i].x, frc_record.x);
atomicAdd(&frc[atom_i].y, frc_record.y);
atomicAdd(&frc[atom_i].z, frc_record.z);
}
}
void LJForceWithPMEDirectForce(const int atom_numbers, const float cutoff, const float pme_beta, const int *uint_crd_f,
const int *LJtype, const float *charge, const float *scaler_f, float *uint_crd_with_LJ,
int *nl_atom_numbers, int *nl_atom_serial, int *nl, const float *d_LJ_A,
const float *d_LJ_B, float *frc_f, cudaStream_t stream) {
Reset_List<<<ceilf(static_cast<float>(3. * atom_numbers) / 128), 128, 0, stream>>>(3 * atom_numbers, frc_f, 0.);
VECTOR *frc = reinterpret_cast<VECTOR *>(frc_f);
VECTOR *scaler = const_cast<VECTOR *>(reinterpret_cast<const VECTOR *>(scaler_f));
int max_neighbor_numbers = 800;
NEIGHBOR_LIST *nl_a = reinterpret_cast<NEIGHBOR_LIST *>(nl);
construct_neighbor_list_kernel<<<ceilf(static_cast<float>(atom_numbers) / 128), 128, 0, stream>>>(
atom_numbers, max_neighbor_numbers, nl_atom_numbers, nl_atom_serial, nl_a);
UINT_VECTOR_LJ_TYPE *uint_crd_with_LJ_a = reinterpret_cast<UINT_VECTOR_LJ_TYPE *>(uint_crd_with_LJ);
UNSIGNED_INT_VECTOR *uint_crd =
const_cast<UNSIGNED_INT_VECTOR *>(reinterpret_cast<const UNSIGNED_INT_VECTOR *>(uint_crd_f));
Copy_Crd_To_New_Crd_Start<<<ceilf(static_cast<float>(atom_numbers) / 32), 32, 0, stream>>>(
atom_numbers, uint_crd, uint_crd_with_LJ_a, LJtype, charge);
LJ_Force_With_Direct_CF_CUDA<<<ceilf(static_cast<float>(atom_numbers) / 8), thread_LJ, 0, stream>>>(
atom_numbers, nl_a, uint_crd_with_LJ_a, scaler, d_LJ_A, d_LJ_B, cutoff, frc, pme_beta, TWO_DIVIDED_BY_SQRT_PI);
return;
}
void LJForceWithPMEDirectForce(const int atom_numbers, const float cutoff, const float pme_beta, const int *uint_crd_f,
const int *LJtype, const float *charge, const float *scaler_f, float *uint_crd_with_LJ,
int *nl_atom_numbers, int *nl_atom_serial, int *nl, const float *d_LJ_A,
const float *d_LJ_B, float *frc_f, cudaStream_t stream);
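With sqrt_pi bound to TWO_DIVIDED_BY_SQRT_PI = 2/sqrt(pi), the frc_cf_abs expression matches the direct-space term of an Ewald/PME decomposition, i.e. minus the radial derivative of the screened Coulomb energy q_i q_j erfc(beta r)/r, divided by r:

  \frac{F_{\mathrm{dir}}(r)}{r} = \frac{q_i q_j}{r^{3}}\left[\mathrm{erfc}(\beta r) + \frac{2\beta r}{\sqrt{\pi}}\,e^{-\beta^{2} r^{2}}\right]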

View File

@ -1,30 +0,0 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_LJ_LJ_FORCE_WITH_PME_DIRECT_FORCE_IMPL_H_
#define MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_LJ_LJ_FORCE_WITH_PME_DIRECT_FORCE_IMPL_H_
#include <curand_kernel.h>
#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/cuda_common.h"
#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/cuda_device_info.h"
CUDA_LIB_EXPORT void LJForceWithPMEDirectForce(const int atom_numbers, const float cutoff, const float pme_beta,
const int *uint_crd_f, const int *LJtype, const float *charge,
const float *scaler_f, float *uint_crd_with_LJ, int *nl_atom_numbers,
int *nl_atom_serial, int *nl, const float *d_LJ_A, const float *d_LJ_B,
float *frc_f, cudaStream_t stream);
#endif

View File

@ -1,147 +0,0 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "plugin/device/gpu/kernel/cuda_impl/sponge/lj/lj_pme_direct_force_with_atom_energy_impl.cuh"
#include "plugin/device/gpu/kernel/cuda_impl/sponge/common_sponge.cuh"
__global__ void LJ_Direct_CF_Force_With_Atom_Energy_CUDA(const int atom_numbers, const NEIGHBOR_LIST *nl,
const UINT_VECTOR_LJ_TYPE *uint_crd, const VECTOR *boxlength,
const float *LJ_type_A, const float *LJ_type_B,
const float cutoff, VECTOR *frc, const float pme_beta,
const float sqrt_pi, float *atom_energy) {
int atom_i = blockDim.x * blockIdx.x + threadIdx.x;
if (atom_i < atom_numbers) {
NEIGHBOR_LIST nl_i = nl[atom_i];
int N = nl_i.atom_numbers;
int atom_j;
int int_x;
int int_y;
int int_z;
UINT_VECTOR_LJ_TYPE r1 = uint_crd[atom_i], r2;
VECTOR dr;
float dr_2;
float dr_4;
float dr_8;
float dr_6;
float frc_abs = 0.;
VECTOR frc_lin;
VECTOR frc_record = {0., 0., 0.};
float charge_i = r1.charge;
float charge_j;
float dr_abs;
float dr_1;
float beta_dr;
float frc_cf_abs;
float ene_lin = 0.;
float ene_lin2 = 0.;
int x, y;
int atom_pair_LJ_type;
for (int j = threadIdx.y; j < N; j = j + blockDim.y) {
atom_j = nl_i.atom_serial[j];
r2 = uint_crd[atom_j];
charge_j = r2.charge;
int_x = r2.uint_x - r1.uint_x;
int_y = r2.uint_y - r1.uint_y;
int_z = r2.uint_z - r1.uint_z;
dr.x = boxlength[0].x * int_x;
dr.y = boxlength[0].y * int_y;
dr.z = boxlength[0].z * int_z;
dr_abs = norm3df(dr.x, dr.y, dr.z);
if (dr_abs < cutoff) {
dr_1 = 1. / dr_abs;
dr_2 = dr_1 * dr_1;
dr_4 = dr_2 * dr_2;
dr_8 = dr_4 * dr_4;
dr_6 = dr_4 * dr_2;
y = (r2.LJ_type - r1.LJ_type);
x = y >> 31;
y = (y ^ x) - x;
x = r2.LJ_type + r1.LJ_type;
r2.LJ_type = (x + y) >> 1;
x = (x - y) >> 1;
atom_pair_LJ_type = (r2.LJ_type * (r2.LJ_type + 1) >> 1) + x;
frc_abs = (-LJ_type_A[atom_pair_LJ_type] * dr_6 + LJ_type_B[atom_pair_LJ_type]) * dr_8;
beta_dr = pme_beta * dr_abs;
frc_cf_abs = beta_dr * sqrt_pi * expf(-beta_dr * beta_dr) + erfcf(beta_dr);
frc_cf_abs = frc_cf_abs * dr_2 * dr_1;
frc_cf_abs = charge_i * charge_j * frc_cf_abs;
ene_lin2 = ene_lin2 + charge_i * charge_j * erfcf(beta_dr) * dr_1;
ene_lin =
ene_lin +
(0.083333333 * LJ_type_A[atom_pair_LJ_type] * dr_6 - 0.166666666 * LJ_type_B[atom_pair_LJ_type]) * dr_6;
frc_abs = frc_abs - frc_cf_abs;
frc_lin.x = frc_abs * dr.x;
frc_lin.y = frc_abs * dr.y;
frc_lin.z = frc_abs * dr.z;
frc_record.x = frc_record.x + frc_lin.x;
frc_record.y = frc_record.y + frc_lin.y;
frc_record.z = frc_record.z + frc_lin.z;
atomicAdd(&frc[atom_j].x, -frc_lin.x);
atomicAdd(&frc[atom_j].y, -frc_lin.y);
atomicAdd(&frc[atom_j].z, -frc_lin.z);
}
}
atomicAdd(&frc[atom_i].x, frc_record.x);
atomicAdd(&frc[atom_i].y, frc_record.y);
atomicAdd(&frc[atom_i].z, frc_record.z);
atomicAdd(&atom_energy[atom_i], ene_lin + ene_lin2);
}
}
void LJDirectCFForceWithAtomEnergy(const int atom_numbers, const float cutoff, const float pme_beta,
const int *uint_crd_f, const int *LJtype, const float *charge, const float *scaler_f,
float *uint_crd_with_LJ, int *nl_atom_numbers, int *nl_atom_serial, int *nl,
const float *d_LJ_A, const float *d_LJ_B, float *frc_f, float *atom_energy,
cudaStream_t stream) {
Reset_List<<<ceilf(static_cast<float>(3. * atom_numbers) / 128), 128, 0, stream>>>(3 * atom_numbers, frc_f, 0.);
VECTOR *frc = reinterpret_cast<VECTOR *>(frc_f);
VECTOR *scaler = const_cast<VECTOR *>(reinterpret_cast<const VECTOR *>(scaler_f));
int max_neighbor_numbers = 800;
NEIGHBOR_LIST *nl_a = reinterpret_cast<NEIGHBOR_LIST *>(nl);
construct_neighbor_list_kernel<<<ceilf(static_cast<float>(atom_numbers) / 128), 128, 0, stream>>>(
atom_numbers, max_neighbor_numbers, nl_atom_numbers, nl_atom_serial, nl_a);
UINT_VECTOR_LJ_TYPE *uint_crd_with_LJ_a = reinterpret_cast<UINT_VECTOR_LJ_TYPE *>(uint_crd_with_LJ);
UNSIGNED_INT_VECTOR *uint_crd =
const_cast<UNSIGNED_INT_VECTOR *>(reinterpret_cast<const UNSIGNED_INT_VECTOR *>(uint_crd_f));
Copy_Crd_To_New_Crd_Start<<<ceilf(static_cast<float>(atom_numbers) / 32), 32, 0, stream>>>(
atom_numbers, uint_crd, uint_crd_with_LJ_a, LJtype, charge);
LJ_Direct_CF_Force_With_Atom_Energy_CUDA<<<ceilf(static_cast<float>(atom_numbers) / 8), thread_LJ, 0, stream>>>(
atom_numbers, nl_a, uint_crd_with_LJ_a, scaler, d_LJ_A, d_LJ_B, cutoff, frc, pme_beta, TWO_DIVIDED_BY_SQRT_PI,
atom_energy);
return;
}
void LJDirectCFForceWithAtomEnergy(const int atom_numbers, const float cutoff, const float pme_beta,
const int *uint_crd_f, const int *LJtype, const float *charge, const float *scaler_f,
float *uint_crd_with_LJ, int *nl_atom_numbers, int *nl_atom_serial, int *nl,
const float *d_LJ_A, const float *d_LJ_B, float *frc_f, float *atom_energy,
cudaStream_t stream);

View File

@ -1,31 +0,0 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_LJ_LJ_PME_DIRECT_FORCE_WITH_ATOM_ENERGY_IMPL_H_
#define MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_LJ_LJ_PME_DIRECT_FORCE_WITH_ATOM_ENERGY_IMPL_H_
#include <curand_kernel.h>
#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/cuda_common.h"
#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/cuda_device_info.h"
CUDA_LIB_EXPORT void LJDirectCFForceWithAtomEnergy(const int atom_numbers, const float cutoff, const float pme_beta,
const int *uint_crd_f, const int *LJtype, const float *charge,
const float *scaler_f, float *uint_crd_with_LJ, int *nl_atom_numbers,
int *nl_atom_serial, int *nl, const float *d_LJ_A,
const float *d_LJ_B, float *frc_f, float *atom_energy,
cudaStream_t stream);
#endif

View File

@ -1,78 +0,0 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "plugin/device/gpu/kernel/cuda_impl/sponge/nb14/dihedral_14_cf_atom_energy_impl.cuh"
#include "plugin/device/gpu/kernel/cuda_impl/sponge/common_sponge.cuh"
__global__ void Dihedral14CFAtomEnergyKernel(const int dihedral_14_numbers, const UINT_VECTOR_LJ_TYPE *uint_crd,
const VECTOR *boxlength, const int *a_14, const int *b_14,
const float *cf_scale_factor, float *ene) {
int dihedral_14_i = blockDim.x * blockIdx.x + threadIdx.x;
if (dihedral_14_i < dihedral_14_numbers) {
int atom_i = a_14[dihedral_14_i];
int atom_j = b_14[dihedral_14_i];
UINT_VECTOR_LJ_TYPE r1 = uint_crd[atom_i];
UINT_VECTOR_LJ_TYPE r2 = uint_crd[atom_j];
int int_x;
int int_y;
int int_z;
VECTOR dr;
float r_1;
float ene_lin = 0.;
int_x = r2.uint_x - r1.uint_x;
int_y = r2.uint_y - r1.uint_y;
int_z = r2.uint_z - r1.uint_z;
dr.x = boxlength[0].x * int_x;
dr.y = boxlength[0].y * int_y;
dr.z = boxlength[0].z * int_z;
r_1 = rnorm3df(dr.x, dr.y, dr.z);
ene_lin = r1.charge * r2.charge * r_1;
ene_lin *= cf_scale_factor[dihedral_14_i];
atomicAdd(&ene[atom_i], ene_lin);
}
}
void Dihedral14CFAtomEnergy(const int dihedral_14_numbers, const int atom_numbers, const int *uint_crd_f,
const int *LJtype, const float *charge, const float *boxlength_f, const int *a_14,
const int *b_14, const float *cf_scale_factor, float *ene, cudaStream_t stream) {
size_t thread_per_block = 128;
size_t block_per_grid = ceilf(static_cast<float>(dihedral_14_numbers) / 128);  // one thread per 1-4 interaction, matching the kernel's bound
UINT_VECTOR_LJ_TYPE *uint_crd_with_LJ = NULL;
Cuda_Malloc_Safely(reinterpret_cast<void **>(&uint_crd_with_LJ), sizeof(UINT_VECTOR_LJ_TYPE) * atom_numbers);  // NOTE: allocated on every call and never freed here; Dihedral14CFEnergy instead takes a caller-managed buffer
UNSIGNED_INT_VECTOR *uint_crd =
const_cast<UNSIGNED_INT_VECTOR *>(reinterpret_cast<const UNSIGNED_INT_VECTOR *>(uint_crd_f));
Copy_Crd_To_New_Crd_Start<<<ceilf(static_cast<float>(atom_numbers) / 32), 32, 0, stream>>>(
atom_numbers, uint_crd, uint_crd_with_LJ, LJtype, charge);
VECTOR *boxlength = const_cast<VECTOR *>(reinterpret_cast<const VECTOR *>(boxlength_f));
Reset_List<<<ceilf(static_cast<float>(atom_numbers) / 128), 128, 0, stream>>>(atom_numbers, ene, 0.);
Dihedral14CFAtomEnergyKernel<<<block_per_grid, thread_per_block, 0, stream>>>(
dihedral_14_numbers, uint_crd_with_LJ, boxlength, a_14, b_14, cf_scale_factor, ene);
return;
}
void Dihedral14CFAtomEnergy(const int dihedral_14_numbers, const int atom_numbers, const int *uint_crd_f,
const int *LJtype, const float *charge, const float *boxlength_f, const int *a_14,
const int *b_14, const float *cf_scale_factor, float *ene, cudaStream_t stream);
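Per 1-4 pair, the kernel above evaluates the scaled Coulomb energy below, with the rnorm3df intrinsic supplying 1/r in a single call:

  E_{14} = s^{\mathrm{cf}}_{14}\,\frac{q_i\,q_j}{r_{ij}}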

View File

@ -1,27 +0,0 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_NB14_DIHEDRAL_14_CF_ATOM_ENERGY_IMPL_H
#define MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_NB14_DIHEDRAL_14_CF_ATOM_ENERGY_IMPL_H
#include <curand_kernel.h>
#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/cuda_common.h"
#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/cuda_device_info.h"
CUDA_LIB_EXPORT void Dihedral14CFAtomEnergy(const int dihedral_14_numbers, const int atom_numbers,
const int *uint_crd_f, const int *LJtype, const float *charge,
const float *boxlength_f, const int *a_14, const int *b_14,
const float *cf_scale_factor, float *ene, cudaStream_t stream);
#endif // MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_NB14_DIHEDRAL_14_CF_ATOM_ENERGY_IMPL_H

View File

@ -1,78 +0,0 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "plugin/device/gpu/kernel/cuda_impl/sponge/nb14/dihedral_14_cf_energy_impl.cuh"
#include "plugin/device/gpu/kernel/cuda_impl/sponge/common_sponge.cuh"
__global__ void Dihedral14CFEnergyKernel(const int dihedral_14_numbers, const UINT_VECTOR_LJ_TYPE *uint_crd,
const VECTOR *boxlength, const int *a_14, const int *b_14,
const float *cf_scale_factor, float *ene) {
int dihedral_14_i = blockDim.x * blockIdx.x + threadIdx.x;
if (dihedral_14_i < dihedral_14_numbers) {
int atom_i = a_14[dihedral_14_i];
int atom_j = b_14[dihedral_14_i];
UINT_VECTOR_LJ_TYPE r1 = uint_crd[atom_i];
UINT_VECTOR_LJ_TYPE r2 = uint_crd[atom_j];
int int_x;
int int_y;
int int_z;
VECTOR dr;
float r_1;
float ene_lin = 0.;
int_x = r2.uint_x - r1.uint_x;
int_y = r2.uint_y - r1.uint_y;
int_z = r2.uint_z - r1.uint_z;
dr.x = boxlength[0].x * int_x;
dr.y = boxlength[0].y * int_y;
dr.z = boxlength[0].z * int_z;
r_1 = rnorm3df(dr.x, dr.y, dr.z);
ene_lin = r1.charge * r2.charge * r_1;
ene_lin *= cf_scale_factor[dihedral_14_i];
ene[dihedral_14_i] = ene_lin;
}
}
void Dihedral14CFEnergy(const int dihedral_14_numbers, const int atom_numbers, const int *uint_crd_f, const int *LJtype,
const float *charge, float *uint_crd_with_LJ_f, const float *boxlength_f, const int *a_14,
const int *b_14, const float *cf_scale_factor, float *ene, cudaStream_t stream) {
size_t thread_per_block = 32;
size_t block_per_grid = ceilf(static_cast<float>(dihedral_14_numbers) / 32);
UNSIGNED_INT_VECTOR *uint_crd =
const_cast<UNSIGNED_INT_VECTOR *>(reinterpret_cast<const UNSIGNED_INT_VECTOR *>(uint_crd_f));
UINT_VECTOR_LJ_TYPE *uint_crd_with_LJ = reinterpret_cast<UINT_VECTOR_LJ_TYPE *>(uint_crd_with_LJ_f);
Copy_Crd_To_New_Crd_Start<<<ceilf(static_cast<float>(atom_numbers) / 32), 32, 0, stream>>>(
atom_numbers, uint_crd, uint_crd_with_LJ, LJtype, charge);
VECTOR *boxlength = const_cast<VECTOR *>(reinterpret_cast<const VECTOR *>(boxlength_f));
Dihedral14CFEnergyKernel<<<block_per_grid, thread_per_block, 0, stream>>>(
dihedral_14_numbers, uint_crd_with_LJ, boxlength, a_14, b_14, cf_scale_factor, ene);
return;
}
void Dihedral14CFEnergy(const int dihedral_14_numbers, const int atom_numbers, const int *uint_crd_f, const int *LJtype,
const float *charge, float *uint_crd_with_LJ_f, const float *boxlength_f, const int *a_14,
const int *b_14, const float *cf_scale_factor, float *ene, cudaStream_t stream);

View File

@ -1,27 +0,0 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_NB14_DIHEDRAL_14_CF_ENERGY_IMPL_H
#define MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_NB14_DIHEDRAL_14_CF_ENERGY_IMPL_H
#include <curand_kernel.h>
#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/cuda_common.h"
#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/cuda_device_info.h"
CUDA_LIB_EXPORT void Dihedral14CFEnergy(const int dihedral_14_numbers, const int atom_numbers, const int *uint_crd_f,
const int *LJtype, const float *charge, float *uint_crd_with_LJ_f,
const float *boxlength_f, const int *a_14, const int *b_14,
const float *cf_scale_factor, float *ene, cudaStream_t stream);
#endif // MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_NB14_DIHEDRAL_14_CF_ENERGY_IMPL_H

View File

@ -1,102 +0,0 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "plugin/device/gpu/kernel/cuda_impl/sponge/nb14/dihedral_14_lj_atom_energy_impl.cuh"
#include "plugin/device/gpu/kernel/cuda_impl/sponge/common_sponge.cuh"
__global__ void Dihedral14LJAtomEnergyKernel(const int dihedral_14_numbers, const UINT_VECTOR_LJ_TYPE *uint_crd,
const VECTOR *boxlength, const int *a_14, const int *b_14,
const float *lj_scale_factor, const float *LJ_type_A,
const float *LJ_type_B, float *ene) {
int dihedral_14_i = blockDim.x * blockIdx.x + threadIdx.x;
if (dihedral_14_i < dihedral_14_numbers) {
int atom_i = a_14[dihedral_14_i];
int atom_j = b_14[dihedral_14_i];
UINT_VECTOR_LJ_TYPE r1 = uint_crd[atom_i];
UINT_VECTOR_LJ_TYPE r2 = uint_crd[atom_j];
int int_x;
int int_y;
int int_z;
VECTOR dr;
float dr2;
float dr_2;
float dr_4;
float dr_6;
float dr_12;
float ene_lin = 0.;
int x, y;
int atom_pair_LJ_type;
int_x = r2.uint_x - r1.uint_x;
int_y = r2.uint_y - r1.uint_y;
int_z = r2.uint_z - r1.uint_z;
dr.x = boxlength[0].x * int_x;
dr.y = boxlength[0].y * int_y;
dr.z = boxlength[0].z * int_z;
dr2 = dr.x * dr.x + dr.y * dr.y + dr.z * dr.z;
dr_2 = 1. / dr2;
dr_4 = dr_2 * dr_2;
dr_6 = dr_4 * dr_2;
dr_12 = dr_6 * dr_6;
y = (r2.LJ_type - r1.LJ_type);
x = y >> 31;
y = (y ^ x) - x;
x = r2.LJ_type + r1.LJ_type;
r2.LJ_type = (x + y) >> 1;
x = (x - y) >> 1;
atom_pair_LJ_type = (r2.LJ_type * (r2.LJ_type + 1) >> 1) + x;
ene_lin = 0.08333333 * LJ_type_A[atom_pair_LJ_type] * dr_12 -
0.1666666 * LJ_type_B[atom_pair_LJ_type] * dr_6;  // the stored LJ A and B coefficients are premultiplied by 12 and 6, so scale back by 1/12 and 1/6 here
ene_lin *= lj_scale_factor[dihedral_14_i];
atomicAdd(&ene[atom_i], ene_lin);
}
}
void Dihedral14LJAtomEnergy(const int dihedral_14_numbers, const int atom_numbers, const int *uint_crd_f,
const int *LJtype, const float *charge, const float *boxlength_f, const int *a_14,
const int *b_14, const float *lj_scale_factor, const float *LJ_type_A,
const float *LJ_type_B, float *ene, cudaStream_t stream) {
size_t thread_per_block = 128;
size_t block_per_grid = ceilf(static_cast<float>(dihedral_14_numbers) / 128);  // one thread per 1-4 interaction, matching the kernel's bound
UINT_VECTOR_LJ_TYPE *uint_crd_with_LJ = NULL;
Cuda_Malloc_Safely(reinterpret_cast<void **>(&uint_crd_with_LJ), sizeof(UINT_VECTOR_LJ_TYPE) * atom_numbers);  // NOTE: allocated on every call and never freed here; the *Energy variant takes a caller-managed buffer instead
UNSIGNED_INT_VECTOR *uint_crd =
const_cast<UNSIGNED_INT_VECTOR *>(reinterpret_cast<const UNSIGNED_INT_VECTOR *>(uint_crd_f));
Copy_Crd_To_New_Crd_Start<<<ceilf(static_cast<float>(atom_numbers) / 32), 32, 0, stream>>>(
atom_numbers, uint_crd, uint_crd_with_LJ, LJtype, charge);
VECTOR *boxlength = const_cast<VECTOR *>(reinterpret_cast<const VECTOR *>(boxlength_f));
Reset_List<<<ceilf(static_cast<float>(atom_numbers) / 128), 128, 0, stream>>>(atom_numbers, ene, 0.);
Dihedral14LJAtomEnergyKernel<<<block_per_grid, thread_per_block, 0, stream>>>(
dihedral_14_numbers, uint_crd_with_LJ, boxlength, a_14, b_14, lj_scale_factor, LJ_type_A, LJ_type_B, ene);
cudaStreamSynchronize(stream);
return;
}
void Dihedral14LJAtomEnergy(const int dihedral_14_numbers, const int atom_numbers, const int *uint_crd_f,
const int *LJtype, const float *charge, const float *boxlength_f, const int *a_14,
const int *b_14, const float *lj_scale_factor, const float *LJ_type_A,
const float *LJ_type_B, float *ene, cudaStream_t stream);

View File

@ -1,28 +0,0 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_NB14_DIHEDRAL_14_LJ_ATOM_ENERGY_IMPL_H
#define MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_NB14_DIHEDRAL_14_LJ_ATOM_ENERGY_IMPL_H
#include <curand_kernel.h>
#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/cuda_common.h"
#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/cuda_device_info.h"
CUDA_LIB_EXPORT void Dihedral14LJAtomEnergy(const int dihedral_14_numbers, const int atom_numbers,
const int *uint_crd_f, const int *LJtype, const float *charge,
const float *boxlength_f, const int *a_14, const int *b_14,
const float *lj_scale_factor, const float *LJ_type_A,
const float *LJ_type_B, float *ene, cudaStream_t stream);
#endif // MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_NB14_DIHEDRAL_14_LJ_ATOM_ENERGY_IMPL_H

View File

@ -1,139 +0,0 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "plugin/device/gpu/kernel/cuda_impl/sponge/nb14/dihedral_14_lj_cf_force_with_atom_energy_and_virial_impl.cuh"
#include "plugin/device/gpu/kernel/cuda_impl/sponge/common_sponge.cuh"
__global__ void Dihedral14LJCFForceWithAtomEnergyAndVirialKernel(
const int dihedral_14_numbers, const UINT_VECTOR_LJ_TYPE *uint_crd, const VECTOR *scaler, const int *a_14,
const int *b_14, const float *lj_scale_factor, const float *cf_scale_factor, const float *LJ_type_A,
const float *LJ_type_B, VECTOR *frc, float *atom_energy, float *atom_virial) {
int dihedral_14_i = blockDim.x * blockIdx.x + threadIdx.x;
if (dihedral_14_i < dihedral_14_numbers) {
UINT_VECTOR_LJ_TYPE r1, r2;
VECTOR dr;
float dr_abs;
float dr2;
float dr_1;
float dr_2;
float dr_4;
float dr_8;
float dr_14;
float frc_abs = 0.;
VECTOR temp_frc;
float ene_lin;
float ene_lin2;
int x, y;
int atom_pair_LJ_type;
int atom_i = a_14[dihedral_14_i];
int atom_j = b_14[dihedral_14_i];
r1 = uint_crd[atom_i];
r2 = uint_crd[atom_j];
dr = Get_Periodic_Displacement(r2, r1, scaler[0]);
dr2 = dr.x * dr.x + dr.y * dr.y + dr.z * dr.z;
dr_2 = 1.0 / dr2;
dr_4 = dr_2 * dr_2;
dr_8 = dr_4 * dr_4;
dr_14 = dr_8 * dr_4 * dr_2;
dr_abs = norm3df(dr.x, dr.y, dr.z);
dr_1 = 1. / dr_abs;
// CF
float charge_i = r1.charge;
float charge_j = r2.charge;
float frc_cf_abs;
frc_cf_abs = cf_scale_factor[dihedral_14_i] * dr_2 * dr_1;
frc_cf_abs = -charge_i * charge_j * frc_cf_abs;
// LJ
y = (r2.LJ_type - r1.LJ_type);
x = y >> 31;
y = (y ^ x) - x;
x = r2.LJ_type + r1.LJ_type;
r2.LJ_type = (x + y) >> 1;
x = (x - y) >> 1;
atom_pair_LJ_type = (r2.LJ_type * (r2.LJ_type + 1) >> 1) + x;
frc_abs = -LJ_type_A[atom_pair_LJ_type] * dr_14 + LJ_type_B[atom_pair_LJ_type] * dr_8;
frc_abs *= lj_scale_factor[dihedral_14_i];
frc_abs += frc_cf_abs;
temp_frc.x = frc_abs * dr.x;
temp_frc.y = frc_abs * dr.y;
temp_frc.z = frc_abs * dr.z;
atomicAdd(&frc[atom_j].x, -temp_frc.x);
atomicAdd(&frc[atom_j].y, -temp_frc.y);
atomicAdd(&frc[atom_j].z, -temp_frc.z);
atomicAdd(&frc[atom_i].x, temp_frc.x);
atomicAdd(&frc[atom_i].y, temp_frc.y);
atomicAdd(&frc[atom_i].z, temp_frc.z);
ene_lin = r1.charge * r2.charge * dr_1;
ene_lin *= cf_scale_factor[dihedral_14_i];
ene_lin2 = 0.08333333 * LJ_type_A[atom_pair_LJ_type] * dr_4 * dr_8 -
0.1666666 * LJ_type_B[atom_pair_LJ_type] * dr_4 * dr_2;  // the stored LJ A and B coefficients are premultiplied by 12 and 6, so scale back by 1/12 and 1/6 here
ene_lin2 *= lj_scale_factor[dihedral_14_i];
atomicAdd(&atom_energy[atom_i], ene_lin + ene_lin2);
atomicAdd(&atom_virial[atom_i], -temp_frc * dr);
}
}
void Dihedral14LJCFForceWithAtomEnergyAndVirial(const int dihedral_14_numbers, const int atom_numbers,
const int *uint_crd_f, const int *LJtype, const float *charge,
float *uint_crd_with_LJ_f, const float *boxlength_f, const int *a_14,
const int *b_14, const float *lj_scale_factor,
const float *cf_scale_factor, const float *LJ_type_A,
const float *LJ_type_B, float *frc_f, float *atom_energy,
float *atom_virial, cudaStream_t stream) {
size_t thread_per_block = 128;
size_t block_per_grid = ceilf(static_cast<float>(dihedral_14_numbers) / 128);  // one thread per 1-4 interaction, matching the kernel's bound
UNSIGNED_INT_VECTOR *uint_crd =
const_cast<UNSIGNED_INT_VECTOR *>(reinterpret_cast<const UNSIGNED_INT_VECTOR *>(uint_crd_f));
UINT_VECTOR_LJ_TYPE *uint_crd_with_LJ = reinterpret_cast<UINT_VECTOR_LJ_TYPE *>(uint_crd_with_LJ_f);
Copy_Crd_To_New_Crd_Start<<<ceilf(static_cast<float>(atom_numbers) / 32), 32, 0, stream>>>(
atom_numbers, uint_crd, uint_crd_with_LJ, LJtype, charge);
Reset_List<<<ceilf(static_cast<float>(3. * atom_numbers) / 128), 128, 0, stream>>>(3 * atom_numbers, frc_f, 0.);
Reset_List<<<ceilf(static_cast<float>(atom_numbers) / 128), 128, 0, stream>>>(atom_numbers, atom_energy, 0.);
Reset_List<<<ceilf(static_cast<float>(atom_numbers) / 128), 128, 0, stream>>>(atom_numbers, atom_virial, 0.);
VECTOR *boxlength = const_cast<VECTOR *>(reinterpret_cast<const VECTOR *>(boxlength_f));
VECTOR *frc = reinterpret_cast<VECTOR *>(frc_f);  // frc_f is non-const, so no const_cast is needed
Dihedral14LJCFForceWithAtomEnergyAndVirialKernel<<<block_per_grid, thread_per_block, 0, stream>>>(
dihedral_14_numbers, uint_crd_with_LJ, boxlength, a_14, b_14, lj_scale_factor, cf_scale_factor, LJ_type_A,
LJ_type_B, frc, atom_energy, atom_virial);
return;
}
void Dihedral14LJCFForceWithAtomEnergyAndVirial(const int dihedral_14_numbers, const int atom_numbers,
const int *uint_crd_f, const int *LJtype, const float *charge,
float *uint_crd_with_LJ_f, const float *boxlength_f, const int *a_14,
const int *b_14, const float *lj_scale_factor,
const float *cf_scale_factor, const float *LJ_type_A,
const float *LJ_type_B, float *frc_f, float *atom_energy,
float *atom_virial, cudaStream_t stream);
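The per-interaction virial added to atom_virial is the negated dot product of the pair force with the displacement; this reading assumes that VECTOR's operator* in common_sponge.cuh is a dot product, as its use in the angle kernels suggests:

  W_{ij} = -\,\vec{F}_{ij}\cdot\vec{r}_{ij}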

View File

@ -1,28 +0,0 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_NB14_DIHEDRAL_14_LJ_CF_FORCE_WITH_ATOM_ENERGY_AND_VIRIAL_IMPL_H
#define MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_NB14_DIHEDRAL_14_LJ_CF_FORCE_WITH_ATOM_ENERGY_AND_VIRIAL_IMPL_H
#include <curand_kernel.h>
#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/cuda_common.h"
#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/cuda_device_info.h"
CUDA_LIB_EXPORT void Dihedral14LJCFForceWithAtomEnergyAndVirial(
const int dihedral_14_numbers, const int atom_numbers, const int *uint_crd_f, const int *LJtype, const float *charge,
float *uint_crd_with_LJ_f, const float *boxlength_f, const int *a_14, const int *b_14, const float *lj_scale_factor,
const float *cf_scale_factor, const float *LJ_type_A, const float *LJ_type_B, float *frc_f, float *atom_energy,
float *atom_virial, cudaStream_t stream);
#endif // MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_NB14_DIHEDRAL_14_LJ_CF_FORCE_WITH_ATOM_ENERGY_AND_VIRIAL_IMPL_H

View File

@ -1,140 +0,0 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "plugin/device/gpu/kernel/cuda_impl/sponge/nb14/dihedral_14_lj_cf_force_with_atom_energy_impl.cuh"
#include "plugin/device/gpu/kernel/cuda_impl/sponge/common_sponge.cuh"
__global__ void Dihedral14LJCFForceWithAtomEnergyKernel(const int dihedral_14_numbers,
const UINT_VECTOR_LJ_TYPE *uint_crd, const VECTOR *boxlength,
const int *a_14, const int *b_14, const float *lj_scale_factor,
const float *cf_scale_factor, const float *LJ_type_A,
const float *LJ_type_B, VECTOR *frc, float *atom_energy) {
int dihedral_14_i = blockDim.x * blockIdx.x + threadIdx.x;
if (dihedral_14_i < dihedral_14_numbers) {
int int_x;
int int_y;
int int_z;
UINT_VECTOR_LJ_TYPE r1, r2;
VECTOR dr;
float dr_abs;
float dr2;
float dr_1;
float dr_2;
float dr_4;
float dr_8;
float dr_14;
float frc_abs = 0.;
VECTOR temp_frc;
float ene_lin;
float ene_lin2;
int x, y;
int atom_pair_LJ_type;
int atom_i = a_14[dihedral_14_i];
int atom_j = b_14[dihedral_14_i];
r1 = uint_crd[atom_i];
r2 = uint_crd[atom_j];
int_x = r2.uint_x - r1.uint_x;
int_y = r2.uint_y - r1.uint_y;
int_z = r2.uint_z - r1.uint_z;
dr.x = boxlength[0].x * int_x;
dr.y = boxlength[0].y * int_y;
dr.z = boxlength[0].z * int_z;
dr2 = dr.x * dr.x + dr.y * dr.y + dr.z * dr.z;
dr_2 = 1.0 / dr2;
dr_4 = dr_2 * dr_2;
dr_8 = dr_4 * dr_4;
dr_14 = dr_8 * dr_4 * dr_2;
dr_abs = norm3df(dr.x, dr.y, dr.z);
dr_1 = 1. / dr_abs;
float charge_i = r1.charge;
float charge_j = r2.charge;
float frc_cf_abs;
frc_cf_abs = cf_scale_factor[dihedral_14_i] * dr_2 * dr_1;
frc_cf_abs = -charge_i * charge_j * frc_cf_abs;
y = (r2.LJ_type - r1.LJ_type);
x = y >> 31;
y = (y ^ x) - x;
x = r2.LJ_type + r1.LJ_type;
r2.LJ_type = (x + y) >> 1;
x = (x - y) >> 1;
atom_pair_LJ_type = (r2.LJ_type * (r2.LJ_type + 1) >> 1) + x;
frc_abs = -LJ_type_A[atom_pair_LJ_type] * dr_14 + LJ_type_B[atom_pair_LJ_type] * dr_8;
frc_abs *= lj_scale_factor[dihedral_14_i];
frc_abs += frc_cf_abs;
temp_frc.x = frc_abs * dr.x;
temp_frc.y = frc_abs * dr.y;
temp_frc.z = frc_abs * dr.z;
atomicAdd(&frc[atom_j].x, -temp_frc.x);
atomicAdd(&frc[atom_j].y, -temp_frc.y);
atomicAdd(&frc[atom_j].z, -temp_frc.z);
atomicAdd(&frc[atom_i].x, temp_frc.x);
atomicAdd(&frc[atom_i].y, temp_frc.y);
atomicAdd(&frc[atom_i].z, temp_frc.z);
ene_lin = r1.charge * r2.charge * dr_1;
ene_lin *= cf_scale_factor[dihedral_14_i];
ene_lin2 = 0.08333333 * LJ_type_A[atom_pair_LJ_type] * dr_4 * dr_8 -
0.1666666 * LJ_type_B[atom_pair_LJ_type] * dr_4 * dr_2;  // the stored LJ A and B coefficients are premultiplied by 12 and 6, so scale back by 1/12 and 1/6 here
ene_lin2 *= lj_scale_factor[dihedral_14_i];
atomicAdd(&atom_energy[atom_i], ene_lin + ene_lin2);
}
}
void Dihedral14LJCFForceWithAtomEnergy(const int dihedral_14_numbers, const int atom_numbers, const int *uint_crd_f,
const int *LJtype, const float *charge, float *uint_crd_with_LJ_f,
const float *boxlength_f, const int *a_14, const int *b_14,
const float *lj_scale_factor, const float *cf_scale_factor,
const float *LJ_type_A, const float *LJ_type_B, float *frc_f, float *atom_energy,
cudaStream_t stream) {
size_t thread_per_block = 128;
size_t block_per_grid = ceilf(static_cast<float>(dihedral_14_numbers) / 128);
UNSIGNED_INT_VECTOR *uint_crd =
const_cast<UNSIGNED_INT_VECTOR *>(reinterpret_cast<const UNSIGNED_INT_VECTOR *>(uint_crd_f));
UINT_VECTOR_LJ_TYPE *uint_crd_with_LJ = reinterpret_cast<UINT_VECTOR_LJ_TYPE *>(uint_crd_with_LJ_f);
Copy_Crd_To_New_Crd_Start<<<ceilf(static_cast<float>(atom_numbers) / 32), 32, 0, stream>>>(
atom_numbers, uint_crd, uint_crd_with_LJ, LJtype, charge);
Reset_List<<<ceilf(static_cast<float>(3. * atom_numbers) / 128), 128, 0, stream>>>(3 * atom_numbers, frc_f, 0.);
Reset_List<<<ceilf(static_cast<float>(atom_numbers) / 128), 128, 0, stream>>>(atom_numbers, atom_energy, 0.);
VECTOR *boxlength = const_cast<VECTOR *>(reinterpret_cast<const VECTOR *>(boxlength_f));
VECTOR *frc = reinterpret_cast<VECTOR *>(frc_f);  // frc_f is non-const, so no const_cast is needed
Dihedral14LJCFForceWithAtomEnergyKernel<<<block_per_grid, thread_per_block, 0, stream>>>(
dihedral_14_numbers, uint_crd_with_LJ, boxlength, a_14, b_14, lj_scale_factor, cf_scale_factor, LJ_type_A,
LJ_type_B, frc, atom_energy);
return;
}
void Dihedral14LJCFForceWithAtomEnergy(const int dihedral_14_numbers, const int atom_numbers, const int *uint_crd_f,
const int *LJtype, const float *charge, float *uint_crd_with_LJ_f,
const float *boxlength_f, const int *a_14, const int *b_14,
const float *lj_scale_factor, const float *cf_scale_factor,
const float *LJ_type_A, const float *LJ_type_B, float *frc_f, float *atom_energy,
cudaStream_t stream);

View File

@ -1,30 +0,0 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_NB14_DIHEDRAL_14_LJ_CF_FORCE_WITH_ATOM_ENERGY_IMPL_H
#define MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_NB14_DIHEDRAL_14_LJ_CF_FORCE_WITH_ATOM_ENERGY_IMPL_H
#include <curand_kernel.h>
#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/cuda_common.h"
#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/cuda_device_info.h"
CUDA_LIB_EXPORT void Dihedral14LJCFForceWithAtomEnergy(const int dihedral_14_numbers, const int atom_numbers,
const int *uint_crd_f, const int *LJtype, const float *charge,
float *uint_crd_with_LJ_f, const float *boxlength_f,
const int *a_14, const int *b_14, const float *lj_scale_factor,
const float *cf_scale_factor, const float *LJ_type_A,
const float *LJ_type_B, float *frc_f, float *atom_energy,
cudaStream_t stream);
#endif // MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_NB14_DIHEDRAL_14_LJ_CF_FORCE_WITH_ATOM_ENERGY_IMPL_H

View File

@ -1,98 +0,0 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "plugin/device/gpu/kernel/cuda_impl/sponge/nb14/dihedral_14_lj_energy_impl.cuh"
#include "plugin/device/gpu/kernel/cuda_impl/sponge/common_sponge.cuh"
__global__ void Dihedral14LJEnergyKernel(const int dihedral_14_numbers, const UINT_VECTOR_LJ_TYPE *uint_crd,
const VECTOR *boxlength, const int *a_14, const int *b_14,
const float *lj_scale_factor, const float *LJ_type_A, const float *LJ_type_B,
float *ene) {
int dihedral_14_i = blockDim.x * blockIdx.x + threadIdx.x;
if (dihedral_14_i < dihedral_14_numbers) {
int atom_i = a_14[dihedral_14_i];
int atom_j = b_14[dihedral_14_i];
UINT_VECTOR_LJ_TYPE r1 = uint_crd[atom_i];
UINT_VECTOR_LJ_TYPE r2 = uint_crd[atom_j];
int int_x;
int int_y;
int int_z;
VECTOR dr;
float dr2;
float dr_2;
float dr_4;
float dr_6;
float dr_12;
float ene_lin = 0.;
int x, y;
int atom_pair_LJ_type;
int_x = r2.uint_x - r1.uint_x;
int_y = r2.uint_y - r1.uint_y;
int_z = r2.uint_z - r1.uint_z;
dr.x = boxlength[0].x * int_x;
dr.y = boxlength[0].y * int_y;
dr.z = boxlength[0].z * int_z;
dr2 = dr.x * dr.x + dr.y * dr.y + dr.z * dr.z;
dr_2 = 1. / dr2;
dr_4 = dr_2 * dr_2;
dr_6 = dr_4 * dr_2;
dr_12 = dr_6 * dr_6;
y = (r2.LJ_type - r1.LJ_type);
x = y >> 31;
y = (y ^ x) - x;
x = r2.LJ_type + r1.LJ_type;
r2.LJ_type = (x + y) >> 1;
x = (x - y) >> 1;
atom_pair_LJ_type = (r2.LJ_type * (r2.LJ_type + 1) >> 1) + x;
ene_lin = 0.08333333 * LJ_type_A[atom_pair_LJ_type] * dr_12 -
0.1666666 * LJ_type_B[atom_pair_LJ_type] * dr_6; // the LJ A and B coefficients are stored pre-multiplied by 12 and 6, so multiply back by 1/12 and 1/6 here
ene_lin *= lj_scale_factor[dihedral_14_i];
ene[dihedral_14_i] = ene_lin;
}
}
void Dihedral14LJEnergy(const int dihedral_14_numbers, const int atom_numbers, const int *uint_crd_f, const int *LJtype,
const float *charge, float *uint_crd_with_LJ_f, const float *boxlength_f, const int *a_14,
const int *b_14, const float *lj_scale_factor, const float *LJ_type_A, const float *LJ_type_B,
float *ene, cudaStream_t stream) {
size_t thread_per_block = 32;
size_t block_per_grid = ceilf(static_cast<float>(dihedral_14_numbers) / 32);
UNSIGNED_INT_VECTOR *uint_crd =
const_cast<UNSIGNED_INT_VECTOR *>(reinterpret_cast<const UNSIGNED_INT_VECTOR *>(uint_crd_f));
UINT_VECTOR_LJ_TYPE *uint_crd_with_LJ = reinterpret_cast<UINT_VECTOR_LJ_TYPE *>(uint_crd_with_LJ_f);
Copy_Crd_To_New_Crd_Start<<<ceilf(static_cast<float>(atom_numbers) / 32), 32, 0, stream>>>(
atom_numbers, uint_crd, uint_crd_with_LJ, LJtype, charge);
VECTOR *boxlength = const_cast<VECTOR *>(reinterpret_cast<const VECTOR *>(boxlength_f));
Dihedral14LJEnergyKernel<<<block_per_grid, thread_per_block, 0, stream>>>(
dihedral_14_numbers, uint_crd_with_LJ, boxlength, a_14, b_14, lj_scale_factor, LJ_type_A, LJ_type_B, ene);
return;
}
void Dihedral14LJEnergy(const int dihedral_14_numbers, const int atom_numbers, const int *uint_crd_f, const int *LJtype,
const float *charge, float *uint_crd_with_LJ_f, const float *boxlength_f, const int *a_14,
const int *b_14, const float *lj_scale_factor, const float *LJ_type_A, const float *LJ_type_B,
float *ene, cudaStream_t stream);
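For clarity, a minimal host-side sketch of the per-pair energy the kernel above computes (Dihedral14LJPairEnergy is a hypothetical helper for illustration, not part of the deleted sources; the 1/12 and 1/6 factors undo the pre-multiplied coefficients):

// r2: squared periodic pair distance; A, B: pre-multiplied LJ coefficients.
static float Dihedral14LJPairEnergy(float r2, float A, float B, float lj_scale) {
float inv_r2 = 1.0f / r2;
float inv_r6 = inv_r2 * inv_r2 * inv_r2;
float inv_r12 = inv_r6 * inv_r6;
// E = (A / 12) * r^-12 - (B / 6) * r^-6, scaled by the per-pair 1-4 factor.
return lj_scale * (A / 12.0f * inv_r12 - B / 6.0f * inv_r6);
}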

View File

@ -1,29 +0,0 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_NB14_DIHEDRAL_14_LJ_ENERGY_IMPL_H
#define MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_NB14_DIHEDRAL_14_LJ_ENERGY_IMPL_H
#include <curand_kernel.h>
#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/cuda_common.h"
#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/cuda_device_info.h"
CUDA_LIB_EXPORT void Dihedral14LJEnergy(const int dihedral_14_numbers, const int atom_numbers, const int *uint_crd_f,
const int *LJtype, const float *charge, float *uint_crd_with_LJ_f,
const float *boxlength_f, const int *a_14, const int *b_14,
const float *lj_scale_factor, const float *LJ_type_A, const float *LJ_type_B,
float *ene, cudaStream_t stream);
#endif // MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_NB14_DIHEDRAL_14_LJ_ENERGY_IMPL_H

View File

@ -1,111 +0,0 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "plugin/device/gpu/kernel/cuda_impl/sponge/nb14/dihedral_14_lj_force_impl.cuh"
#include "plugin/device/gpu/kernel/cuda_impl/sponge/common_sponge.cuh"
__global__ void Dihedral14LJForceKernel(const int dihedral_14_numbers, const UINT_VECTOR_LJ_TYPE *uint_crd,
const VECTOR *boxlength, const int *a_14, const int *b_14,
const float *lj_scale_factor, const float *LJ_type_A, const float *LJ_type_B,
VECTOR *frc) {
int dihedral_14_i = blockDim.x * blockIdx.x + threadIdx.x;
if (dihedral_14_i < dihedral_14_numbers) {
int int_x;
int int_y;
int int_z;
UINT_VECTOR_LJ_TYPE r1, r2;
VECTOR dr;
float dr2;
float dr_2;
float dr_4;
float dr_8;
float dr_14;
float frc_abs = 0.;
VECTOR temp_frc;
int x, y;
int atom_pair_LJ_type;
int atom_i = a_14[dihedral_14_i];
int atom_j = b_14[dihedral_14_i];
r1 = uint_crd[atom_i];
r2 = uint_crd[atom_j];
int_x = r2.uint_x - r1.uint_x;
int_y = r2.uint_y - r1.uint_y;
int_z = r2.uint_z - r1.uint_z;
dr.x = boxlength[0].x * int_x;
dr.y = boxlength[0].y * int_y;
dr.z = boxlength[0].z * int_z;
dr2 = dr.x * dr.x + dr.y * dr.y + dr.z * dr.z;
dr_2 = 1.0 / dr2;
dr_4 = dr_2 * dr_2;
dr_8 = dr_4 * dr_4;
dr_14 = dr_8 * dr_4 * dr_2;
y = (r2.LJ_type - r1.LJ_type);
x = y >> 31;
y = (y ^ x) - x;
x = r2.LJ_type + r1.LJ_type;
r2.LJ_type = (x + y) >> 1;
x = (x - y) >> 1;
atom_pair_LJ_type = (r2.LJ_type * (r2.LJ_type + 1) >> 1) + x;
frc_abs = -LJ_type_A[atom_pair_LJ_type] * dr_14 + LJ_type_B[atom_pair_LJ_type] * dr_8;
frc_abs *= lj_scale_factor[dihedral_14_i];
temp_frc.x = frc_abs * dr.x;
temp_frc.y = frc_abs * dr.y;
temp_frc.z = frc_abs * dr.z;
atomicAdd(&frc[atom_j].x, -temp_frc.x);
atomicAdd(&frc[atom_j].y, -temp_frc.y);
atomicAdd(&frc[atom_j].z, -temp_frc.z);
atomicAdd(&frc[atom_i].x, temp_frc.x);
atomicAdd(&frc[atom_i].y, temp_frc.y);
atomicAdd(&frc[atom_i].z, temp_frc.z);
}
}
void Dihedral14LJForce(const int dihedral_14_numbers, const int atom_numbers, const int *uint_crd_f, const int *LJtype,
const float *charge, const float *boxlength_f, const int *a_14, const int *b_14,
const float *lj_scale_factor, const float *LJ_type_A, const float *LJ_type_B, float *frc_f,
cudaStream_t stream) {
size_t thread_per_block = 128;
size_t block_per_grid = ceilf(static_cast<float>(dihedral_14_numbers) / 128);  // size the grid by the 1-4 pairs the kernel iterates
UINT_VECTOR_LJ_TYPE *uint_crd_with_LJ = NULL;
Cuda_Malloc_Safely(reinterpret_cast<void **>(&uint_crd_with_LJ), sizeof(UINT_VECTOR_LJ_TYPE) * atom_numbers);
UNSIGNED_INT_VECTOR *uint_crd =
const_cast<UNSIGNED_INT_VECTOR *>(reinterpret_cast<const UNSIGNED_INT_VECTOR *>(uint_crd_f));
Copy_Crd_To_New_Crd_Start<<<ceilf(static_cast<float>(atom_numbers) / 32), 32, 0, stream>>>(
atom_numbers, uint_crd, uint_crd_with_LJ, LJtype, charge);
cudaStreamSynchronize(stream);
Reset_List<<<ceilf(static_cast<float>(3. * atom_numbers) / 128), 128, 0, stream>>>(3 * atom_numbers, frc_f, 0.);
VECTOR *boxlength = const_cast<VECTOR *>(reinterpret_cast<const VECTOR *>(boxlength_f));
VECTOR *frc = const_cast<VECTOR *>(reinterpret_cast<const VECTOR *>(frc_f));
Dihedral14LJForceKernel<<<block_per_grid, thread_per_block, 0, stream>>>(
dihedral_14_numbers, uint_crd_with_LJ, boxlength, a_14, b_14, lj_scale_factor, LJ_type_A, LJ_type_B, frc);
cudaStreamSynchronize(stream);
cudaFree(uint_crd_with_LJ);  // free the per-call scratch buffer allocated above
return;
}
void Dihedral14LJForce(const int dihedral_14_numbers, const int atom_numbers, const int *uint_crd_f, const int *LJtype,
const float *charge, const float *boxlength_f, const int *a_14, const int *b_14,
const float *lj_scale_factor, const float *LJ_type_A, const float *LJ_type_B, float *frc_f,
cudaStream_t stream);
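The force factor in the kernel above follows from differentiating the scaled energy: with $E(r) = s_{LJ}(\frac{A}{12} r^{-12} - \frac{B}{6} r^{-6})$ and displacement $\vec{d} = \vec{r}_j - \vec{r}_i$,

$$\vec{F}_i = -\nabla_{\vec{r}_i} E = \frac{1}{r}\frac{dE}{dr}\,\vec{d} = s_{LJ}\,(-A\,r^{-14} + B\,r^{-8})\,\vec{d},$$

which is exactly the frc_abs * dr accumulated above with opposite signs on atoms i and j.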

View File

@ -1,28 +0,0 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_NB14_DIHEDRAL_14_LJ_FORCE_IMPL_H
#define MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_NB14_DIHEDRAL_14_LJ_FORCE_IMPL_H
#include <curand_kernel.h>
#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/cuda_common.h"
#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/cuda_device_info.h"
CUDA_LIB_EXPORT void Dihedral14LJForce(const int dihedral_14_numbers, const int atom_numbers, const int *uint_crd_f,
const int *LJtype, const float *charge, const float *boxlength_f,
const int *a_14, const int *b_14, const float *lj_scale_factor,
const float *LJ_type_A, const float *LJ_type_B, float *frc_f,
cudaStream_t stream);
#endif // MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_NB14_DIHEDRAL_14_LJ_FORCE_IMPL_H

View File

@ -1,124 +0,0 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "plugin/device/gpu/kernel/cuda_impl/sponge/nb14/dihedral_14_lj_force_with_direct_cf_impl.cuh"
#include "plugin/device/gpu/kernel/cuda_impl/sponge/common_sponge.cuh"
__global__ void Dihedral14LJForceWithDirectCFKernel(const int dihedral_14_numbers, const UINT_VECTOR_LJ_TYPE *uint_crd,
const VECTOR *boxlength, const int *a_14, const int *b_14,
const float *lj_scale_factor, const float *cf_scale_factor,
const float *LJ_type_A, const float *LJ_type_B, VECTOR *frc) {
int dihedral_14_i = blockDim.x * blockIdx.x + threadIdx.x;
if (dihedral_14_i < dihedral_14_numbers) {
int int_x;
int int_y;
int int_z;
UINT_VECTOR_LJ_TYPE r1, r2;
VECTOR dr;
float dr_abs;
float dr2;
float dr_1;
float dr_2;
float dr_4;
float dr_8;
float dr_14;
float frc_abs = 0.;
VECTOR temp_frc;
int x, y;
int atom_pair_LJ_type;
int atom_i = a_14[dihedral_14_i];
int atom_j = b_14[dihedral_14_i];
r1 = uint_crd[atom_i];
r2 = uint_crd[atom_j];
int_x = r2.uint_x - r1.uint_x;
int_y = r2.uint_y - r1.uint_y;
int_z = r2.uint_z - r1.uint_z;
dr.x = boxlength[0].x * int_x;
dr.y = boxlength[0].y * int_y;
dr.z = boxlength[0].z * int_z;
dr2 = dr.x * dr.x + dr.y * dr.y + dr.z * dr.z;
dr_2 = 1.0 / dr2;
dr_4 = dr_2 * dr_2;
dr_8 = dr_4 * dr_4;
dr_14 = dr_8 * dr_4 * dr_2;
dr_abs = norm3df(dr.x, dr.y, dr.z);
dr_1 = 1. / dr_abs;
float charge_i = r1.charge;
float charge_j = r2.charge;
float frc_cf_abs;
frc_cf_abs = cf_scale_factor[dihedral_14_i] * dr_2 * dr_1;
frc_cf_abs = -charge_i * charge_j * frc_cf_abs;
// LJ
y = (r2.LJ_type - r1.LJ_type);
x = y >> 31;
y = (y ^ x) - x;
x = r2.LJ_type + r1.LJ_type;
r2.LJ_type = (x + y) >> 1;
x = (x - y) >> 1;
atom_pair_LJ_type = (r2.LJ_type * (r2.LJ_type + 1) >> 1) + x;
frc_abs = -LJ_type_A[atom_pair_LJ_type] * dr_14 + LJ_type_B[atom_pair_LJ_type] * dr_8;
frc_abs *= lj_scale_factor[dihedral_14_i];
frc_abs += frc_cf_abs;
temp_frc.x = frc_abs * dr.x;
temp_frc.y = frc_abs * dr.y;
temp_frc.z = frc_abs * dr.z;
atomicAdd(&frc[atom_j].x, -temp_frc.x);
atomicAdd(&frc[atom_j].y, -temp_frc.y);
atomicAdd(&frc[atom_j].z, -temp_frc.z);
atomicAdd(&frc[atom_i].x, temp_frc.x);
atomicAdd(&frc[atom_i].y, temp_frc.y);
atomicAdd(&frc[atom_i].z, temp_frc.z);
}
}
void Dihedral14LJForceWithDirectCF(const int dihedral_14_numbers, const int atom_numbers, const int *uint_crd_f,
const int *LJtype, const float *charge, const float *boxlength_f, const int *a_14,
const int *b_14, const float *lj_scale_factor, const float *cf_scale_factor,
const float *LJ_type_A, const float *LJ_type_B, float *frc_f, cudaStream_t stream) {
size_t thread_per_block = 128;
size_t block_per_grid = ceilf(static_cast<float>(dihedral_14_numbers) / 128);  // size the grid by the 1-4 pairs the kernel iterates
UINT_VECTOR_LJ_TYPE *uint_crd_with_LJ = NULL;
Cuda_Malloc_Safely(reinterpret_cast<void **>(&uint_crd_with_LJ), sizeof(UINT_VECTOR_LJ_TYPE) * atom_numbers);
UNSIGNED_INT_VECTOR *uint_crd =
const_cast<UNSIGNED_INT_VECTOR *>(reinterpret_cast<const UNSIGNED_INT_VECTOR *>(uint_crd_f));
Copy_Crd_To_New_Crd_Start<<<ceilf(static_cast<float>(atom_numbers) / 32), 32, 0, stream>>>(
atom_numbers, uint_crd, uint_crd_with_LJ, LJtype, charge);
cudaStreamSynchronize(stream);
VECTOR *boxlength = const_cast<VECTOR *>(reinterpret_cast<const VECTOR *>(boxlength_f));
Reset_List<<<ceilf(static_cast<float>(3. * atom_numbers) / 128), 128, 0, stream>>>(3 * atom_numbers, frc_f, 0.);
VECTOR *frc = const_cast<VECTOR *>(reinterpret_cast<const VECTOR *>(frc_f));
Dihedral14LJForceWithDirectCFKernel<<<block_per_grid, thread_per_block, 0, stream>>>(
dihedral_14_numbers, uint_crd_with_LJ, boxlength, a_14, b_14, lj_scale_factor, cf_scale_factor, LJ_type_A,
LJ_type_B, frc);
cudaStreamSynchronize(stream);
cudaFree(uint_crd_with_LJ);  // free the per-call scratch buffer allocated above
return;
}
void Dihedral14LJForceWithDirectCF(const int dihedral_14_numbers, const int atom_numbers, const int *uint_crd_f,
const int *LJtype, const float *charge, const float *boxlength_f, const int *a_14,
const int *b_14, const float *lj_scale_factor, const float *cf_scale_factor,
const float *LJ_type_A, const float *LJ_type_B, float *frc_f, cudaStream_t stream);
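The direct Coulomb term above follows the same pattern: for $E_{CF}(r) = s_{CF}\,q_i q_j / r$,

$$\frac{1}{r}\frac{dE_{CF}}{dr} = -s_{CF}\,\frac{q_i q_j}{r^{3}},$$

which matches frc_cf_abs and is summed into the LJ factor before multiplying by the displacement.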

View File

@ -1,29 +0,0 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_NB14_DIHEDRAL_14_LJ_FORCE_WITH_DIRECT_CF_IMPL_H
#define MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_NB14_DIHEDRAL_14_LJ_FORCE_WITH_DIRECT_CF_IMPL_H
#include <curand_kernel.h>
#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/cuda_common.h"
#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/cuda_device_info.h"
CUDA_LIB_EXPORT void Dihedral14LJForceWithDirectCF(const int dihedral_14_numbers, const int atom_numbers,
const int *uint_crd_f, const int *LJtype, const float *charge,
const float *boxlength_f, const int *a_14, const int *b_14,
const float *lj_scale_factor, const float *cf_scale_factor,
const float *LJ_type_A, const float *LJ_type_B, float *frc_f,
cudaStream_t stream);
#endif // MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_NB14_DIHEDRAL_14_LJ_FORCE_WITH_DIRECT_CF_IMPL_H

View File

@ -1,644 +0,0 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
* Note:
* NeighborListUpdate. This is an experimental interface that is subject to change and/or deletion.
*/
#include "plugin/device/gpu/kernel/cuda_impl/sponge/neighbor_list/neighbor_list_impl.cuh"
#include <vector>
// common functions
static __global__ void Copy_List(const int element_numbers, const float *origin_list, float *list) {
int i = blockDim.x * blockIdx.x + threadIdx.x;
if (i < element_numbers) {
list[i] = origin_list[i];
}
}
static __global__ void Crd_To_Uint_Crd(const int atom_numbers, float *scale_factor, const VECTOR *crd,
UNSIGNED_INT_VECTOR *uint_crd) {
int atom_i = blockDim.x * blockIdx.x + threadIdx.x;
if (atom_i < atom_numbers) {
INT_VECTOR tempi;
VECTOR temp = crd[atom_i];
temp.x *= scale_factor[0];
temp.y *= scale_factor[1];
temp.z *= scale_factor[2];
tempi.int_x = temp.x;
tempi.int_y = temp.y;
tempi.int_z = temp.z;
uint_crd[atom_i].uint_x = (tempi.int_x << 2);
uint_crd[atom_i].uint_y = (tempi.int_y << 2);
uint_crd[atom_i].uint_z = (tempi.int_z << 2);
}
}
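// The conversion above quantizes coordinates with a quarter-range scale factor
// (see Mul_quarter below), casts through a signed int so the float-to-int
// conversion stays within INT_MAX, then shifts left by two bits so the unsigned
// coordinates span the full 32-bit range and pair differences wrap correctly
// across the periodic box.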
static __global__ void Crd_Periodic_Map(const int atom_numbers, VECTOR *crd, const float *box_length) {
int atom_i = blockDim.x * blockIdx.x + threadIdx.x;
if (atom_i < atom_numbers) {
if (crd[atom_i].x >= box_length[0]) {
crd[atom_i].x = crd[atom_i].x - box_length[0];
} else if (crd[atom_i].x < 0) {
crd[atom_i].x = crd[atom_i].x + box_length[0];
}
if (crd[atom_i].y >= box_length[1]) {
crd[atom_i].y = crd[atom_i].y - box_length[1];
} else if (crd[atom_i].y < 0) {
crd[atom_i].y = crd[atom_i].y + box_length[1];
}
if (crd[atom_i].z >= box_length[2]) {
crd[atom_i].z = crd[atom_i].z - box_length[2];
} else if (crd[atom_i].z < 0) {
crd[atom_i].z = crd[atom_i].z + box_length[2];
}
}
}
static __global__ void Clear_Grid_Bucket(const int grid_numbers, int *atom_numbers_in_grid_bucket,
GRID_BUCKET *bucket) {
int grid_serial = blockDim.x * blockIdx.x + threadIdx.x;
if (grid_serial < grid_numbers) {
GRID_BUCKET bucket_i = bucket[grid_serial];
for (int i = 0; i < atom_numbers_in_grid_bucket[grid_serial]; i = i + 1) {
bucket_i.atom_serial[i] = -1;
}
atom_numbers_in_grid_bucket[grid_serial] = 0;
}
}
static __global__ void Find_Atom_In_Grid_Serial(const int atom_numbers, const float *grid_length_inverse,
const VECTOR *crd, const int *grid_N, const int gridxy,
int *atom_in_grid_serial) {
int atom_i = blockDim.x * blockIdx.x + threadIdx.x;
if (atom_i < atom_numbers) {
int Nx = static_cast<int>(crd[atom_i].x * grid_length_inverse[0]);
int Ny = static_cast<int>(crd[atom_i].y * grid_length_inverse[1]);
int Nz = static_cast<int>(crd[atom_i].z * grid_length_inverse[2]);
Nx = Nx & ((Nx - grid_N[0]) >> 31);
Ny = Ny & ((Ny - grid_N[1]) >> 31);
Nz = Nz & ((Nz - grid_N[2]) >> 31);
atom_in_grid_serial[atom_i] = Nz * gridxy + Ny * grid_N[0] + Nx;
}
}
static __global__ void Put_Atom_In_Grid_Bucket(const int atom_numbers, const int *atom_in_grid_serial,
GRID_BUCKET *bucket, int *atom_numbers_in_grid_bucket) {
int atom_i = blockDim.x * blockIdx.x + threadIdx.x;
if (atom_i < atom_numbers) {
int grid_serial = atom_in_grid_serial[atom_i];
GRID_BUCKET bucket_i = bucket[grid_serial];
int a = atom_numbers_in_grid_bucket[grid_serial];
atomicCAS(&bucket_i.atom_serial[a], -1, atom_i);
if (bucket_i.atom_serial[a] != atom_i) {
while (true) {
a = a + 1;
atomicCAS(&bucket_i.atom_serial[a], -1, atom_i);
if (bucket_i.atom_serial[a] == atom_i) {
atomicAdd(&atom_numbers_in_grid_bucket[grid_serial], 1);
break;
}
}
} else {
atomicAdd(&atom_numbers_in_grid_bucket[grid_serial], 1);
}
}
}
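// Lock-free bucket insertion above: each thread tries to CAS its atom index
// into the first seemingly free (-1) slot, probes forward on collision until
// its CAS lands, then atomically bumps the bucket's occupancy counter.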
static __global__ void Find_atom_neighbors(const int atom_numbers, const UNSIGNED_INT_VECTOR *uint_crd,
const float *uint_dr_to_dr_cof, const int *atom_in_grid_serial,
const GRID_POINTER *gpointer, const GRID_BUCKET *bucket,
const int *atom_numbers_in_grid_bucket, NEIGHBOR_LIST *nl,
const float cutoff_skin_square) {
int atom_i = blockDim.x * blockIdx.x + threadIdx.x;
if (atom_i < atom_numbers) {
int grid_serial = atom_in_grid_serial[atom_i];
int grid_serial2;
int atom_numbers_in_nl_lin = 0;
int atom_j;
int int_x;
int int_y;
int int_z;
UNSIGNED_INT_VECTOR uint_crd_i = uint_crd[atom_i];
NEIGHBOR_LIST nl_i = nl[atom_i];
GRID_POINTER gpointer_i = gpointer[grid_serial];
VECTOR dr;
float dr2;
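// 125 grid serials per pointer: presumably the 5 x 5 x 5 block of cells around this atom's cell.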
for (int grid_cycle = 0; grid_cycle < 125; grid_cycle = grid_cycle + 1) {
grid_serial2 = gpointer_i.grid_serial[grid_cycle];
GRID_BUCKET bucket_i = bucket[grid_serial2];
for (int i = 0; i < atom_numbers_in_grid_bucket[grid_serial2]; i = i + 1) {
atom_j = bucket_i.atom_serial[i];
if (atom_j > atom_i) {
int_x = uint_crd[atom_j].uint_x - uint_crd_i.uint_x;
int_y = uint_crd[atom_j].uint_y - uint_crd_i.uint_y;
int_z = uint_crd[atom_j].uint_z - uint_crd_i.uint_z;
dr.x = uint_dr_to_dr_cof[0] * int_x;
dr.y = uint_dr_to_dr_cof[1] * int_y;
dr.z = uint_dr_to_dr_cof[2] * int_z;
dr2 = dr.x * dr.x + dr.y * dr.y + dr.z * dr.z;
if (dr2 < cutoff_skin_square) {
nl_i.atom_serial[atom_numbers_in_nl_lin] = atom_j;
atom_numbers_in_nl_lin = atom_numbers_in_nl_lin + 1;
}
}
}
}
nl[atom_i].atom_numbers = atom_numbers_in_nl_lin;
}
}
static __global__ void Delete_Excluded_Atoms_Serial_In_Neighbor_List(const int atom_numbers, NEIGHBOR_LIST *nl,
const int *excluded_list_start,
const int *excluded_list,
const int *excluded_atom_numbers) {
int atom_i = blockDim.x * blockIdx.x + threadIdx.x;
if (atom_i < atom_numbers) {
int excluded_number = excluded_atom_numbers[atom_i];
if (excluded_number > 0) {
int list_start = excluded_list_start[atom_i];
int atom_min = excluded_list[list_start];
int list_end = list_start + excluded_number;
int atom_max = excluded_list[list_end - 1];
NEIGHBOR_LIST nl_i = nl[atom_i];
int atomnumbers_in_nl_lin = nl_i.atom_numbers;
int atom_j;
int excluded_atom_numbers_lin = list_end - list_start;
int excluded_atom_numbers_count = 0;
for (int i = 0; i < atomnumbers_in_nl_lin; i = i + 1) {
atom_j = nl_i.atom_serial[i];
if (atom_j < atom_min || atom_j > atom_max) {
continue;
} else {
for (int j = list_start; j < list_end; j = j + 1) {
if (atom_j == excluded_list[j]) {
atomnumbers_in_nl_lin = atomnumbers_in_nl_lin - 1;
nl_i.atom_serial[i] = nl_i.atom_serial[atomnumbers_in_nl_lin];
excluded_atom_numbers_count = excluded_atom_numbers_count + 1;
i = i - 1;
}
}
if (excluded_atom_numbers_count >= excluded_atom_numbers_lin) {
break;
}
}
}
nl[atom_i].atom_numbers = atomnumbers_in_nl_lin;
}
}
}
static __global__ void construct_neighbor_list_kernel(int atom_numbers, int max_neighbor_numbers, int *nl_atom_numbers,
int *nl_atom_serial, NEIGHBOR_LIST *nl) {
for (size_t i = blockIdx.x * blockDim.x + threadIdx.x; i < atom_numbers; i += gridDim.x * blockDim.x) {
nl[i].atom_numbers = nl_atom_numbers[i];
nl[i].atom_serial = nl_atom_serial + i * max_neighbor_numbers;
}
}
static __global__ void copy_neighbor_list_atom_number(int atom_numbers, int max_neighbor_numbers, NEIGHBOR_LIST *nl,
int *nl_atom_numbers, int *nl_atom_serial) {
int i, j;
for (i = blockIdx.x * blockDim.x + threadIdx.x; i < atom_numbers; i += gridDim.x * blockDim.x) {
nl_atom_numbers[i] = nl[i].atom_numbers;
for (j = blockIdx.y * blockDim.y + threadIdx.y; j < max_neighbor_numbers; j += gridDim.y * blockDim.y) {
if (j < nl_atom_numbers[i]) {
nl_atom_serial[i * max_neighbor_numbers + j] = nl[i].atom_serial[j];
} else {
nl_atom_serial[i * max_neighbor_numbers + j] = 0;
}
}
}
}
static __global__ void Mul_half(float *src, float *dst) {
int index = threadIdx.x;
if (index < 3) {
dst[index] = src[index] * 0.5;
}
}
static __global__ void Mul_quarter(float *src, float *dst) {
int index = threadIdx.x;
if (index < 3) {
dst[index] = src[index] * 0.25;
}
}
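// These helpers rescale crd_to_uint_crd_cof on the device so the factor matches
// the bit shift applied afterwards: the half factor pairs with the << 1 in
// Crd_To_Uint_Crd_Half, the quarter factor with the << 2 in Crd_To_Uint_Crd.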
// old neighbor list update functions
__global__ void Crd_To_Uint_Crd_Half(const int atom_numbers, float *scale_factor, const VECTOR *crd,
UNSIGNED_INT_VECTOR *uint_crd) {
int atom_i = blockDim.x * blockIdx.x + threadIdx.x;
if (atom_i < atom_numbers) {
uint_crd[atom_i].uint_x = crd[atom_i].x * scale_factor[0];
uint_crd[atom_i].uint_y = crd[atom_i].y * scale_factor[1];
uint_crd[atom_i].uint_z = crd[atom_i].z * scale_factor[2];
uint_crd[atom_i].uint_x = uint_crd[atom_i].uint_x << 1;
uint_crd[atom_i].uint_y = uint_crd[atom_i].uint_y << 1;
uint_crd[atom_i].uint_z = uint_crd[atom_i].uint_z << 1;
}
}
__global__ void Vector_Translation(const int vector_numbers, VECTOR *vec_list, const VECTOR translation_vec) {
int i = blockDim.x * blockIdx.x + threadIdx.x;
if (i < vector_numbers) {
vec_list[i].x = vec_list[i].x + translation_vec.x;
vec_list[i].y = vec_list[i].y + translation_vec.y;
vec_list[i].z = vec_list[i].z + translation_vec.z;
}
}
__global__ void Vector_Translation(const int vector_numbers, VECTOR *vec_list, const VECTOR *translation_vec) {
int i = blockDim.x * blockIdx.x + threadIdx.x;
if (i < vector_numbers) {
vec_list[i].x = vec_list[i].x + translation_vec[0].x;
vec_list[i].y = vec_list[i].y + translation_vec[0].y;
vec_list[i].z = vec_list[i].z + translation_vec[0].z;
}
}
__global__ void Is_need_refresh_neighbor_list_cuda(const int atom_numbers, const VECTOR *crd, const VECTOR *old_crd,
const float half_skin_square, int *need_refresh_flag) {
int i = blockDim.x * blockIdx.x + threadIdx.x;
if (i < atom_numbers) {
VECTOR r1 = crd[i];
VECTOR r2 = old_crd[i];
r1.x = r1.x - r2.x;
r1.y = r1.y - r2.y;
r1.z = r1.z - r2.z;
float r1_2 = r1.x * r1.x + r1.y * r1.y + r1.z * r1.z;
if (r1_2 > half_skin_square) {
atomicExch(&need_refresh_flag[0], 1);
}
}
}
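// Refresh criterion: the list is built with radius cutoff + skin, so it stays
// valid until some atom has moved more than skin / 2 from its position at the
// last build (half_skin_square is presumably (skin / 2)^2, precomputed by the caller).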
void Refresh_Neighbor_List_Half(int *refresh_sign, const int thread, const int atom_numbers, VECTOR *crd,
VECTOR *old_crd, UNSIGNED_INT_VECTOR *uint_crd, float *crd_to_uint_crd_cof,
float *uint_dr_to_dr_cof, int *atom_in_grid_serial, const float skin, float *box_length,
const GRID_POINTER *gpointer, GRID_BUCKET *bucket, int *atom_numbers_in_grid_bucket,
NEIGHBOR_LIST *d_nl, int *excluded_list_start, int *excluded_list,
int *excluded_numbers, float cutoff_skin_square, int grid_numbers,
float *grid_length_inverse, int *grid_N, int nxy, cudaStream_t stream) {
std::vector<int> h_refresh_sign(1);
cudaMemcpyAsync(h_refresh_sign.data(), refresh_sign, sizeof(int), cudaMemcpyDeviceToHost, stream);
cudaStreamSynchronize(stream);  // the flag is read on the host right below
if (h_refresh_sign[0] == 1) {
VECTOR trans_vec = {-skin, -skin, -skin};
Clear_Grid_Bucket<<<ceilf(static_cast<float>(grid_numbers) / thread), thread, 0, stream>>>(
grid_numbers, atom_numbers_in_grid_bucket, bucket);
Vector_Translation<<<ceilf(static_cast<float>(atom_numbers) / thread), thread, 0, stream>>>(atom_numbers, crd,
trans_vec);
Crd_Periodic_Map<<<ceilf(static_cast<float>(atom_numbers) / thread), thread, 0, stream>>>(atom_numbers, crd,
box_length);
Find_Atom_In_Grid_Serial<<<ceilf(static_cast<float>(atom_numbers) / thread), thread, 0, stream>>>(
atom_numbers, grid_length_inverse, crd, grid_N, nxy, atom_in_grid_serial);
trans_vec.x = -trans_vec.x;
trans_vec.y = -trans_vec.y;
trans_vec.z = -trans_vec.z;
Vector_Translation<<<ceilf(static_cast<float>(atom_numbers) / thread), thread, 0, stream>>>(atom_numbers, crd,
trans_vec);
Copy_List<<<ceilf(static_cast<float>(3. * atom_numbers) / thread), thread, 0, stream>>>(
3 * atom_numbers, reinterpret_cast<float *>(crd), reinterpret_cast<float *>(old_crd));
Put_Atom_In_Grid_Bucket<<<ceilf(static_cast<float>(atom_numbers) / thread), thread, 0, stream>>>(
atom_numbers, atom_in_grid_serial, bucket, atom_numbers_in_grid_bucket);
Crd_To_Uint_Crd_Half<<<ceilf(static_cast<float>(atom_numbers) / thread), thread, 0, stream>>>(
atom_numbers, crd_to_uint_crd_cof, crd, uint_crd);
Find_atom_neighbors<<<ceilf(static_cast<float>(atom_numbers) / thread), thread, 0, stream>>>(
atom_numbers, uint_crd, uint_dr_to_dr_cof, atom_in_grid_serial, gpointer, bucket, atom_numbers_in_grid_bucket,
d_nl, cutoff_skin_square);
Delete_Excluded_Atoms_Serial_In_Neighbor_List<<<ceilf(static_cast<float>(atom_numbers) / thread), thread, 0,
stream>>>(atom_numbers, d_nl, excluded_list_start, excluded_list,
excluded_numbers);
cudaMemsetAsync(refresh_sign, 0, sizeof(int), stream);  // clear the device-side refresh flag
}
}
void Refresh_Neighbor_List_First_Time(int *refresh_sign, const int thread, const int atom_numbers, VECTOR *crd,
VECTOR *old_crd, UNSIGNED_INT_VECTOR *uint_crd, float *crd_to_uint_crd_cof,
float *uint_dr_to_dr_cof, int *atom_in_grid_serial, const float skin,
float *box_length, const GRID_POINTER *gpointer, GRID_BUCKET *bucket,
int *atom_numbers_in_grid_bucket, NEIGHBOR_LIST *d_nl, int *excluded_list_start,
int *excluded_list, int *excluded_numbers, float cutoff_skin_square,
int grid_numbers, float *grid_length_inverse, int *grid_N, int nxy,
cudaStream_t stream) {
VECTOR trans_vec = {skin, skin, skin};
Clear_Grid_Bucket<<<ceilf(static_cast<float>(grid_numbers) / 32), 32, 0, stream>>>(
grid_numbers, atom_numbers_in_grid_bucket, bucket);
Crd_Periodic_Map<<<ceilf(static_cast<float>(atom_numbers) / 32), 32, 0, stream>>>(atom_numbers, crd, box_length);
Find_Atom_In_Grid_Serial<<<ceilf(static_cast<float>(atom_numbers) / 32), 32, 0, stream>>>(
atom_numbers, grid_length_inverse, crd, grid_N, nxy, atom_in_grid_serial);
Vector_Translation<<<ceilf(static_cast<float>(atom_numbers) / 32), 32, 0, stream>>>(atom_numbers, crd, trans_vec);
Copy_List<<<ceilf(static_cast<float>(3. * atom_numbers) / 32), 32, 0, stream>>>(
3 * atom_numbers, reinterpret_cast<float *>(crd), reinterpret_cast<float *>(old_crd));
Put_Atom_In_Grid_Bucket<<<ceilf(static_cast<float>(atom_numbers) / 32), 32, 0, stream>>>(
atom_numbers, atom_in_grid_serial, bucket, atom_numbers_in_grid_bucket);
Crd_To_Uint_Crd_Half<<<ceilf(static_cast<float>(atom_numbers) / 32), 32, 0, stream>>>(
atom_numbers, crd_to_uint_crd_cof, crd, uint_crd);
Find_atom_neighbors<<<ceilf(static_cast<float>(atom_numbers) / thread), thread, 0, stream>>>(
atom_numbers, uint_crd, uint_dr_to_dr_cof, atom_in_grid_serial, gpointer, bucket, atom_numbers_in_grid_bucket, d_nl,
cutoff_skin_square);
Delete_Excluded_Atoms_Serial_In_Neighbor_List<<<ceilf(static_cast<float>(atom_numbers) / thread), thread, 0,
stream>>>(atom_numbers, d_nl, excluded_list_start, excluded_list,
excluded_numbers);
}
__global__ void copy_neighbor_list_atom_number(int atom_numbers, NEIGHBOR_LIST *nl, int *nl_atom_numbers) {
for (size_t i = blockIdx.x * blockDim.x + threadIdx.x; i < atom_numbers; i += gridDim.x * blockDim.x) {
nl_atom_numbers[i] = nl[i].atom_numbers;
}
}
void ConstructNeighborListHalf(int atom_numbers, int max_neighbor_numbers, int *nl_atom_numbers, int *nl_atom_serial,
NEIGHBOR_LIST *nl, cudaStream_t stream) {
construct_neighbor_list_kernel<<<ceilf(static_cast<float>(atom_numbers) / 128), 128, 0, stream>>>(
atom_numbers, max_neighbor_numbers, nl_atom_numbers, nl_atom_serial, nl);
}
void CopyNeighborListHalf(int atom_numbers, NEIGHBOR_LIST *nl, int *nl_atom_numbers, cudaStream_t stream) {
copy_neighbor_list_atom_number<<<ceilf(static_cast<float>(atom_numbers) / 128), 128, 0, stream>>>(atom_numbers, nl,
nl_atom_numbers);
}
void Refresh_Neighbor_List_No_Check_Half(int grid_numbers, int atom_numbers, float skin, int nxy,
float cutoff_skin_square, int *grid_N, float *box_length,
int *atom_numbers_in_grid_bucket, float *grid_length_inverse,
int *atom_in_grid_serial, GRID_BUCKET *bucket, VECTOR *crd, VECTOR *old_crd,
float *crd_to_uint_crd_cof, UNSIGNED_INT_VECTOR *uint_crd,
float *uint_dr_to_dr_cof, GRID_POINTER *gpointer, NEIGHBOR_LIST *d_nl,
int *excluded_list_start, int *excluded_list, int *excluded_numbers,
cudaStream_t stream) {
VECTOR trans_vec = {-skin, -skin, -skin};
Clear_Grid_Bucket<<<ceilf(static_cast<float>(grid_numbers) / 32), 32, 0, stream>>>(
grid_numbers, atom_numbers_in_grid_bucket, bucket);
Vector_Translation<<<ceilf(static_cast<float>(atom_numbers) / 32), 32, 0, stream>>>(atom_numbers, crd, trans_vec);
Crd_Periodic_Map<<<ceilf(static_cast<float>(atom_numbers) / 32), 32, 0, stream>>>(atom_numbers, crd, box_length);
Find_Atom_In_Grid_Serial<<<ceilf(static_cast<float>(atom_numbers) / 32), 32, 0, stream>>>(
atom_numbers, grid_length_inverse, crd, grid_N, nxy, atom_in_grid_serial);
trans_vec.x = -trans_vec.x;
trans_vec.y = -trans_vec.y;
trans_vec.z = -trans_vec.z;
Vector_Translation<<<ceilf(static_cast<float>(atom_numbers) / 32), 32, 0, stream>>>(atom_numbers, crd, trans_vec);
cudaMemcpyAsync(old_crd, crd, sizeof(VECTOR) * atom_numbers, cudaMemcpyDeviceToDevice, stream);
Put_Atom_In_Grid_Bucket<<<ceilf(static_cast<float>(atom_numbers) / 32), 32, 0, stream>>>(
atom_numbers, atom_in_grid_serial, bucket, atom_numbers_in_grid_bucket);
Crd_To_Uint_Crd_Half<<<ceilf(static_cast<float>(atom_numbers) / 32), 32, 0, stream>>>(
atom_numbers, crd_to_uint_crd_cof, crd, uint_crd);
Find_atom_neighbors<<<ceilf(static_cast<float>(atom_numbers) / 32), 32, 0, stream>>>(
atom_numbers, uint_crd, uint_dr_to_dr_cof, atom_in_grid_serial, gpointer, bucket, atom_numbers_in_grid_bucket, d_nl,
cutoff_skin_square);
Delete_Excluded_Atoms_Serial_In_Neighbor_List<<<ceilf(static_cast<float>(atom_numbers) / 32), 32, 0, stream>>>(
atom_numbers, d_nl, excluded_list_start, excluded_list, excluded_numbers);
}
void NeighborListUpdate(int grid_numbers, int atom_numbers, int *d_refresh_count, int refresh_interval,
int not_first_time, float skin, int nxy, float cutoff_square, float cutoff_with_skin_square,
int *grid_N, float *box_length, int *atom_numbers_in_grid_bucket, float *grid_length_inverse,
int *atom_in_grid_serial, GRID_BUCKET *bucket, float *crd, float *old_crd,
float *crd_to_uint_crd_cof, float *half_crd_to_uint_crd_cof, unsigned int *uint_crd,
float *uint_dr_to_dr_cof, GRID_POINTER *gpointer, NEIGHBOR_LIST *d_nl, int *excluded_list_start,
int *excluded_list, int *excluded_numbers, float half_skin_square,
int *is_need_refresh_neighbor_list, cudaStream_t stream) {
if (not_first_time) {
if (refresh_interval > 0) {
std::vector<int> refresh_count_list(1);
cudaMemcpyAsync(refresh_count_list.data(), d_refresh_count, sizeof(int), cudaMemcpyDeviceToHost, stream);
cudaStreamSynchronize(stream);
int refresh_count = refresh_count_list[0];
if (refresh_count % refresh_interval == 0) {
Mul_half<<<1, 3, 0, stream>>>(crd_to_uint_crd_cof, half_crd_to_uint_crd_cof);
Refresh_Neighbor_List_No_Check_Half(
grid_numbers, atom_numbers, skin, nxy, cutoff_square, grid_N, box_length, atom_numbers_in_grid_bucket,
grid_length_inverse, atom_in_grid_serial, bucket, reinterpret_cast<VECTOR *>(crd),
reinterpret_cast<VECTOR *>(old_crd), half_crd_to_uint_crd_cof,
reinterpret_cast<UNSIGNED_INT_VECTOR *>(uint_crd), uint_dr_to_dr_cof, gpointer, d_nl, excluded_list_start,
excluded_list, excluded_numbers, stream);
}
refresh_count += 1;
cudaMemcpyAsync(d_refresh_count, &refresh_count, sizeof(int), cudaMemcpyHostToDevice, stream);
} else {
Is_need_refresh_neighbor_list_cuda<<<ceilf(static_cast<float>(atom_numbers) / 128), 128, 0, stream>>>(
atom_numbers, reinterpret_cast<VECTOR *>(crd), reinterpret_cast<VECTOR *>(old_crd), half_skin_square,
is_need_refresh_neighbor_list);
Mul_half<<<1, 3, 0, stream>>>(crd_to_uint_crd_cof, half_crd_to_uint_crd_cof);
Refresh_Neighbor_List_Half(is_need_refresh_neighbor_list, 32, atom_numbers, reinterpret_cast<VECTOR *>(crd),
reinterpret_cast<VECTOR *>(old_crd), reinterpret_cast<UNSIGNED_INT_VECTOR *>(uint_crd),
half_crd_to_uint_crd_cof, uint_dr_to_dr_cof, atom_in_grid_serial, skin, box_length,
gpointer, bucket, atom_numbers_in_grid_bucket, d_nl, excluded_list_start,
excluded_list, excluded_numbers, cutoff_with_skin_square, grid_numbers,
grid_length_inverse, grid_N, nxy, stream);
}
} else {
Mul_half<<<1, 3, 0, stream>>>(crd_to_uint_crd_cof, half_crd_to_uint_crd_cof);
Refresh_Neighbor_List_First_Time(
is_need_refresh_neighbor_list, 32, atom_numbers, reinterpret_cast<VECTOR *>(crd),
reinterpret_cast<VECTOR *>(old_crd), reinterpret_cast<UNSIGNED_INT_VECTOR *>(uint_crd), half_crd_to_uint_crd_cof,
uint_dr_to_dr_cof, atom_in_grid_serial, skin, box_length, gpointer, bucket, atom_numbers_in_grid_bucket, d_nl,
excluded_list_start, excluded_list, excluded_numbers, cutoff_with_skin_square, grid_numbers, grid_length_inverse,
grid_N, nxy, stream);
}
}
// new neighbor list update functions
__device__ __host__ VECTOR Get_Periodic_Displacement_Update(const VECTOR vec_a, const VECTOR vec_b,
const VECTOR box_length) {
VECTOR dr;
dr.x = vec_a.x - vec_b.x;
dr.y = vec_a.y - vec_b.y;
dr.z = vec_a.z - vec_b.z;
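// Minimum-image convention: floorf(d / L + 0.5) * L shifts each displacement component into [-L / 2, L / 2).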
dr.x = dr.x - floorf(dr.x / box_length.x + 0.5) * box_length.x;
dr.y = dr.y - floorf(dr.y / box_length.y + 0.5) * box_length.y;
dr.z = dr.z - floorf(dr.z / box_length.z + 0.5) * box_length.z;
return dr;
}
__global__ void Is_need_refresh_neighbor_list_cuda(const int atom_numbers, const VECTOR *crd, const VECTOR *old_crd,
const VECTOR *box_length, const float half_skin_square,
int *need_refresh_flag) {
int i = blockDim.x * blockIdx.x + threadIdx.x;
if (i < atom_numbers) {
VECTOR r1 = crd[i];
VECTOR r2 = old_crd[i];
r1 = Get_Periodic_Displacement_Update(r1, r2, box_length[0]);
float r1_2 = r1.x * r1.x + r1.y * r1.y + r1.z * r1.z;
if (r1_2 > half_skin_square) {
atomicExch(&need_refresh_flag[0], 1);
}
}
}
void Refresh_Neighbor_List(int *refresh_sign, const int thread, const int atom_numbers, VECTOR *crd, VECTOR *old_crd,
UNSIGNED_INT_VECTOR *uint_crd, float *crd_to_uint_crd_cof, float *uint_dr_to_dr_cof,
int *atom_in_grid_serial, const float skin, float *box_length, const GRID_POINTER *gpointer,
GRID_BUCKET *bucket, int *atom_numbers_in_grid_bucket, NEIGHBOR_LIST *d_nl,
int *excluded_list_start, int *excluded_list, int *excluded_numbers,
float cutoff_skin_square, int grid_numbers, float *grid_length_inverse, int *grid_N, int nxy,
cudaStream_t stream) {
std::vector<int> h_refresh_sign(1);
cudaMemcpyAsync(h_refresh_sign.data(), refresh_sign, sizeof(int), cudaMemcpyDeviceToHost, stream);
cudaStreamSynchronize(stream);  // the flag is read on the host right below
if (h_refresh_sign[0] == 1) {
Clear_Grid_Bucket<<<ceilf(static_cast<float>(grid_numbers) / thread), thread, 0, stream>>>(
grid_numbers, atom_numbers_in_grid_bucket, bucket);
Crd_Periodic_Map<<<ceilf(static_cast<float>(atom_numbers) / thread), thread, 0, stream>>>(atom_numbers, crd,
box_length);
Find_Atom_In_Grid_Serial<<<ceilf(static_cast<float>(atom_numbers) / thread), thread, 0, stream>>>(
atom_numbers, grid_length_inverse, crd, grid_N, nxy, atom_in_grid_serial);
Copy_List<<<ceilf(static_cast<float>(3. * atom_numbers) / thread), thread, 0, stream>>>(
3 * atom_numbers, reinterpret_cast<float *>(crd), reinterpret_cast<float *>(old_crd));
Put_Atom_In_Grid_Bucket<<<ceilf(static_cast<float>(atom_numbers) / thread), thread, 0, stream>>>(
atom_numbers, atom_in_grid_serial, bucket, atom_numbers_in_grid_bucket);
Crd_To_Uint_Crd<<<ceilf(static_cast<float>(atom_numbers) / thread), thread, 0, stream>>>(
atom_numbers, crd_to_uint_crd_cof, crd, uint_crd);
Find_atom_neighbors<<<ceilf(static_cast<float>(atom_numbers) / thread), thread, 0, stream>>>(
atom_numbers, uint_crd, uint_dr_to_dr_cof, atom_in_grid_serial, gpointer, bucket, atom_numbers_in_grid_bucket,
d_nl, cutoff_skin_square);
Delete_Excluded_Atoms_Serial_In_Neighbor_List<<<ceilf(static_cast<float>(atom_numbers) / thread), thread, 0,
stream>>>(atom_numbers, d_nl, excluded_list_start, excluded_list,
excluded_numbers);
cudaMemsetAsync(refresh_sign, 0, sizeof(int), stream);  // clear the device-side refresh flag
}
}
void Refresh_Neighbor_List_No_Check(int grid_numbers, int atom_numbers, float skin, int nxy, float cutoff_skin_square,
int *grid_N, float *box_length, int *atom_numbers_in_grid_bucket,
float *grid_length_inverse, int *atom_in_grid_serial, GRID_BUCKET *bucket,
VECTOR *crd, VECTOR *old_crd, float *crd_to_uint_crd_cof,
UNSIGNED_INT_VECTOR *uint_crd, float *uint_dr_to_dr_cof, GRID_POINTER *gpointer,
NEIGHBOR_LIST *d_nl, int *excluded_list_start, int *excluded_list,
int *excluded_numbers, cudaStream_t stream) {
Clear_Grid_Bucket<<<ceilf(static_cast<float>(grid_numbers) / 32), 32, 0, stream>>>(
grid_numbers, atom_numbers_in_grid_bucket, bucket);
Crd_Periodic_Map<<<ceilf(static_cast<float>(atom_numbers) / 32), 32, 0, stream>>>(atom_numbers, crd, box_length);
Find_Atom_In_Grid_Serial<<<ceilf(static_cast<float>(atom_numbers) / 32), 32, 0, stream>>>(
atom_numbers, grid_length_inverse, crd, grid_N, nxy, atom_in_grid_serial);
cudaMemcpyAsync(old_crd, crd, sizeof(VECTOR) * atom_numbers, cudaMemcpyDeviceToDevice, stream);
Put_Atom_In_Grid_Bucket<<<ceilf(static_cast<float>(atom_numbers) / 32), 32, 0, stream>>>(
atom_numbers, atom_in_grid_serial, bucket, atom_numbers_in_grid_bucket);
Crd_To_Uint_Crd<<<ceilf(static_cast<float>(atom_numbers) / 32), 32, 0, stream>>>(atom_numbers, crd_to_uint_crd_cof,
crd, uint_crd);
Find_atom_neighbors<<<ceilf(static_cast<float>(atom_numbers) / 32), 32, 0, stream>>>(
atom_numbers, uint_crd, uint_dr_to_dr_cof, atom_in_grid_serial, gpointer, bucket, atom_numbers_in_grid_bucket, d_nl,
cutoff_skin_square);
Delete_Excluded_Atoms_Serial_In_Neighbor_List<<<ceilf(static_cast<float>(atom_numbers) / 32), 32, 0, stream>>>(
atom_numbers, d_nl, excluded_list_start, excluded_list, excluded_numbers);
}
void CopyNeighborList(int atom_numbers, int max_neighbor_numbers, NEIGHBOR_LIST *nl, int *nl_atom_numbers,
int *nl_atom_serial, cudaStream_t stream) {
copy_neighbor_list_atom_number<<<ceilf(static_cast<float>(atom_numbers) / 128), 128, 0, stream>>>(
atom_numbers, max_neighbor_numbers, nl, nl_atom_numbers, nl_atom_serial);
}
void ConstructNeighborList(int atom_numbers, int max_neighbor_numbers, int *nl_atom_numbers, int *nl_atom_serial,
NEIGHBOR_LIST *nl, cudaStream_t stream) {
construct_neighbor_list_kernel<<<ceilf(static_cast<float>(atom_numbers) / 128), 128, 0, stream>>>(
atom_numbers, max_neighbor_numbers, nl_atom_numbers, nl_atom_serial, nl);
}
int refresh_count = 0;
void NeighborListRefresh(int grid_numbers, int atom_numbers, int *d_refresh_count, int refresh_interval,
int not_first_time, float skin, int nxy, float cutoff_square, float cutoff_with_skin_square,
int *grid_N, float *box_length, int *atom_numbers_in_grid_bucket, float *grid_length_inverse,
int *atom_in_grid_serial, GRID_BUCKET *bucket, float *crd, float *old_crd,
float *crd_to_uint_crd_cof, float *half_crd_to_uint_crd_cof, unsigned int *uint_crd,
float *uint_dr_to_dr_cof, GRID_POINTER *gpointer, NEIGHBOR_LIST *d_nl,
int *excluded_list_start, int *excluded_list, int *excluded_numbers, float half_skin_square,
int *is_need_refresh_neighbor_list, int forced_update, int forced_check, cudaStream_t stream) {
if (forced_update) {
Mul_quarter<<<1, 3, 0, stream>>>(crd_to_uint_crd_cof, half_crd_to_uint_crd_cof);
Refresh_Neighbor_List_No_Check(
grid_numbers, atom_numbers, skin, nxy, cutoff_square, grid_N, box_length, atom_numbers_in_grid_bucket,
grid_length_inverse, atom_in_grid_serial, bucket, reinterpret_cast<VECTOR *>(crd),
reinterpret_cast<VECTOR *>(old_crd), half_crd_to_uint_crd_cof, reinterpret_cast<UNSIGNED_INT_VECTOR *>(uint_crd),
uint_dr_to_dr_cof, gpointer, d_nl, excluded_list_start, excluded_list, excluded_numbers, stream);
} else if (refresh_interval > 0 && !forced_check) {
if (refresh_count % refresh_interval == 0) {
Mul_quarter<<<1, 3, 0, stream>>>(crd_to_uint_crd_cof, half_crd_to_uint_crd_cof);
Refresh_Neighbor_List_No_Check(grid_numbers, atom_numbers, skin, nxy, cutoff_square, grid_N, box_length,
atom_numbers_in_grid_bucket, grid_length_inverse, atom_in_grid_serial, bucket,
reinterpret_cast<VECTOR *>(crd), reinterpret_cast<VECTOR *>(old_crd),
half_crd_to_uint_crd_cof, reinterpret_cast<UNSIGNED_INT_VECTOR *>(uint_crd),
uint_dr_to_dr_cof, gpointer, d_nl, excluded_list_start, excluded_list,
excluded_numbers, stream);
}
refresh_count += 1;
} else {
Is_need_refresh_neighbor_list_cuda<<<ceilf(static_cast<float>(atom_numbers) / 128), 128, 0, stream>>>(
atom_numbers, reinterpret_cast<VECTOR *>(crd), reinterpret_cast<VECTOR *>(old_crd),
reinterpret_cast<VECTOR *>(box_length), half_skin_square, is_need_refresh_neighbor_list);
Mul_quarter<<<1, 3, 0, stream>>>(crd_to_uint_crd_cof, half_crd_to_uint_crd_cof);
Refresh_Neighbor_List(is_need_refresh_neighbor_list, 32, atom_numbers, reinterpret_cast<VECTOR *>(crd),
reinterpret_cast<VECTOR *>(old_crd), reinterpret_cast<UNSIGNED_INT_VECTOR *>(uint_crd),
half_crd_to_uint_crd_cof, uint_dr_to_dr_cof, atom_in_grid_serial, skin, box_length, gpointer,
bucket, atom_numbers_in_grid_bucket, d_nl, excluded_list_start, excluded_list,
excluded_numbers, cutoff_with_skin_square, grid_numbers, grid_length_inverse, grid_N, nxy,
stream);
}
}
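In summary, NeighborListRefresh above chooses among three rebuild policies. A minimal host-side sketch of the decision (NeedRebuild and its arguments are hypothetical names for illustration; the forced_check path, which falls through to the distance test even when an interval is set, is omitted):

// Rebuild on demand, on a fixed cadence, or when atoms drift too far.
static bool NeedRebuild(bool forced_update, int refresh_count, int refresh_interval,
bool moved_over_half_skin) {
if (forced_update) return true;
if (refresh_interval > 0) return refresh_count % refresh_interval == 0;
return moved_over_half_skin;
}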

View File

@ -1,82 +0,0 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
* Note:
* NeighborListUpdate. This is an experimental interface that is subject to change and/or deletion.
*/
#ifndef MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_NEIGHBOR_LIST_IMPL_H_
#define MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_NEIGHBOR_LIST_IMPL_H_
#include <curand_kernel.h>
#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/cuda_common.h"
#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/cuda_device_info.h"
struct VECTOR {
float x;
float y;
float z;
};
struct INT_VECTOR {
int int_x;
int int_y;
int int_z;
};
struct UNSIGNED_INT_VECTOR {
unsigned int uint_x;
unsigned int uint_y;
unsigned int uint_z;
};
struct NEIGHBOR_LIST {
int atom_numbers;
int *atom_serial;
};
struct GRID_BUCKET {
int *atom_serial;
};
struct GRID_POINTER {
int *grid_serial;
};
CUDA_LIB_EXPORT void ConstructNeighborList(int atom_numbers, int max_neighbor_numbers, int *nl_atom_numbers,
int *nl_atom_serial, NEIGHBOR_LIST *nl, cudaStream_t stream);
CUDA_LIB_EXPORT void CopyNeighborList(int atom_numbers, int max_neighbor_numbers, NEIGHBOR_LIST *nl,
int *nl_atom_numbers, int *nl_atom_serial, cudaStream_t stream);
CUDA_LIB_EXPORT void NeighborListRefresh(
int grid_numbers, int atom_numbers, int *d_refresh_count, int refresh_interval, int not_first_time, float skin,
int nxy, float cutoff_square, float cutoff_with_skin_square, int *grid_N, float *box_length,
int *atom_numbers_in_grid_bucket, float *grid_length_inverse, int *atom_in_grid_serial, GRID_BUCKET *bucket,
float *crd, float *old_crd, float *crd_to_uint_crd_cof, float *half_crd_to_uint_crd_cof, unsigned int *uint_crd,
float *uint_dr_to_dr_cof, GRID_POINTER *gpointer, NEIGHBOR_LIST *d_nl, int *excluded_list_start, int *excluded_list,
int *excluded_numbers, float half_skin_square, int *is_need_refresh_neighbor_list, int forced_update,
int forced_check, cudaStream_t stream);
CUDA_LIB_EXPORT void ConstructNeighborListHalf(int atom_numbers, int max_neighbor_numbers, int *nl_atom_numbers,
int *nl_atom_serial, NEIGHBOR_LIST *nl, cudaStream_t stream);
CUDA_LIB_EXPORT void CopyNeighborListHalf(int atom_numbers, NEIGHBOR_LIST *nl, int *nl_atom_numbers,
cudaStream_t stream);
CUDA_LIB_EXPORT void NeighborListUpdate(
int grid_numbers, int atom_numbers, int *d_refresh_count, int refresh_interval, int not_first_time, float skin,
int nxy, float cutoff_square, float cutoff_with_skin_square, int *grid_N, float *box_length,
int *atom_numbers_in_grid_bucket, float *grid_length_inverse, int *atom_in_grid_serial, GRID_BUCKET *bucket,
float *crd, float *old_crd, float *crd_to_uint_crd_cof, float *half_crd_to_uint_crd_cof, unsigned int *uint_crd,
float *uint_dr_to_dr_cof, GRID_POINTER *gpointer, NEIGHBOR_LIST *d_nl, int *excluded_list_start, int *excluded_list,
int *excluded_numbers, float half_skin_square, int *is_need_refresh_neighbor_list, cudaStream_t stream);
#endif  // MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_NEIGHBOR_LIST_IMPL_H_

View File

@ -1,41 +0,0 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "plugin/device/gpu/kernel/cuda_impl/sponge/nvtit/md_iteration_gradient_descent_impl.cuh"
#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/util.cuh"
#include "plugin/device/gpu/kernel/cuda_impl/sponge/common_sponge.cuh"
__global__ void MD_Iteration_Gradient_Descent(const int atom_numbers, VECTOR *crd, VECTOR *frc,
const float learning_rate) {
int i = blockDim.x * blockIdx.x + threadIdx.x;
if (i < atom_numbers) {
crd[i].x = crd[i].x + learning_rate * frc[i].x;
crd[i].y = crd[i].y + learning_rate * frc[i].y;
crd[i].z = crd[i].z + learning_rate * frc[i].z;
frc[i].x = 0.;
frc[i].y = 0.;
frc[i].z = 0.;
}
}
void MDIterationGradientDescent(const int atom_numbers, float *crd, float *frc, const float learning_rate,
cudaStream_t stream) {
VECTOR *d_crd = reinterpret_cast<VECTOR *>(crd);
VECTOR *d_frc = reinterpret_cast<VECTOR *>(frc);
MD_Iteration_Gradient_Descent<<<ceilf(static_cast<float>(atom_numbers) / 128), 128, 0, stream>>>(
atom_numbers, d_crd, d_frc, learning_rate);
}
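The update above is plain gradient descent on the potential energy: since $\vec{F} = -\nabla E$, the step

$$\vec{x} \leftarrow \vec{x} + \eta\,\vec{F} = \vec{x} - \eta\,\nabla E$$

moves each atom downhill with learning rate $\eta$ (the learning_rate argument) and clears the force buffer for the next evaluation.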

View File

@ -1,26 +0,0 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_MD_ITERATION_GRADIENT_DESCENT_IMPL_H_
#define MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_MD_ITERATION_GRADIENT_DESCENT_IMPL_H_
#include <curand_kernel.h>
#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/cuda_common.h"
#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/cuda_device_info.h"
CUDA_LIB_EXPORT void MDIterationGradientDescent(const int atom_numbers, float *crd, float *frc,
const float learning_rate, cudaStream_t stream);
#endif  // MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_MD_ITERATION_GRADIENT_DESCENT_IMPL_H_

View File

@ -1,54 +0,0 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
* Note:
* MDIterationLeapFrog. This is an experimental interface that is subject to change and/or deletion.
*/
#include "plugin/device/gpu/kernel/cuda_impl/sponge/nvtit/md_iteration_leap_frog_impl.cuh"
#include "plugin/device/gpu/kernel/cuda_impl/sponge/common_sponge.cuh"
__global__ void MD_Iteration_Leap_Frog(const int atom_numbers, VECTOR *vel, VECTOR *crd, VECTOR *frc, VECTOR *acc,
const float *inverse_mass, const float dt) {
int i = blockDim.x * blockIdx.x + threadIdx.x;
if (i < atom_numbers) {
acc[i].x = inverse_mass[i] * frc[i].x;
acc[i].y = inverse_mass[i] * frc[i].y;
acc[i].z = inverse_mass[i] * frc[i].z;
vel[i].x = vel[i].x + dt * acc[i].x;
vel[i].y = vel[i].y + dt * acc[i].y;
vel[i].z = vel[i].z + dt * acc[i].z;
crd[i].x = crd[i].x + dt * vel[i].x;
crd[i].y = crd[i].y + dt * vel[i].y;
crd[i].z = crd[i].z + dt * vel[i].z;
frc[i].x = 0.;
frc[i].y = 0.;
frc[i].z = 0.;
}
}
void MDIterationLeapFrog(const int atom_numbers, float *vel, float *crd, float *frc, float *acc,
const float *inverse_mass, const float dt, cudaStream_t stream) {
VECTOR *d_vel = reinterpret_cast<VECTOR *>(vel);
VECTOR *d_crd = reinterpret_cast<VECTOR *>(crd);
VECTOR *d_frc = reinterpret_cast<VECTOR *>(frc);
VECTOR *d_acc = reinterpret_cast<VECTOR *>(acc);
MD_Iteration_Leap_Frog<<<ceilf(static_cast<float>(atom_numbers) / 128), 128, 0, stream>>>(
atom_numbers, d_vel, d_crd, d_frc, d_acc, inverse_mass, dt);
}
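The kernel above is the leap-frog update

$$\vec{a} = \vec{F}/m,\qquad \vec{v} \leftarrow \vec{v} + \vec{a}\,\Delta t,\qquad \vec{x} \leftarrow \vec{x} + \vec{v}\,\Delta t,$$

with the force buffer zeroed afterwards so the next force pass can accumulate into it.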

View File

@ -1,31 +0,0 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
* Note:
* MDIterationLeapFrog. This is an experimental interface that is subject to change and/or deletion.
*/
#ifndef MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_NVTIT_MD_ITERATION_LEAP_FROG_IMPL_H
#define MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_NVTIT_MD_ITERATION_LEAP_FROG_IMPL_H
#include <curand_kernel.h>
#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/cuda_common.h"
#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/cuda_device_info.h"
CUDA_LIB_EXPORT void MDIterationLeapFrog(const int atom_numbers, float *vel, float *crd, float *frc, float *acc,
const float *inverse_mass, const float dt, cudaStream_t stream);
#endif  // MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_NVTIT_MD_ITERATION_LEAP_FROG_IMPL_H

View File

@ -1,67 +0,0 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "plugin/device/gpu/kernel/cuda_impl/sponge/nvtit/md_iteration_leap_frog_liujian_gpu_impl.cuh"
#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/util.cuh"
#include "plugin/device/gpu/kernel/cuda_impl/sponge/common_sponge.cuh"
__global__ void MD_Iteration_Leap_Frog_With_LiuJian_kernel(const int atom_numbers, const float half_dt, const float dt,
const float exp_gamma, float *inverse_mass,
float *sqrt_mass_inverse, VECTOR *vel, VECTOR *crd,
VECTOR *frc, VECTOR *acc, VECTOR *random_frc,
VECTOR *output) {
int i = blockDim.x * blockIdx.x + threadIdx.x;
if (i < atom_numbers) {
acc[i].x = inverse_mass[i] * frc[i].x;
acc[i].y = inverse_mass[i] * frc[i].y;
acc[i].z = inverse_mass[i] * frc[i].z;
vel[i].x = vel[i].x + dt * acc[i].x;
vel[i].y = vel[i].y + dt * acc[i].y;
vel[i].z = vel[i].z + dt * acc[i].z;
output[i].x = crd[i].x + half_dt * vel[i].x;
output[i].y = crd[i].y + half_dt * vel[i].y;
output[i].z = crd[i].z + half_dt * vel[i].z;
vel[i].x = exp_gamma * vel[i].x + sqrt_mass_inverse[i] * random_frc[i].x;
vel[i].y = exp_gamma * vel[i].y + sqrt_mass_inverse[i] * random_frc[i].y;
vel[i].z = exp_gamma * vel[i].z + sqrt_mass_inverse[i] * random_frc[i].z;
output[i].x = output[i].x + half_dt * vel[i].x;
output[i].y = output[i].y + half_dt * vel[i].y;
output[i].z = output[i].z + half_dt * vel[i].z;
}
}
void MD_Iteration_Leap_Frog_With_LiuJian(const int atom_numbers, const float half_dt, const float dt,
const float exp_gamma, int float4_numbers, float *inverse_mass,
float *sqrt_mass_inverse, float *vel, float *crd, float *frc, float *acc,
curandStatePhilox4_32_10_t *rand_state, float *rand_frc, float *output,
cudaStream_t stream) {
Rand_Normal<<<ceilf(static_cast<float>(float4_numbers) / 32.), 32, 0, stream>>>(float4_numbers, rand_state,
reinterpret_cast<float4 *>(rand_frc));
VECTOR *d_vel = reinterpret_cast<VECTOR *>(vel);
VECTOR *d_crd = reinterpret_cast<VECTOR *>(crd);
VECTOR *d_frc = reinterpret_cast<VECTOR *>(frc);
VECTOR *d_acc = reinterpret_cast<VECTOR *>(acc);
VECTOR *d_rand_frc = reinterpret_cast<VECTOR *>(rand_frc);
VECTOR *d_out = reinterpret_cast<VECTOR *>(output);
MD_Iteration_Leap_Frog_With_LiuJian_kernel<<<ceilf(static_cast<float>(atom_numbers) / 32), 32, 0, stream>>>(
atom_numbers, half_dt, dt, exp_gamma, inverse_mass, sqrt_mass_inverse, d_vel, d_crd, d_frc, d_acc, d_rand_frc,
d_out);
}
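The kernel above implements a middle-scheme Langevin (Liu-Jian) step: a half-step position drift, an Ornstein-Uhlenbeck velocity kick v <- exp_gamma * v + sqrt_mass_inverse * xi, then a second half-step drift. The precomputed exp_gamma and sqrt_mass_inverse presumably hold $e^{-\gamma\Delta t}$ and $\sqrt{(1 - e^{-2\gamma\Delta t})\,k_B T / m}$ from the host side; Rand_Normal fills the Gaussian noise buffer four values at a time from the Philox states.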

View File

@ -1,29 +0,0 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_MD_ITERATION_LEAP_FROG_LIUJIAN_GPU_IMPL_H_
#define MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_MD_ITERATION_LEAP_FROG_LIUJIAN_GPU_IMPL_H_
#include <curand_kernel.h>
#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/cuda_common.h"
#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/cuda_device_info.h"
CUDA_LIB_EXPORT void MD_Iteration_Leap_Frog_With_LiuJian(const int atom_numbers, const float half_dt, const float dt,
const float exp_gamma, int float4_numbers, float *inverse_mass,
float *sqrt_mass_inverse, float *vel, float *crd, float *frc,
float *acc, curandStatePhilox4_32_10_t *rand_state,
float *rand_frc, float *output, cudaStream_t stream);
#endif // MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_MD_ITERATION_LEAP_FROG_LIUJIAN_GPU_IMPL_H_

View File

@ -1,80 +0,0 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "plugin/device/gpu/kernel/cuda_impl/sponge/nvtit/md_iteration_leap_frog_liujian_with_max_vel_impl.cuh"
#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/util.cuh"
#include "plugin/device/gpu/kernel/cuda_impl/sponge/common_sponge.cuh"
__global__ void MD_Iteration_Leap_Frog_With_LiuJian_With_Max_Velocity(
const int atom_numbers, const float half_dt, const float dt, const float exp_gamma, const float *inverse_mass,
const float *sqrt_mass_inverse, VECTOR *vel, VECTOR *crd, VECTOR *frc, VECTOR *acc, VECTOR *random_frc,
VECTOR *output, const float max_vel) {
int i = blockDim.x * blockIdx.x + threadIdx.x;
float abs_vel;
if (i < atom_numbers) {
acc[i].x = inverse_mass[i] * frc[i].x;
acc[i].y = inverse_mass[i] * frc[i].y;
acc[i].z = inverse_mass[i] * frc[i].z;
vel[i].x = vel[i].x + dt * acc[i].x;
vel[i].y = vel[i].y + dt * acc[i].y;
vel[i].z = vel[i].z + dt * acc[i].z;
abs_vel = norm3df(vel[i].x, vel[i].y, vel[i].z);
    if (abs_vel >= max_vel) {
      abs_vel = max_vel / abs_vel;
      vel[i].x = abs_vel * vel[i].x;
      vel[i].y = abs_vel * vel[i].y;
      vel[i].z = abs_vel * vel[i].z;
    }
output[i].x = crd[i].x + half_dt * vel[i].x;
output[i].y = crd[i].y + half_dt * vel[i].y;
output[i].z = crd[i].z + half_dt * vel[i].z;
vel[i].x = exp_gamma * vel[i].x + sqrt_mass_inverse[i] * random_frc[i].x;
vel[i].y = exp_gamma * vel[i].y + sqrt_mass_inverse[i] * random_frc[i].y;
vel[i].z = exp_gamma * vel[i].z + sqrt_mass_inverse[i] * random_frc[i].z;
output[i].x = output[i].x + half_dt * vel[i].x;
output[i].y = output[i].y + half_dt * vel[i].y;
output[i].z = output[i].z + half_dt * vel[i].z;
frc[i].x = 0.;
frc[i].y = 0.;
frc[i].z = 0.;
}
}
void MD_Iteration_Leap_Frog_With_LiuJian_With_Max_Vel(const int atom_numbers, const float half_dt, const float dt,
const float exp_gamma, int float4_numbers, float *inverse_mass,
float *sqrt_mass_inverse, float *vel, float *crd, float *frc,
float *acc, curandStatePhilox4_32_10_t *rand_state,
float *rand_frc, float *output, const float max_vel,
cudaStream_t stream) {
Rand_Normal<<<ceilf(static_cast<float>(float4_numbers) / 32.), 32, 0, stream>>>(float4_numbers, rand_state,
reinterpret_cast<float4 *>(rand_frc));
VECTOR *d_vel = reinterpret_cast<VECTOR *>(vel);
VECTOR *d_crd = reinterpret_cast<VECTOR *>(crd);
VECTOR *d_frc = reinterpret_cast<VECTOR *>(frc);
VECTOR *d_acc = reinterpret_cast<VECTOR *>(acc);
VECTOR *d_rand_frc = reinterpret_cast<VECTOR *>(rand_frc);
VECTOR *d_out = reinterpret_cast<VECTOR *>(output);
MD_Iteration_Leap_Frog_With_LiuJian_With_Max_Velocity<<<ceilf(static_cast<float>(atom_numbers) / 32), 32, 0,
stream>>>(atom_numbers, half_dt, dt, exp_gamma, inverse_mass,
sqrt_mass_inverse, d_vel, d_crd, d_frc, d_acc,
d_rand_frc, d_out, max_vel);
}

View File

@ -1,28 +0,0 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_MD_ITERATION_LEAP_FROG_LIUJIAN_WITH_MAX_VEL_IMPL_H_
#define MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_MD_ITERATION_LEAP_FROG_LIUJIAN_WITH_MAX_VEL_IMPL_H_
#include <curand_kernel.h>
#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/cuda_common.h"
#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/cuda_device_info.h"
CUDA_LIB_EXPORT void MD_Iteration_Leap_Frog_With_LiuJian_With_Max_Vel(
const int atom_numbers, const float half_dt, const float dt, const float exp_gamma, int float4_numbers,
float *inverse_mass, float *sqrt_mass_inverse, float *vel, float *crd, float *frc, float *acc,
curandStatePhilox4_32_10_t *rand_state, float *rand_frc, float *output, const float max_vel, cudaStream_t stream);
#endif // MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_MD_ITERATION_LEAP_FROG_LIUJIAN_WITH_MAX_VEL_IMPL_H_

View File

@ -1,44 +0,0 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "plugin/device/gpu/kernel/cuda_impl/sponge/nvtit/md_iteration_leap_frog_with_max_vel_impl.cuh"
#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/util.cuh"
#include "plugin/device/gpu/kernel/cuda_impl/sponge/common_sponge.cuh"
__global__ void MD_Iteration_Leap_Frog_With_Max_Velocity(const int atom_numbers, VECTOR *vel, VECTOR *crd, VECTOR *frc,
VECTOR *acc, const float *inverse_mass, const float dt,
const float max_velocity) {
int i = blockDim.x * blockIdx.x + threadIdx.x;
if (i < atom_numbers) {
VECTOR acc_i = inverse_mass[i] * frc[i];
VECTOR vel_i = vel[i] + dt * acc_i;
vel_i = Make_Vector_Not_Exceed_Value(vel_i, max_velocity);
vel[i] = vel_i;
crd[i] = crd[i] + dt * vel_i;
frc[i] = {0.0f, 0.0f, 0.0f};
}
}
void MDIterationLeapFrogWithMaxVelocity(const int atom_numbers, float *vel, float *crd, float *frc, float *acc,
const float *inverse_mass, const float dt, const float max_velocity,
cudaStream_t stream) {
VECTOR *d_vel = reinterpret_cast<VECTOR *>(vel);
VECTOR *d_crd = reinterpret_cast<VECTOR *>(crd);
VECTOR *d_frc = reinterpret_cast<VECTOR *>(frc);
VECTOR *d_acc = reinterpret_cast<VECTOR *>(acc);
MD_Iteration_Leap_Frog_With_Max_Velocity<<<ceilf(static_cast<float>(atom_numbers) / 128), 128, 0, stream>>>(
atom_numbers, d_vel, d_crd, d_frc, d_acc, inverse_mass, dt, max_velocity);
}
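
Make_Vector_Not_Exceed_Value itself is not part of this diff. A plausible definition, consistent with the inline capping in the Liu-Jian max-velocity kernel earlier, is sketched below for reference; the exact comparison and the VECTOR operator overloads it relies on are assumptions.

// Rescale v so its norm never exceeds max_value; shorter vectors pass through.
__device__ static VECTOR Make_Vector_Not_Exceed_Value(VECTOR v, const float max_value) {
  float abs_v = norm3df(v.x, v.y, v.z);
  if (abs_v > max_value) {
    float s = max_value / abs_v;
    v.x *= s;
    v.y *= s;
    v.z *= s;
  }
  return v;
}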

View File

@ -1,27 +0,0 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_MD_ITERATION_LEAP_FROG_WITH_MAX_VEL_IMPL_H_
#define MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_MD_ITERATION_LEAP_FROG_WITH_MAX_VEL_IMPL_H_
#include <curand_kernel.h>
#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/cuda_common.h"
#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/cuda_device_info.h"
CUDA_LIB_EXPORT void MDIterationLeapFrogWithMaxVelocity(const int atom_numbers, float *vel, float *crd, float *frc,
float *acc, const float *inverse_mass, const float dt,
const float max_velocity, cudaStream_t stream);
#endif // MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_MD_ITERATION_LEAP_FROG_WITH_MAX_VEL_IMPL_H_

View File

@ -1,28 +0,0 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "plugin/device/gpu/kernel/cuda_impl/sponge/nvtit/md_iteration_setup_random_state_gpu_impl.cuh"
#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/util.cuh"
#include "plugin/device/gpu/kernel/cuda_impl/sponge/common_sponge.cuh"
void MD_Iteration_Setup_Random_State(int float4_numbers, curandStatePhilox4_32_10_t *rand_state, int seed,
cudaStream_t stream) {
Setup_Rand_Normal_Kernel<<<ceilf(static_cast<float>(float4_numbers) / 32.), 32, 0, stream>>>(float4_numbers,
rand_state, seed);
}
void MD_Iteration_Setup_Random_State(int float4_numbers, curandStatePhilox4_32_10_t *rand_state, int seed,
cudaStream_t stream);
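
A host-side sketch of how the random state above is typically provisioned; the buffer sizing and the pairing with Rand_Normal are inferred from the callers in this diff (each Philox state yields one float4, i.e. four normals), not from a documented contract.

#include <cuda_runtime.h>
#include <curand_kernel.h>

void SetupRandForIntegrator(int atom_numbers, int seed, cudaStream_t stream) {
  // Three floats of random force per atom, rounded up to whole float4 draws (assumed).
  int float4_numbers = (3 * atom_numbers + 3) / 4;
  curandStatePhilox4_32_10_t *rand_state = nullptr;
  float *rand_frc = nullptr;
  cudaMalloc(&rand_state, sizeof(curandStatePhilox4_32_10_t) * float4_numbers);
  cudaMalloc(&rand_frc, sizeof(float4) * float4_numbers);  // backs the Rand_Normal output
  MD_Iteration_Setup_Random_State(float4_numbers, rand_state, seed, stream);
  // rand_state and rand_frc can now be handed to
  // MD_Iteration_Leap_Frog_With_LiuJian(...) on every step.
}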

View File

@ -1,25 +0,0 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_MD_ITERATION_SETUP_RANDOM_STATE_GPU_IMPL_H_
#define MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_MD_ITERATION_SETUP_RANDOM_STATE_GPU_IMPL_H_
#include <curand_kernel.h>
#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/cuda_common.h"
#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/cuda_device_info.h"
CUDA_LIB_EXPORT void MD_Iteration_Setup_Random_State(int float4_numbers, curandStatePhilox4_32_10_t *rand_state,
int seed, cudaStream_t stream);
#endif

View File

@ -1,27 +0,0 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "plugin/device/gpu/kernel/cuda_impl/sponge/pme/fft_3d_impl.cuh"
#include "plugin/device/gpu/kernel/cuda_impl/sponge/pme/pme_common.cuh"
template <typename T>
void FFT3D(int Nfft, T *input_tensor, Complex<T> *output_tensor, const cufftHandle &FFT_plan_r2c, cudaStream_t stream) {
cufftExecR2C(FFT_plan_r2c, input_tensor, reinterpret_cast<cufftComplex *>(output_tensor));
return;
}
template CUDA_LIB_EXPORT
void FFT3D<float>(int Nfft, float *input_tensor, Complex<float> *output_tensor,
const cufftHandle &FFT_plan_r2c, cudaStream_t stream);
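
The cufftHandle consumed by FFT3D is typically created along the lines below (a sketch; the dimension variables are assumptions). For an fftx x ffty x fftz real grid, the R2C output holds fftx * ffty * (fftz / 2 + 1) complex values.

#include <cufft.h>

cufftHandle MakeR2CPlan(int fftx, int ffty, int fftz, cudaStream_t stream) {
  cufftHandle plan;
  cufftPlan3d(&plan, fftx, ffty, fftz, CUFFT_R2C);
  cufftSetStream(plan, stream);  // run the transform on the caller's stream
  return plan;
}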

View File

@ -1,28 +0,0 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_PME_FFT_3D_IMPL_H_
#define MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_PME_FFT_3D_IMPL_H_
#include <cufft.h>
#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/complex.h"
#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/cuda_common.h"
#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/cuda_device_info.h"
template <typename T>
CUDA_LIB_EXPORT void FFT3D(int Nfft, T *input_tensor, Complex<T> *output_tensor, const cufftHandle &FFT_plan_r2c,
cudaStream_t stream);
#endif

View File

@ -1,28 +0,0 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "plugin/device/gpu/kernel/cuda_impl/sponge/pme/ifft_3d_impl.cuh"
#include "plugin/device/gpu/kernel/cuda_impl/sponge/pme/pme_common.cuh"
template <typename T>
void IFFT3D(int Nfft, Complex<T> *input_tensor, T *output_tensor, const cufftHandle &FFT_plan_c2r,
cudaStream_t stream) {
cufftExecC2R(FFT_plan_c2r, reinterpret_cast<cufftComplex *>(input_tensor), output_tensor);
return;
}
template CUDA_LIB_EXPORT
void IFFT3D<float>(int Nfft, Complex<float> *input_tensor, float *output_tensor,
const cufftHandle &FFT_plan_c2r, cudaStream_t stream);
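
cuFFT complex-to-real transforms are unnormalized, so an FFT3D / IFFT3D round trip multiplies the grid by the number of real points; callers that need the original scale must divide it back out. A minimal normalization kernel follows (a sketch; the SPONGE callers may instead rely on a helper such as Scale_List).

__global__ void NormalizeGrid(int n, float *data, float inv_n) {
  int i = blockDim.x * blockIdx.x + threadIdx.x;
  if (i < n) data[i] *= inv_n;  // e.g. inv_n = 1.0f / (fftx * ffty * fftz)
}

Launched as NormalizeGrid<<<(n + 127) / 128, 128, 0, stream>>>(n, output_tensor, 1.0f / n) right after IFFT3D.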

View File

@ -1,28 +0,0 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_PME_IFFT_3D_IMPL_H_
#define MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_PME_IFFT_3D_IMPL_H_
#include <cufft.h>
#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/complex.h"
#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/cuda_common.h"
#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/cuda_device_info.h"
template <typename T>
CUDA_LIB_EXPORT void IFFT3D(int Nfft, Complex<T> *input_tensor, T *output_tensor, const cufftHandle &FFT_plan_c2r,
cudaStream_t stream);
#endif

View File

@ -1,29 +0,0 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "plugin/device/gpu/kernel/cuda_impl/sponge/pme/pme_batched_fft_2d_impl.cuh"
#include "plugin/device/gpu/kernel/cuda_impl/sponge/pme/pme_common.cuh"
template <typename T>
void PMEBatchedFFT2D(Complex<T> *input_tensor, Complex<T> *output_tensor,
const cufftHandle &FFT_plan_c2c, int direction, cudaStream_t stream) {
cufftExecC2C(FFT_plan_c2c, reinterpret_cast<cufftComplex *>(input_tensor),
reinterpret_cast<cufftComplex *>(output_tensor), direction);
return;
}
template CUDA_LIB_EXPORT
void PMEBatchedFFT2D<float>(Complex<float> *input_tensor, Complex<float> *output_tensor,
const cufftHandle &FFT_plan_c2c, int direction, cudaStream_t stream);
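
The C2C handle consumed by PMEBatchedFFT2D is presumably built with cufftPlanMany; treating the leading dimension as the batch count with packed layout is an assumption in the sketch below. direction is CUFFT_FORWARD or CUFFT_INVERSE.

#include <cufft.h>

cufftHandle MakeBatched2DPlan(int batch, int ny, int nz, cudaStream_t stream) {
  cufftHandle plan;
  int n[2] = {ny, nz};
  // NULL embed pointers select the simple packed layout for each ny*nz slab.
  cufftPlanMany(&plan, 2, n, nullptr, 1, ny * nz, nullptr, 1, ny * nz,
                CUFFT_C2C, batch);
  cufftSetStream(plan, stream);
  return plan;
}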

View File

@ -1,28 +0,0 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_PME_PME_BATCHED_FFT_2D_IMPL_H_
#define MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_PME_PME_BATCHED_FFT_2D_IMPL_H_
#include <cufft.h>
#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/complex.h"
#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/cuda_common.h"
#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/cuda_device_info.h"
template <typename T>
CUDA_LIB_EXPORT void PMEBatchedFFT2D(Complex<T> *input_tensor, Complex<T> *output_tensor,
const cufftHandle &FFT_plan_c2c, int direction, cudaStream_t stream);
#endif

View File

@ -1,357 +0,0 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
* Note:
* PME_Common. This is an experimental interface that is subject to change and/or deletion.
*/
#ifndef MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_PME_PME_COMMON_H_
#define MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_PME_PME_COMMON_H_
#include "plugin/device/gpu/kernel/cuda_impl/sponge/common_sponge.cuh"
__constant__ float PME_Ma[4] = {1.0 / 6.0, -0.5, 0.5, -1.0 / 6.0};
__constant__ float PME_Mb[4] = {0, 0.5, -1, 0.5};
__constant__ float PME_Mc[4] = {0, 0.5, 0, -0.5};
__constant__ float PME_Md[4] = {0, 1.0 / 6.0, 4.0 / 6.0, 1.0 / 6.0};
__constant__ float PME_dMa[4] = {0.5, -1.5, 1.5, -0.5};
__constant__ float PME_dMb[4] = {0, 1, -2, 1};
__constant__ float PME_dMc[4] = {0, 0.5, 0, -0.5};
#define PI 3.1415926
const float periodic_factor_inverse = 2.3283064365387e-10;
static dim3 thread_PME;
const float cutoff = 10.0;
const float tolerance = 0.00001;
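// M_ evaluates the cardinal B-spline of order n via the standard recursion
//   M_n(u) = u / (n - 1) * M_{n-1}(u) + (n - u) / (n - 1) * M_{n-1}(u - 1),
// with base case M_2(u) = 1 - |u - 1| on [0, 2]; getb below only calls it at
// integer u. For the 4th-order interpolation used by these kernels, M_4 is the
// piecewise cubic whose segment coefficients match the PME_Ma..PME_Md tables above.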
static float M_(float u, int n) {
if (n == 2) {
if (u > 2 || u < 0) return 0;
    return 1 - fabsf(u - 1);  // fabsf keeps the float overload; plain abs risks the integer version
} else {
return u / (n - 1) * M_(u, n - 1) + (n - u) / (n - 1) * M_(u - 1, n - 1);
}
}
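// Get_Beta picks the Ewald splitting parameter: it doubles `high` until
// erfc(high * cutoff) / cutoff drops below `tolerance`, then runs roughly
// fifty bisection rounds and returns (approximately) the smallest beta meeting
// the bound. For the defaults above (cutoff = 10.0, tolerance = 1e-5) this
// lands near beta ~ 0.275, since erfc(2.75) is roughly 1e-4.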
static float Get_Beta(float cutoff, float tolerance) {
float beta, low, high, tempf;
int ilow, ihigh;
high = 1.0;
ihigh = 1;
while (1) {
tempf = erfc(high * cutoff) / cutoff;
if (tempf <= tolerance) break;
high *= 2;
ihigh++;
}
ihigh += 50;
low = 0.0;
for (ilow = 1; ilow < ihigh; ilow++) {
beta = (low + high) / 2;
tempf = erfc(beta * cutoff) / cutoff;
if (tempf >= tolerance)
low = beta;
else
high = beta;
}
return beta;
}
static cufftComplex expc(cufftComplex z) {
cufftComplex res;
float t = expf(z.x);
sincosf(z.y, &res.y, &res.x);
res.x *= t;
res.y *= t;
return res;
}
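// getb returns |b(k)|^2 from smooth PME (a reading of the code in terms of
// Essmann et al. 1995, not a documented contract here):
//   b(k) = exp(2*pi*i*(n-1)*k/N) / sum_{m=0}^{n-2} M_n(m+1) * exp(2*pi*i*m*k/N),
// the reciprocal-space weight of the order-n B-spline; the products of these
// factors over x, y, z presumably populate the PME_BC array used below.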
static float getb(int k, int NFFT, int B_order) {
cufftComplex tempc, tempc2, res;
float tempf;
tempc2.x = 0;
tempc2.y = 0;
tempc.x = 0;
tempc.y = 2 * (B_order - 1) * PI * k / NFFT;
res = expc(tempc);
for (int kk = 0; kk < (B_order - 1); kk++) {
tempc.x = 0;
tempc.y = 2 * PI * k / NFFT * kk;
tempc = expc(tempc);
tempf = M_(kk + 1, B_order);
tempc2.x += tempf * tempc.x;
tempc2.y += tempf * tempc.y;
}
res = cuCdivf(res, tempc2);
return res.x * res.x + res.y * res.y;
}
__global__ static void device_add(float *ene, float *factor, float *charge_sum) {
ene[0] += factor[0] * charge_sum[0] * charge_sum[0];
}
__global__ static void PME_Atom_Near(const UNSIGNED_INT_VECTOR *uint_crd, int *PME_atom_near, const int PME_Nin,
const float periodic_factor_inverse_x, const float periodic_factor_inverse_y,
const float periodic_factor_inverse_z, const int atom_numbers, const int fftx,
const int ffty, const int fftz, const UNSIGNED_INT_VECTOR *PME_kxyz,
UNSIGNED_INT_VECTOR *PME_uxyz, VECTOR *PME_frxyz) {
int atom = blockDim.x * blockIdx.x + threadIdx.x;
if (atom < atom_numbers) {
UNSIGNED_INT_VECTOR *temp_uxyz = &PME_uxyz[atom];
int k, tempux, tempuy, tempuz;
float tempf;
tempf = static_cast<float>(uint_crd[atom].uint_x) * periodic_factor_inverse_x;
tempux = static_cast<int>(tempf);
PME_frxyz[atom].x = tempf - tempux;
tempf = static_cast<float>(uint_crd[atom].uint_y) * periodic_factor_inverse_y;
tempuy = static_cast<int>(tempf);
PME_frxyz[atom].y = tempf - tempuy;
tempf = static_cast<float>(uint_crd[atom].uint_z) * periodic_factor_inverse_z;
tempuz = static_cast<int>(tempf);
PME_frxyz[atom].z = tempf - tempuz;
if (tempux != (*temp_uxyz).uint_x || tempuy != (*temp_uxyz).uint_y || tempuz != (*temp_uxyz).uint_z) {
(*temp_uxyz).uint_x = tempux;
(*temp_uxyz).uint_y = tempuy;
(*temp_uxyz).uint_z = tempuz;
int *temp_near = PME_atom_near + atom * 64;
int kx, ky, kz;
for (k = 0; k < 64; k++) {
UNSIGNED_INT_VECTOR temp_kxyz = PME_kxyz[k];
kx = tempux - temp_kxyz.uint_x;
if (kx < 0) kx += fftx;
if (kx > fftx) kx -= fftx;
ky = tempuy - temp_kxyz.uint_y;
if (ky < 0) ky += ffty;
if (ky > ffty) ky -= ffty;
kz = tempuz - temp_kxyz.uint_z;
if (kz < 0) kz += fftz;
if (kz > fftz) kz -= fftz;
temp_near[k] = kx * PME_Nin + ky * fftz + kz;
}
}
}
}
__global__ static void PME_Q_Spread(int *PME_atom_near, const float *charge, const VECTOR *PME_frxyz, float *PME_Q,
const UNSIGNED_INT_VECTOR *PME_kxyz, const int atom_numbers) {
int atom = blockDim.x * blockIdx.x + threadIdx.x;
if (atom < atom_numbers) {
int k;
float tempf, tempQ, tempf2;
int *temp_near = PME_atom_near + atom * 64;
VECTOR temp_frxyz = PME_frxyz[atom];
float tempcharge = charge[atom];
UNSIGNED_INT_VECTOR temp_kxyz;
unsigned int kx;
for (k = threadIdx.y; k < 64; k = k + blockDim.y) {
temp_kxyz = PME_kxyz[k];
kx = temp_kxyz.uint_x;
tempf = (temp_frxyz.x);
tempf2 = tempf * tempf;
tempf = PME_Ma[kx] * tempf * tempf2 + PME_Mb[kx] * tempf2 + PME_Mc[kx] * tempf + PME_Md[kx];
tempQ = tempcharge * tempf;
kx = temp_kxyz.uint_y;
tempf = (temp_frxyz.y);
tempf2 = tempf * tempf;
tempf = PME_Ma[kx] * tempf * tempf2 + PME_Mb[kx] * tempf2 + PME_Mc[kx] * tempf + PME_Md[kx];
tempQ = tempQ * tempf;
kx = temp_kxyz.uint_z;
tempf = (temp_frxyz.z);
tempf2 = tempf * tempf;
tempf = PME_Ma[kx] * tempf * tempf2 + PME_Mb[kx] * tempf2 + PME_Mc[kx] * tempf + PME_Md[kx];
tempQ = tempQ * tempf;
atomicAdd(&PME_Q[temp_near[k]], tempQ);
}
}
}
__global__ static void PME_Direct_Energy(const int atom_numbers, const NEIGHBOR_LIST *nl,
const UNSIGNED_INT_VECTOR *uint_crd, const VECTOR *boxlength,
const float *charge, const float beta, const float cutoff_square,
float *direct_ene) {
int atom_i = blockDim.x * blockIdx.x + threadIdx.x;
if (atom_i < atom_numbers) {
NEIGHBOR_LIST nl_i = nl[atom_i];
int N = nl_i.atom_numbers;
int atom_j;
int int_x;
int int_y;
int int_z;
UNSIGNED_INT_VECTOR r1 = uint_crd[atom_i], r2;
VECTOR dr;
float dr2;
float dr_abs;
// float dr_inverse;
float ene_temp;
float charge_i = charge[atom_i];
float ene_lin = 0.;
// int x, y;
// int atom_pair_LJ_type;
for (int j = threadIdx.y; j < N; j = j + blockDim.y) {
atom_j = nl_i.atom_serial[j];
r2 = uint_crd[atom_j];
int_x = r2.uint_x - r1.uint_x;
int_y = r2.uint_y - r1.uint_y;
int_z = r2.uint_z - r1.uint_z;
dr.x = boxlength[0].x * int_x;
dr.y = boxlength[0].y * int_y;
dr.z = boxlength[0].z * int_z;
dr2 = dr.x * dr.x + dr.y * dr.y + dr.z * dr.z;
if (dr2 < cutoff_square) {
dr_abs = norm3df(dr.x, dr.y, dr.z);
ene_temp = charge_i * charge[atom_j] * erfcf(beta * dr_abs) / dr_abs;
ene_lin = ene_lin + ene_temp;
}
}
atomicAdd(direct_ene, ene_lin);
}
}
__global__ static void PME_Direct_Atom_Energy(const int atom_numbers, const NEIGHBOR_LIST *nl,
const UNSIGNED_INT_VECTOR *uint_crd, const VECTOR *boxlength,
const float *charge, const float beta, const float cutoff_square,
float *direct_ene) {
int atom_i = blockDim.x * blockIdx.x + threadIdx.x;
if (atom_i < atom_numbers) {
NEIGHBOR_LIST nl_i = nl[atom_i];
int N = nl_i.atom_numbers;
int atom_j;
int int_x;
int int_y;
int int_z;
UNSIGNED_INT_VECTOR r1 = uint_crd[atom_i], r2;
VECTOR dr;
float dr2;
float dr_abs;
// float dr_inverse;
float ene_temp;
float charge_i = charge[atom_i];
float ene_lin = 0.;
for (int j = threadIdx.y; j < N; j = j + blockDim.y) {
atom_j = nl_i.atom_serial[j];
r2 = uint_crd[atom_j];
int_x = r2.uint_x - r1.uint_x;
int_y = r2.uint_y - r1.uint_y;
int_z = r2.uint_z - r1.uint_z;
dr.x = boxlength[0].x * int_x;
dr.y = boxlength[0].y * int_y;
dr.z = boxlength[0].z * int_z;
dr2 = dr.x * dr.x + dr.y * dr.y + dr.z * dr.z;
if (dr2 < cutoff_square) {
dr_abs = norm3df(dr.x, dr.y, dr.z);
ene_temp = charge_i * charge[atom_j] * erfcf(beta * dr_abs) / dr_abs;
ene_lin = ene_lin + ene_temp;
}
}
atomicAdd(&direct_ene[atom_i], ene_lin);
}
}
__global__ static void PME_Energy_Product(const int element_number, const float *list1, const float *list2,
float *sum) {
if (threadIdx.x == 0) {
sum[0] = 0.;
}
__syncthreads();
float lin = 0.0;
for (int i = threadIdx.x; i < element_number; i = i + blockDim.x) {
lin = lin + list1[i] * list2[i];
}
atomicAdd(sum, lin);
}
__global__ static void PME_BCFQ(cufftComplex *PME_FQ, float *PME_BC, int PME_Nfft) {
int index = blockDim.x * blockIdx.x + threadIdx.x;
if (index < PME_Nfft) {
float tempf = PME_BC[index];
cufftComplex tempc = PME_FQ[index];
PME_FQ[index].x = tempc.x * tempf;
PME_FQ[index].y = tempc.y * tempf;
}
}
__global__ static void PME_Excluded_Energy_Correction(const int atom_numbers, const UNSIGNED_INT_VECTOR *uint_crd,
                                                       const VECTOR *scaler, const float *charge, const float pme_beta,
const float sqrt_pi, const int *excluded_list_start,
const int *excluded_list, const int *excluded_atom_numbers,
float *ene) {
int atom_i = blockDim.x * blockIdx.x + threadIdx.x;
if (atom_i < atom_numbers) {
int excluded_number = excluded_atom_numbers[atom_i];
if (excluded_number > 0) {
int list_start = excluded_list_start[atom_i];
// int atom_min = excluded_list[list_start];
int list_end = list_start + excluded_number;
int atom_j;
int int_x;
int int_y;
int int_z;
float charge_i = charge[atom_i];
float charge_j;
float dr_abs;
float beta_dr;
UNSIGNED_INT_VECTOR r1 = uint_crd[atom_i], r2;
VECTOR dr;
float dr2;
float ene_lin = 0.;
for (int i = list_start; i < list_end; i = i + 1) {
atom_j = excluded_list[i];
r2 = uint_crd[atom_j];
charge_j = charge[atom_j];
int_x = r2.uint_x - r1.uint_x;
int_y = r2.uint_y - r1.uint_y;
int_z = r2.uint_z - r1.uint_z;
        dr.x = scaler[0].x * int_x;
        dr.y = scaler[0].y * int_y;
        dr.z = scaler[0].z * int_z;
dr2 = dr.x * dr.x + dr.y * dr.y + dr.z * dr.z;
dr_abs = sqrtf(dr2);
beta_dr = pme_beta * dr_abs;
ene_lin -= charge_i * charge_j * erff(beta_dr) / dr_abs;
}
atomicAdd(ene, ene_lin);
}
}
}
#endif

View File

@ -1,85 +0,0 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
* Note:
* PMEEnergy. This is an experimental interface that is subject to change and/or deletion.
*/
#include "plugin/device/gpu/kernel/cuda_impl/sponge/pme/pme_energy_impl.cuh"
#include "plugin/device/gpu/kernel/cuda_impl/sponge/pme/pme_common.cuh"
#include "plugin/device/gpu/kernel/cuda_impl/sponge/common_sponge.cuh"
__global__ void PME_Energy_Reciprocal(const int element_number, const cufftComplex *FQ, const float *BC, float *sum) {
if (threadIdx.x == 0) {
sum[0] = 0.;
}
__syncthreads();
float lin = 0.0;
cufftComplex FQ_i;
for (int i = threadIdx.x; i < element_number; i = i + blockDim.x) {
FQ_i = FQ[i];
lin = lin + (FQ_i.x * FQ_i.x + FQ_i.y * FQ_i.y) * BC[i];
}
atomicAdd(sum, lin);
}
void PMEEnergy(int fftx, int ffty, int fftz, int atom_numbers, float beta, float *PME_BC, int *pme_uxyz,
float *pme_frxyz, float *PME_Q, float *pme_fq, int *PME_atom_near, int *pme_kxyz, const int *uint_crd_f,
const float *charge, int *nl_atom_numbers, int *nl_atom_serial, int *nl, const float *scaler_f,
const int *excluded_list_start, const int *excluded_list, const int *excluded_atom_numbers,
float *d_reciprocal_ene, float *d_self_ene, float *d_direct_ene, float *d_correction_ene,
dim3 thread_PME, int PME_Nin, int PME_Nfft, int PME_Nall, const cufftHandle &PME_plan_r2c,
const cufftHandle &PME_plan_c2r, cudaStream_t stream) {
UNSIGNED_INT_VECTOR *uint_crd =
const_cast<UNSIGNED_INT_VECTOR *>(reinterpret_cast<const UNSIGNED_INT_VECTOR *>(uint_crd_f));
VECTOR *scaler = const_cast<VECTOR *>(reinterpret_cast<const VECTOR *>(scaler_f));
int max_neighbor_numbers = 800;
NEIGHBOR_LIST *nl_a = reinterpret_cast<NEIGHBOR_LIST *>(nl);
construct_neighbor_list_kernel<<<ceilf(static_cast<float>(atom_numbers) / 128), 128, 0, stream>>>(
atom_numbers, max_neighbor_numbers, nl_atom_numbers, nl_atom_serial, nl_a);
UNSIGNED_INT_VECTOR *PME_uxyz = reinterpret_cast<UNSIGNED_INT_VECTOR *>(pme_uxyz);
UNSIGNED_INT_VECTOR *PME_kxyz = reinterpret_cast<UNSIGNED_INT_VECTOR *>(pme_kxyz);
VECTOR *PME_frxyz = reinterpret_cast<VECTOR *>(pme_frxyz);
cufftComplex *PME_FQ = reinterpret_cast<cufftComplex *>(pme_fq);
Reset_List<<<3 * atom_numbers / 32 + 1, 32, 0, stream>>>(3 * atom_numbers, reinterpret_cast<int *>(PME_uxyz),
1 << 30);
PME_Atom_Near<<<atom_numbers / 32 + 1, 32, 0, stream>>>(
uint_crd, PME_atom_near, PME_Nin, periodic_factor_inverse * fftx, periodic_factor_inverse * ffty,
periodic_factor_inverse * fftz, atom_numbers, fftx, ffty, fftz, PME_kxyz, PME_uxyz, PME_frxyz);
Reset_List<<<PME_Nall / 1024 + 1, 1024, 0, stream>>>(PME_Nall, PME_Q, 0);
PME_Q_Spread<<<atom_numbers / thread_PME.x + 1, thread_PME, 0, stream>>>(PME_atom_near, charge, PME_frxyz, PME_Q,
PME_kxyz, atom_numbers);
cufftExecR2C(PME_plan_r2c, reinterpret_cast<float *>(PME_Q), reinterpret_cast<cufftComplex *>(PME_FQ));
PME_Energy_Reciprocal<<<1, 1024, 0, stream>>>(PME_Nfft, PME_FQ, PME_BC, d_reciprocal_ene);
PME_Energy_Product<<<1, 1024, 0, stream>>>(atom_numbers, charge, charge, d_self_ene);
Scale_List<<<1, 1, 0, stream>>>(1, d_self_ene, -beta / sqrtf(PI));
Reset_List<<<1, 1, 0, stream>>>(1, d_direct_ene, 0.0);
PME_Direct_Energy<<<atom_numbers / thread_PME.x + 1, thread_PME, 0, stream>>>(
atom_numbers, nl_a, uint_crd, scaler, charge, beta, cutoff * cutoff, d_direct_ene);
Reset_List<<<1, 1, 0, stream>>>(1, d_correction_ene, 0.0);
PME_Excluded_Energy_Correction<<<atom_numbers / 32 + 1, 32, 0, stream>>>(
atom_numbers, uint_crd, scaler, charge, beta, sqrtf(PI), excluded_list_start, excluded_list, excluded_atom_numbers,
d_correction_ene);
return;
}
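
Summary of the launches above (a worked restatement, not taken from the original sources): the four device scalars hold

  E_reciprocal = sum_k |FQ_k|^2 * BC_k
  E_self       = -(beta / sqrt(pi)) * sum_i q_i^2
  E_direct     = sum_(neighbor-list pairs, r_ij < cutoff) q_i * q_j * erfc(beta * r_ij) / r_ij
  E_correction = -sum_(excluded pairs) q_i * q_j * erf(beta * r_ij) / r_ij

and the total PME Coulomb energy is their sum.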

View File

@ -1,32 +0,0 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_PME_PME_ENERGY_IMPL_H_
#define MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_PME_PME_ENERGY_IMPL_H_
#include <cufft.h>
#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/cuda_common.h"
#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/cuda_device_info.h"
CUDA_LIB_EXPORT void PMEEnergy(int fftx, int ffty, int fftz, int atom_numbers, float beta, float *PME_BC, int *pme_uxyz,
float *pme_frxyz, float *PME_Q, float *pme_fq, int *PME_atom_near, int *pme_kxyz,
const int *uint_crd_f, const float *charge, int *nl_atom_numbers, int *nl_atom_serial,
int *nl, const float *scaler_f, const int *excluded_list_start, const int *excluded_list,
const int *excluded_atom_numbers, float *d_reciprocal_ene, float *d_self_ene,
float *d_direct_ene, float *d_correction_ene, dim3 thread_PME, int PME_Nin, int PME_Nfft,
int PME_Nall, const cufftHandle &PME_plan_r2c, const cufftHandle &PME_plan_c2r,
cudaStream_t stream);
#endif

View File

@ -1,90 +0,0 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
* Note:
* PMEEnergyUpdate. This is an experimental interface that is subject to change and/or deletion.
*/
#include "plugin/device/gpu/kernel/cuda_impl/sponge/pme/pme_energy_update_impl.cuh"
#include "plugin/device/gpu/kernel/cuda_impl/sponge/pme/pme_common.cuh"
#include "plugin/device/gpu/kernel/cuda_impl/sponge/common_sponge.cuh"
__global__ void PME_Energy_Reciprocal_update(const int element_number, const cufftComplex *FQ, const float *BC,
float *sum) {
if (threadIdx.x == 0) {
sum[0] = 0.;
}
__syncthreads();
float lin = 0.0;
cufftComplex FQ_i;
for (int i = threadIdx.x; i < element_number; i = i + blockDim.x) {
FQ_i = FQ[i];
lin = lin + (FQ_i.x * FQ_i.x + FQ_i.y * FQ_i.y) * BC[i];
}
atomicAdd(sum, lin);
}
void PMEEnergyUpdate(int fftx, int ffty, int fftz, int atom_numbers, float beta, float *PME_BC, int *pme_uxyz,
float *pme_frxyz, float *PME_Q, float *pme_fq, int *PME_atom_near, int *pme_kxyz,
const int *uint_crd_f, const float *charge, int *nl_atom_numbers, int *nl_atom_serial, int *nl,
const float *scaler_f, const int *excluded_list_start, const int *excluded_list,
const int *excluded_atom_numbers, float *d_reciprocal_ene, float *d_self_ene, float *d_direct_ene,
float *d_correction_ene, dim3 thread_PME, int PME_Nin, int PME_Nfft, int PME_Nall,
const cufftHandle &PME_plan_r2c, const cufftHandle &PME_plan_c2r, float *neutralizing_factor,
float *charge_sum, int max_neighbor_numbers, cudaStream_t stream) {
UNSIGNED_INT_VECTOR *uint_crd =
const_cast<UNSIGNED_INT_VECTOR *>(reinterpret_cast<const UNSIGNED_INT_VECTOR *>(uint_crd_f));
VECTOR *scaler = const_cast<VECTOR *>(reinterpret_cast<const VECTOR *>(scaler_f));
// int max_neighbor_numbers = 800;
NEIGHBOR_LIST *nl_a = reinterpret_cast<NEIGHBOR_LIST *>(nl);
construct_neighbor_list_kernel<<<ceilf(static_cast<float>(atom_numbers) / 128), 128, 0, stream>>>(
atom_numbers, max_neighbor_numbers, nl_atom_numbers, nl_atom_serial, nl_a);
UNSIGNED_INT_VECTOR *PME_uxyz = reinterpret_cast<UNSIGNED_INT_VECTOR *>(pme_uxyz);
UNSIGNED_INT_VECTOR *PME_kxyz = reinterpret_cast<UNSIGNED_INT_VECTOR *>(pme_kxyz);
VECTOR *PME_frxyz = reinterpret_cast<VECTOR *>(pme_frxyz);
cufftComplex *PME_FQ = reinterpret_cast<cufftComplex *>(pme_fq);
Reset_List<<<3 * atom_numbers / 32 + 1, 32, 0, stream>>>(3 * atom_numbers, reinterpret_cast<int *>(PME_uxyz),
1 << 30);
PME_Atom_Near<<<atom_numbers / 32 + 1, 32, 0, stream>>>(
uint_crd, PME_atom_near, PME_Nin, periodic_factor_inverse * fftx, periodic_factor_inverse * ffty,
periodic_factor_inverse * fftz, atom_numbers, fftx, ffty, fftz, PME_kxyz, PME_uxyz, PME_frxyz);
Reset_List<<<PME_Nall / 1024 + 1, 1024, 0, stream>>>(PME_Nall, PME_Q, 0);
PME_Q_Spread<<<atom_numbers / thread_PME.x + 1, thread_PME, 0, stream>>>(PME_atom_near, charge, PME_frxyz, PME_Q,
PME_kxyz, atom_numbers);
cufftExecR2C(PME_plan_r2c, reinterpret_cast<float *>(PME_Q), reinterpret_cast<cufftComplex *>(PME_FQ));
PME_Energy_Reciprocal_update<<<1, 1024, 0, stream>>>(PME_Nfft, PME_FQ, PME_BC, d_reciprocal_ene);
PME_Energy_Product<<<1, 1024, 0, stream>>>(atom_numbers, charge, charge, d_self_ene);
Scale_List<<<1, 1, 0, stream>>>(1, d_self_ene, -beta / sqrtf(PI));
  Sum_Of_List<<<1, 1024, 0, stream>>>(atom_numbers, charge, charge_sum);
  device_add<<<1, 1, 0, stream>>>(d_self_ene, neutralizing_factor, charge_sum);
Reset_List<<<1, 1, 0, stream>>>(1, d_direct_ene, 0.0);
PME_Direct_Energy<<<atom_numbers / thread_PME.x + 1, thread_PME, 0, stream>>>(
atom_numbers, nl_a, uint_crd, scaler, charge, beta, cutoff * cutoff, d_direct_ene);
Reset_List<<<1, 1, 0, stream>>>(1, d_correction_ene, 0.0);
PME_Excluded_Energy_Correction<<<atom_numbers / 32 + 1, 32, 0, stream>>>(
atom_numbers, uint_crd, scaler, charge, beta, sqrtf(PI), excluded_list_start, excluded_list, excluded_atom_numbers,
d_correction_ene);
return;
}
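
PMEEnergyUpdate differs from PMEEnergy in two ways: the neighbor-list capacity max_neighbor_numbers becomes a caller argument, and a uniform-background term for non-neutral boxes is folded into the self energy via Sum_Of_List and device_add:

  E_self += neutralizing_factor * (sum_i q_i)^2

For a periodic box of volume V the conventional factor is -pi / (2 * V * beta^2), though here the value is supplied by the caller.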

View File

@ -1,34 +0,0 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_PME_PME_ENERGY_UPDATE_IMPL_H_
#define MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_PME_PME_ENERGY_UPDATE_IMPL_H_
#include <cufft.h>
#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/cuda_common.h"
#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/cuda_device_info.h"
CUDA_LIB_EXPORT void PMEEnergyUpdate(int fftx, int ffty, int fftz, int atom_numbers, float beta, float *PME_BC,
int *pme_uxyz, float *pme_frxyz, float *PME_Q, float *pme_fq, int *PME_atom_near,
int *pme_kxyz, const int *uint_crd_f, const float *charge, int *nl_atom_numbers,
int *nl_atom_serial, int *nl, const float *scaler_f,
const int *excluded_list_start, const int *excluded_list,
const int *excluded_atom_numbers, float *d_reciprocal_ene, float *d_self_ene,
float *d_direct_ene, float *d_correction_ene, dim3 thread_PME, int PME_Nin,
int PME_Nfft, int PME_Nall, const cufftHandle &PME_plan_r2c,
const cufftHandle &PME_plan_c2r, float *neutralizing_factor, float *charge_sum,
int max_neighbor_numbers, cudaStream_t stream);
#endif

Some files were not shown because too many files have changed in this diff.