!44925 remove sponge ops

Merge pull request !44925 from mamba_ni/remove_sponge

commit cf8e460295
@@ -1,66 +0,0 @@
/**
 * Copyright 2021 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "plugin/device/gpu/kernel/cuda_impl/sponge/angle/angle_atom_energy_impl.cuh"
#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/util.cuh"
#include "plugin/device/gpu/kernel/cuda_impl/sponge/common_sponge.cuh"

__global__ void AngleAtomEnergyKernel(int angle_numbers, const UNSIGNED_INT_VECTOR *uint_crd, const VECTOR *scaler,
                                      const int *atom_a, const int *atom_b, const int *atom_c, const float *angle_k,
                                      const float *angle_theta0, float *atom_energy) {
  int angle_i = blockDim.x * blockIdx.x + threadIdx.x;
  if (angle_i < angle_numbers) {
    int atom_i = atom_a[angle_i];
    int atom_j = atom_b[angle_i];
    int atom_k = atom_c[angle_i];

    float theta0 = angle_theta0[angle_i];
    float k = angle_k[angle_i];

    VECTOR drij = Get_Periodic_Displacement(uint_crd[atom_i], uint_crd[atom_j], scaler[0]);
    VECTOR drkj = Get_Periodic_Displacement(uint_crd[atom_k], uint_crd[atom_j], scaler[0]);

    float rij_2 = 1. / (drij * drij);
    float rkj_2 = 1. / (drkj * drkj);
    float rij_1_rkj_1 = sqrtf(rij_2 * rkj_2);

    float costheta = drij * drkj * rij_1_rkj_1;
    costheta = fmaxf(-0.999999, fminf(costheta, 0.999999));
    float theta = acosf(costheta);

    float dtheta = theta - theta0;

    atomicAdd(&atom_energy[atom_i], k * dtheta * dtheta);
  }
}

void AngleAtomEnergy(int angle_numbers, int atom_numbers, const int *uint_crd_f, const float *scaler_f,
                     const int *atom_a, const int *atom_b, const int *atom_c, const float *angle_k,
                     const float *angle_theta0, float *ene, cudaStream_t stream) {
  Reset_List<<<ceilf(static_cast<float>(atom_numbers) / 128), 128, 0, stream>>>(atom_numbers, ene, 0.);
  size_t thread_per_block = 128;
  size_t block_per_grid = ceilf(static_cast<float>(angle_numbers) / 128);
  UNSIGNED_INT_VECTOR *uint_crd =
    const_cast<UNSIGNED_INT_VECTOR *>(reinterpret_cast<const UNSIGNED_INT_VECTOR *>(uint_crd_f));
  VECTOR *scaler = const_cast<VECTOR *>(reinterpret_cast<const VECTOR *>(scaler_f));

  AngleAtomEnergyKernel<<<block_per_grid, thread_per_block, 0, stream>>>(angle_numbers, uint_crd, scaler, atom_a,
                                                                         atom_b, atom_c, angle_k, angle_theta0, ene);
  return;
}
void AngleAtomEnergy(int angle_numbers, int atom_numbers, const int *uint_crd_f, const float *scaler_f,
                     const int *atom_a, const int *atom_b, const int *atom_c, const float *angle_k,
                     const float *angle_theta0, float *ene, cudaStream_t stream);

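The kernel above accumulates the standard harmonic valence-angle energy onto the first atom of each angle. As a worked form of what the deleted code computes (my paraphrase, assuming the overloaded VECTOR operator* provided by common_sponge.cuh is a dot product):

  E_i = k\,(\theta - \theta_0)^2, \qquad
  \theta = \arccos\!\left(\frac{\vec{r}_{ij}\cdot\vec{r}_{kj}}{\lVert\vec{r}_{ij}\rVert\,\lVert\vec{r}_{kj}\rVert}\right)

cos(theta) is clamped to [-0.999999, 0.999999] before acosf so the angle, and the 1/sin(theta) used by the force kernels further down, stay finite; the contribution lands on atom_a through atomicAdd because several angles can share an atom.
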
@@ -1,27 +0,0 @@
#ifndef MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_ANGLE_ANGLE_ATOM_ENERGY_IMPL_H_
#define MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_ANGLE_ANGLE_ATOM_ENERGY_IMPL_H_

#include <curand_kernel.h>
#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/cuda_common.h"
#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/cuda_device_info.h"

CUDA_LIB_EXPORT void AngleAtomEnergy(int angle_numbers, int atom_numbers, const int *uint_crd_f, const float *scaler_f,
                                     const int *atom_a, const int *atom_b, const int *atom_c, const float *angle_k,
                                     const float *angle_theta0, float *ene, cudaStream_t stream);
#endif

@@ -1,63 +0,0 @@
#include "plugin/device/gpu/kernel/cuda_impl/sponge/angle/angle_energy_impl.cuh"
#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/util.cuh"
#include "plugin/device/gpu/kernel/cuda_impl/sponge/common_sponge.cuh"

__global__ void AngleEnergyKernel(int angle_numbers, const UNSIGNED_INT_VECTOR *uint_crd, const VECTOR *scaler,
                                  const int *atom_a, const int *atom_b, const int *atom_c, const float *angle_k,
                                  const float *angle_theta0, float *angle_energy) {
  int angle_i = blockDim.x * blockIdx.x + threadIdx.x;
  if (angle_i < angle_numbers) {
    int atom_i = atom_a[angle_i];
    int atom_j = atom_b[angle_i];
    int atom_k = atom_c[angle_i];

    float theta0 = angle_theta0[angle_i];
    float k = angle_k[angle_i];

    VECTOR drij = Get_Periodic_Displacement(uint_crd[atom_i], uint_crd[atom_j], scaler[0]);
    VECTOR drkj = Get_Periodic_Displacement(uint_crd[atom_k], uint_crd[atom_j], scaler[0]);

    float rij_2 = 1. / (drij * drij);
    float rkj_2 = 1. / (drkj * drkj);
    float rij_1_rkj_1 = sqrtf(rij_2 * rkj_2);

    float costheta = drij * drkj * rij_1_rkj_1;
    costheta = fmaxf(-0.999999, fminf(costheta, 0.999999));
    float theta = acosf(costheta);

    float dtheta = theta - theta0;

    angle_energy[angle_i] = k * dtheta * dtheta;
  }
}

void AngleEnergy(int angle_numbers, const int *uint_crd_f, const float *scaler_f, const int *atom_a, const int *atom_b,
                 const int *atom_c, const float *angle_k, const float *angle_theta0, float *ene, cudaStream_t stream) {
  size_t thread_per_block = 128;
  size_t block_per_grid = ceilf(static_cast<float>(angle_numbers) / 128);
  UNSIGNED_INT_VECTOR *uint_crd =
    const_cast<UNSIGNED_INT_VECTOR *>(reinterpret_cast<const UNSIGNED_INT_VECTOR *>(uint_crd_f));
  VECTOR *scaler = const_cast<VECTOR *>(reinterpret_cast<const VECTOR *>(scaler_f));

  AngleEnergyKernel<<<block_per_grid, thread_per_block, 0, stream>>>(angle_numbers, uint_crd, scaler, atom_a, atom_b,
                                                                     atom_c, angle_k, angle_theta0, ene);
  return;
}
void AngleEnergy(int angle_numbers, const int *uint_crd_f, const float *scaler_f, const int *atom_a, const int *atom_b,
                 const int *atom_c, const float *angle_k, const float *angle_theta0, float *ene, cudaStream_t stream);

@@ -1,27 +0,0 @@
#ifndef MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_ANGLE_ANGLE_ENERGY_IMPL_H_
#define MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_ANGLE_ANGLE_ENERGY_IMPL_H_

#include <curand_kernel.h>
#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/cuda_common.h"
#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/cuda_device_info.h"

CUDA_LIB_EXPORT void AngleEnergy(int angle_numbers, const int *uint_crd_f, const float *scaler_f, const int *atom_a,
                                 const int *atom_b, const int *atom_c, const float *angle_k, const float *angle_theta0,
                                 float *ene, cudaStream_t stream);
#endif

@@ -1,86 +0,0 @@
#include "plugin/device/gpu/kernel/cuda_impl/sponge/angle/angle_force_impl.cuh"
#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/util.cuh"
#include "plugin/device/gpu/kernel/cuda_impl/sponge/common_sponge.cuh"

__global__ void AngleForceKernel(int angle_numbers, const UNSIGNED_INT_VECTOR *uint_crd, const VECTOR *scaler,
                                 const int *atom_a, const int *atom_b, const int *atom_c, const float *angle_k,
                                 const float *angle_theta0, VECTOR *frc) {
  int angle_i = blockDim.x * blockIdx.x + threadIdx.x;
  if (angle_i < angle_numbers) {
    int atom_i = atom_a[angle_i];
    int atom_j = atom_b[angle_i];
    int atom_k = atom_c[angle_i];

    float theta0 = angle_theta0[angle_i];
    float k = angle_k[angle_i];

    VECTOR drij = Get_Periodic_Displacement(uint_crd[atom_i], uint_crd[atom_j], scaler[0]);
    VECTOR drkj = Get_Periodic_Displacement(uint_crd[atom_k], uint_crd[atom_j], scaler[0]);

    float rij_2 = 1. / (drij * drij);
    float rkj_2 = 1. / (drkj * drkj);
    float rij_1_rkj_1 = sqrtf(rij_2 * rkj_2);

    float costheta = drij * drkj * rij_1_rkj_1;
    costheta = fmaxf(-0.999999, fminf(costheta, 0.999999));
    float theta = acosf(costheta);

    float dtheta = theta - theta0;
    k = -2 * k * dtheta / sinf(theta);

    float common_factor_cross = k * rij_1_rkj_1;
    float common_factor_self = k * costheta;

    VECTOR fi = common_factor_self * rij_2 * drij - common_factor_cross * drkj;
    VECTOR fk = common_factor_self * rkj_2 * drkj - common_factor_cross * drij;

    atomicAdd(&frc[atom_i].x, fi.x);
    atomicAdd(&frc[atom_i].y, fi.y);
    atomicAdd(&frc[atom_i].z, fi.z);

    atomicAdd(&frc[atom_k].x, fk.x);
    atomicAdd(&frc[atom_k].y, fk.y);
    atomicAdd(&frc[atom_k].z, fk.z);

    fi = -fi - fk;

    atomicAdd(&frc[atom_j].x, fi.x);
    atomicAdd(&frc[atom_j].y, fi.y);
    atomicAdd(&frc[atom_j].z, fi.z);
  }
}

void AngleForce(int angle_numbers, int atom_numbers, const int *uint_crd_f, const float *scaler_f, const int *atom_a,
                const int *atom_b, const int *atom_c, const float *angle_k, const float *angle_theta0, float *frc_f,
                cudaStream_t stream) {
  Reset_List<<<ceilf(static_cast<float>(3. * atom_numbers) / 128), 128, 0, stream>>>(3 * atom_numbers, frc_f, 0.);
  size_t thread_per_block = 128;
  size_t block_per_grid = ceilf(static_cast<float>(angle_numbers) / 128);
  UNSIGNED_INT_VECTOR *uint_crd =
    const_cast<UNSIGNED_INT_VECTOR *>(reinterpret_cast<const UNSIGNED_INT_VECTOR *>(uint_crd_f));
  VECTOR *frc = const_cast<VECTOR *>(reinterpret_cast<const VECTOR *>(frc_f));
  VECTOR *scaler = const_cast<VECTOR *>(reinterpret_cast<const VECTOR *>(scaler_f));

  AngleForceKernel<<<block_per_grid, thread_per_block, 0, stream>>>(angle_numbers, uint_crd, scaler, atom_a, atom_b,
                                                                    atom_c, angle_k, angle_theta0, frc);
  return;
}
void AngleForce(int angle_numbers, int atom_numbers, const int *uint_crd_f, const float *scaler_f, const int *atom_a,
                const int *atom_b, const int *atom_c, const float *angle_k, const float *angle_theta0, float *frc_f,
                cudaStream_t stream);

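The force expressions in this kernel follow from differentiating that same angle energy. A sketch of the derivation (not text from the commit): with dE/dtheta = 2k(theta - theta_0) and the chain rule through cos(theta),

  \vec{F}_i = -\nabla_{\vec{r}_i} E
            = \frac{2k(\theta - \theta_0)}{\sin\theta}\left(\frac{\vec{r}_{kj}}{r_{ij}\,r_{kj}} - \frac{\cos\theta}{r_{ij}^{2}}\,\vec{r}_{ij}\right)

which is exactly common_factor_self * rij_2 * drij - common_factor_cross * drkj once k has been reassigned to -2k(theta - theta_0)/sin(theta). fk is the same expression with i and k swapped, and the central atom j receives -(fi + fk), so each angle contributes zero net force.
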
@@ -1,27 +0,0 @@
#ifndef MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_ANGLE_ANGLE_FORCE_IMPL_H_
#define MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_ANGLE_ANGLE_FORCE_IMPL_H_

#include <curand_kernel.h>
#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/cuda_common.h"
#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/cuda_device_info.h"

CUDA_LIB_EXPORT void AngleForce(int angle_numbers, int atom_numbers, const int *uint_crd_f, const float *scaler_f,
                                const int *atom_a, const int *atom_b, const int *atom_c, const float *angle_k,
                                const float *angle_theta0, float *frc_f, cudaStream_t stream);
#endif

@@ -1,90 +0,0 @@
#include "plugin/device/gpu/kernel/cuda_impl/sponge/angle/angle_force_with_atom_energy_impl.cuh"
#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/util.cuh"
#include "plugin/device/gpu/kernel/cuda_impl/sponge/common_sponge.cuh"

__global__ void AngleForceWithAtomEnergyKernel(int angle_numbers, const UNSIGNED_INT_VECTOR *uint_crd,
                                               const VECTOR *scaler, const int *atom_a, const int *atom_b,
                                               const int *atom_c, const float *angle_k, const float *angle_theta0,
                                               VECTOR *frc, float *atom_energy) {
  int angle_i = blockDim.x * blockIdx.x + threadIdx.x;
  if (angle_i < angle_numbers) {
    int atom_i = atom_a[angle_i];
    int atom_j = atom_b[angle_i];
    int atom_k = atom_c[angle_i];

    float theta0 = angle_theta0[angle_i];
    float k = angle_k[angle_i];
    float k2 = k;

    VECTOR drij = Get_Periodic_Displacement(uint_crd[atom_i], uint_crd[atom_j], scaler[0]);
    VECTOR drkj = Get_Periodic_Displacement(uint_crd[atom_k], uint_crd[atom_j], scaler[0]);

    float rij_2 = 1. / (drij * drij);
    float rkj_2 = 1. / (drkj * drkj);
    float rij_1_rkj_1 = sqrtf(rij_2 * rkj_2);

    float costheta = drij * drkj * rij_1_rkj_1;
    costheta = fmaxf(-0.999999, fminf(costheta, 0.999999));
    float theta = acosf(costheta);

    float dtheta = theta - theta0;
    k = -2 * k * dtheta / sinf(theta);

    float common_factor_cross = k * rij_1_rkj_1;
    float common_factor_self = k * costheta;

    VECTOR fi = common_factor_self * rij_2 * drij - common_factor_cross * drkj;
    VECTOR fk = common_factor_self * rkj_2 * drkj - common_factor_cross * drij;

    atomicAdd(&frc[atom_i].x, fi.x);
    atomicAdd(&frc[atom_i].y, fi.y);
    atomicAdd(&frc[atom_i].z, fi.z);

    atomicAdd(&frc[atom_k].x, fk.x);
    atomicAdd(&frc[atom_k].y, fk.y);
    atomicAdd(&frc[atom_k].z, fk.z);

    fi = -fi - fk;

    atomicAdd(&frc[atom_j].x, fi.x);
    atomicAdd(&frc[atom_j].y, fi.y);
    atomicAdd(&frc[atom_j].z, fi.z);

    atomicAdd(&atom_energy[atom_i], k2 * dtheta * dtheta);
  }
}

void AngleForceWithAtomEnergy(int angle_numbers, int atom_numbers, const int *uint_crd_f, const float *scaler_f,
                              const int *atom_a, const int *atom_b, const int *atom_c, const float *angle_k,
                              const float *angle_theta0, float *frc_f, float *ene, cudaStream_t stream) {
  Reset_List<<<ceilf(static_cast<float>(3. * atom_numbers) / 128), 128, 0, stream>>>(3 * atom_numbers, frc_f, 0.);
  size_t thread_per_block = 128;
  size_t block_per_grid = ceilf(static_cast<float>(angle_numbers) / 128);
  UNSIGNED_INT_VECTOR *uint_crd =
    const_cast<UNSIGNED_INT_VECTOR *>(reinterpret_cast<const UNSIGNED_INT_VECTOR *>(uint_crd_f));
  VECTOR *frc = const_cast<VECTOR *>(reinterpret_cast<const VECTOR *>(frc_f));
  VECTOR *scaler = const_cast<VECTOR *>(reinterpret_cast<const VECTOR *>(scaler_f));

  AngleForceWithAtomEnergyKernel<<<block_per_grid, thread_per_block, 0, stream>>>(
    angle_numbers, uint_crd, scaler, atom_a, atom_b, atom_c, angle_k, angle_theta0, frc, ene);
  return;
}
void AngleForceWithAtomEnergy(int angle_numbers, int atom_numbers, const int *uint_crd_f, const float *scaler_f,
                              const int *atom_a, const int *atom_b, const int *atom_c, const float *angle_k,
                              const float *angle_theta0, float *frc_f, float *ene, cudaStream_t stream);

@@ -1,28 +0,0 @@
#ifndef MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_ANGLE_ANGLE_FORCE_WITH_ATOM_ENERGY_IMPL_H_
#define MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_ANGLE_ANGLE_FORCE_WITH_ATOM_ENERGY_IMPL_H_

#include <curand_kernel.h>
#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/cuda_common.h"
#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/cuda_device_info.h"

CUDA_LIB_EXPORT void AngleForceWithAtomEnergy(int angle_numbers, int atom_numbers, const int *uint_crd_f,
                                              const float *scaler_f, const int *atom_a, const int *atom_b,
                                              const int *atom_c, const float *angle_k, const float *angle_theta0,
                                              float *frc_f, float *ene, cudaStream_t stream);
#endif

@@ -1,57 +0,0 @@
#include "plugin/device/gpu/kernel/cuda_impl/sponge/bond/bond_atom_energy_cuda_gpu_impl.cuh"
#include "plugin/device/gpu/kernel/cuda_impl/sponge/common_sponge.cuh"
#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/util.cuh"

__global__ void BondAtomEnergyCudaKernel(const int bond_numbers, const UNSIGNED_INT_VECTOR *uint_crd,
                                         const VECTOR *scaler, const int *atom_a, const int *atom_b,
                                         const float *bond_k, const float *bond_r0, float *atom_ene) {
  int bond_i = blockDim.x * blockIdx.x + threadIdx.x;
  if (bond_i < bond_numbers) {
    int atom_i = atom_a[bond_i];
    int atom_j = atom_b[bond_i];

    float k = bond_k[bond_i];
    float r0 = bond_r0[bond_i];

    VECTOR dr = Get_Periodic_Displacement(uint_crd[atom_i], uint_crd[atom_j], scaler[0]);

    float r1 = norm3df(dr.x, dr.y, dr.z);
    float tempf = r1 - r0;

    atomicAdd(&atom_ene[atom_i], k * tempf * tempf);
  }
}

void BondAtomEnergy(int bond_numbers, int atom_numbers, const int *uint_crd_f, const float *scaler_f, const int *atom_a,
                    const int *atom_b, const float *bond_k, const float *bond_r0, float *atom_ene,
                    cudaStream_t stream) {
  Reset_List<<<ceilf(static_cast<float>(atom_numbers) / 128), 128, 0, stream>>>(atom_numbers, atom_ene, 0.);
  size_t thread_per_block = 128;
  size_t block_per_grid = ceilf(static_cast<float>(bond_numbers) / 128);
  UNSIGNED_INT_VECTOR *uint_crd =
    const_cast<UNSIGNED_INT_VECTOR *>(reinterpret_cast<const UNSIGNED_INT_VECTOR *>(uint_crd_f));
  VECTOR *scaler = const_cast<VECTOR *>(reinterpret_cast<const VECTOR *>(scaler_f));

  BondAtomEnergyCudaKernel<<<block_per_grid, thread_per_block, 0, stream>>>(bond_numbers, uint_crd, scaler, atom_a,
                                                                            atom_b, bond_k, bond_r0, atom_ene);
  return;
}

void BondAtomEnergy(int bond_numbers, int atom_numbers, const int *uint_crd_f, const float *scaler_f, const int *atom_a,
                    const int *atom_b, const float *bond_k, const float *bond_r0, float *atom_ene, cudaStream_t stream);

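Both bond-energy kernels in this commit evaluate the same harmonic stretch term; the variant above folds it onto atom_a with atomicAdd, while BondEnergyCudaKernel below writes one value per bond. The worked form (a paraphrase of the code, with r the minimum-image distance obtained from Get_Periodic_Displacement and norm3df):

  E_{bond} = k\,(r - r_0)^2
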
@@ -1,28 +0,0 @@
#ifndef MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_BOND_BOND_ATOM_ENERGY_CUDA_GPU_IMPL_H_
#define MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_BOND_BOND_ATOM_ENERGY_GPU_IMPL_H_

#include <curand_kernel.h>
#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/cuda_common.h"
#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/cuda_device_info.h"

CUDA_LIB_EXPORT void BondAtomEnergy(int bond_numbers, int atom_numbers, const int *uint_crd_f, const float *scaler_f,
                                    const int *atom_a, const int *atom_b, const float *bond_k, const float *bond_r0,
                                    float *atom_ene, cudaStream_t stream);

#endif  // MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_BOND_ATOM_ENERGY_GPU_IMPL_H_

@@ -1,61 +0,0 @@
#include "plugin/device/gpu/kernel/cuda_impl/sponge/bond/bond_energy_cuda_gpu_impl.cuh"
#include "plugin/device/gpu/kernel/cuda_impl/sponge/common_sponge.cuh"
#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/util.cuh"

__global__ void BondEnergyCudaKernel(const int bond_numbers, const UNSIGNED_INT_VECTOR *uint_crd, const VECTOR *scaler,
                                     const int *atom_a, const int *atom_b, const float *bond_k, const float *bond_r0,
                                     float *bond_ene) {
  int bond_i = blockDim.x * blockIdx.x + threadIdx.x;
  if (bond_i < bond_numbers) {
    int atom_i = atom_a[bond_i];
    int atom_j = atom_b[bond_i];

    float k = bond_k[bond_i];
    float r0 = bond_r0[bond_i];

    VECTOR dr = Get_Periodic_Displacement(uint_crd[atom_i], uint_crd[atom_j], scaler[0]);

    float r1 = norm3df(dr.x, dr.y, dr.z);
    float tempf = r1 - r0;
    float temp = k * tempf * tempf;
    bond_ene[bond_i] = temp;
  }
}

void BondEnergy(int bond_numbers, int atom_numbers, const unsigned int *uint_crd_f, const float *scaler_f,
                const int *atom_a, const int *atom_b, const float *bond_k, const float *bond_r0, float *bond_ene,
                cudaStream_t stream) {
  size_t thread_per_block = 128;
  size_t block_per_grid = ceilf(static_cast<float>(bond_numbers) / 128);
  UNSIGNED_INT_VECTOR *uint_crd =
    const_cast<UNSIGNED_INT_VECTOR *>(reinterpret_cast<const UNSIGNED_INT_VECTOR *>(uint_crd_f));
  VECTOR *scaler = const_cast<VECTOR *>(reinterpret_cast<const VECTOR *>(scaler_f));

  Reset_List<<<(unsigned int)ceilf(static_cast<float>(bond_numbers) / 128), 128, 0, stream>>>(bond_numbers,
                                                                                              bond_ene, 0.);

  BondEnergyCudaKernel<<<block_per_grid, thread_per_block, 0, stream>>>(bond_numbers, uint_crd, scaler,
                                                                        atom_a, atom_b, bond_k, bond_r0,
                                                                        bond_ene);
  return;
}

void BondEnergy(int bond_numbers, int atom_numbers, const unsigned int *uint_crd_f, const float *scaler_f,
                const int *atom_a, const int *atom_b, const float *bond_k, const float *bond_r0,
                float *bond_ene, cudaStream_t stream);

@@ -1,28 +0,0 @@
#ifndef MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_BOND_BOND_ENERGY_CUDA_GPU_IMPL_H_
#define MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_BOND_BOND_ENERGY_CUDA_GPU_IMPL_H_

#include <curand_kernel.h>
#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/cuda_common.h"
#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/cuda_device_info.h"

CUDA_LIB_EXPORT void BondEnergy(int bond_numbers, int atom_numbers, const unsigned int *uint_crd_f,
                                const float *scaler_f, const int *atom_a, const int *atom_b, const float *bond_k,
                                const float *bond_r0, float *bond_ene, cudaStream_t stream);

#endif  // MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_BOND_ENERGY_CUDA_GPU_IMPL_H_

@@ -1,63 +0,0 @@
#include "plugin/device/gpu/kernel/cuda_impl/sponge/bond/bond_force_cuda_gpu_impl.cuh"
#include "plugin/device/gpu/kernel/cuda_impl/sponge/common_sponge.cuh"
#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/util.cuh"

__global__ void BondForceCudaKernel(int bond_numbers, const UNSIGNED_INT_VECTOR *uint_crd, const VECTOR *scaler,
                                    const int *atom_a, const int *atom_b, const float *bond_k, const float *bond_r0,
                                    VECTOR *frc) {
  int bond_i = blockDim.x * blockIdx.x + threadIdx.x;
  if (bond_i < bond_numbers) {
    int atom_i = atom_a[bond_i];
    int atom_j = atom_b[bond_i];

    float k = bond_k[bond_i];
    float r0 = bond_r0[bond_i];
    VECTOR dr = Get_Periodic_Displacement(uint_crd[atom_i], uint_crd[atom_j], scaler[0]);
    float r_1 = rnorm3df(dr.x, dr.y, dr.z);
    float tempf = 1.0 - r0 * r_1;

    VECTOR f = 2 * tempf * k * dr;
    atomicAdd(&frc[atom_i].x, -f.x);
    atomicAdd(&frc[atom_i].y, -f.y);
    atomicAdd(&frc[atom_i].z, -f.z);

    atomicAdd(&frc[atom_j].x, f.x);
    atomicAdd(&frc[atom_j].y, f.y);
    atomicAdd(&frc[atom_j].z, f.z);
  }
}

void BondForce(int bond_numbers, int atom_numbers, const unsigned int *uint_crd_f, const float *scaler_f,
               const int *atom_a, const int *atom_b, const float *bond_k, const float *bond_r0, float *frc_f,
               cudaStream_t stream) {
  Reset_List<<<ceilf(static_cast<float>(3. * atom_numbers) / 128), 128, 0, stream>>>(3 * atom_numbers, frc_f, 0.);
  size_t thread_per_block = 128;
  size_t block_per_grid = ceilf(static_cast<float>(bond_numbers) / 128);
  UNSIGNED_INT_VECTOR *uint_crd =
    const_cast<UNSIGNED_INT_VECTOR *>(reinterpret_cast<const UNSIGNED_INT_VECTOR *>(uint_crd_f));
  VECTOR *scaler = const_cast<VECTOR *>(reinterpret_cast<const VECTOR *>(scaler_f));
  VECTOR *frc = const_cast<VECTOR *>(reinterpret_cast<const VECTOR *>(frc_f));
  BondForceCudaKernel<<<block_per_grid, thread_per_block, 0, stream>>>(bond_numbers, uint_crd, scaler, atom_a, atom_b,
                                                                       bond_k, bond_r0, frc);
  return;
}

void BondForce(int bond_numbers, int atom_numbers, const unsigned int *uint_crd_f, const float *scaler_f,
               const int *atom_a, const int *atom_b, const float *bond_k, const float *bond_r0,
               float *frc_f, cudaStream_t stream);

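The line VECTOR f = 2 * tempf * k * dr above is the gradient of that bond energy. In vector form, a sketch assuming dr is the minimum-image displacement r_i - r_j:

  \vec{F}_i = -\nabla_{\vec{r}_i}\,k (r - r_0)^2 = -2k\left(1 - \frac{r_0}{r}\right)\vec{r}_{ij}

The kernel adds -f to atom_a and +f to atom_b, so every bond contributes equal and opposite forces; rnorm3df supplies 1/r in a single intrinsic.
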
@@ -1,27 +0,0 @@
#ifndef MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_BOND_BOND_FORCE_CUDA_GPU_IMPL_H_
#define MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_BOND_BOND_FORCE_CUDA_GPU_IMPL_H_

#include <curand_kernel.h>
#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/cuda_common.h"
#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/cuda_device_info.h"

CUDA_LIB_EXPORT void BondForce(int bond_numbers, int atom_numbers, const unsigned int *uint_crd_f,
                               const float *scaler_f, const int *atom_a, const int *atom_b, const float *bond_k,
                               const float *bond_r0, float *frc_f, cudaStream_t stream);
#endif  // MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_BOND_FORCE_CUDA_GPU_IMPL_H_

@@ -1,75 +0,0 @@
#include "plugin/device/gpu/kernel/cuda_impl/sponge/bond/bond_force_with_atom_energy_and_virial_impl.cuh"
#include "plugin/device/gpu/kernel/cuda_impl/sponge/common_sponge.cuh"
#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/util.cuh"

__global__ void BondForceWithAtomEnergyAndVirialKernel(const int bond_numbers, const UNSIGNED_INT_VECTOR *uint_crd,
                                                       const VECTOR *scaler, const int *atom_a, const int *atom_b,
                                                       const float *bond_k, const float *bond_r0, VECTOR *frc,
                                                       float *atom_energy, float *atom_virial) {
  int bond_i = blockDim.x * blockIdx.x + threadIdx.x;
  if (bond_i < bond_numbers) {
    int atom_i = atom_a[bond_i];
    int atom_j = atom_b[bond_i];

    float k = bond_k[bond_i];
    float r0 = bond_r0[bond_i];

    VECTOR dr = Get_Periodic_Displacement(uint_crd[atom_i], uint_crd[atom_j], scaler[0]);
    float abs_r = norm3df(dr.x, dr.y, dr.z);
    float r_1 = 1. / abs_r;

    float tempf2 = abs_r - r0;
    float tempf = 2 * tempf2 * k;
    VECTOR f = tempf * r_1 * dr;

    atomicAdd(&frc[atom_i].x, -f.x);
    atomicAdd(&frc[atom_i].y, -f.y);
    atomicAdd(&frc[atom_i].z, -f.z);

    atomicAdd(&frc[atom_j].x, f.x);
    atomicAdd(&frc[atom_j].y, f.y);
    atomicAdd(&frc[atom_j].z, f.z);

    atomicAdd(&atom_virial[atom_i], -tempf * abs_r);
    atomicAdd(&atom_energy[atom_i], k * tempf2 * tempf2);
  }
}

void BondForceWithAtomEnergyAndVirial(int bond_numbers, int atom_numbers, const unsigned int *uint_crd_f,
                                      const float *scaler_f, const int *atom_a, const int *atom_b, const float *bond_k,
                                      const float *bond_r0, float *frc_f, float *atom_energy, float *atom_v,
                                      cudaStream_t stream) {
  Reset_List<<<ceilf(static_cast<float>(3. * atom_numbers) / 128), 128, 0, stream>>>(3 * atom_numbers, frc_f, 0.);
  Reset_List<<<ceilf(static_cast<float>(atom_numbers) / 128), 128, 0, stream>>>(atom_numbers, atom_v, 0.);
  Reset_List<<<ceilf(static_cast<float>(atom_numbers) / 128), 128, 0, stream>>>(atom_numbers, atom_energy, 0.);
  size_t thread_per_block = 128;
  size_t block_per_grid = ceilf(static_cast<float>(bond_numbers) / 128);
  UNSIGNED_INT_VECTOR *uint_crd =
    const_cast<UNSIGNED_INT_VECTOR *>(reinterpret_cast<const UNSIGNED_INT_VECTOR *>(uint_crd_f));
  VECTOR *frc = const_cast<VECTOR *>(reinterpret_cast<const VECTOR *>(frc_f));
  VECTOR *scaler = const_cast<VECTOR *>(reinterpret_cast<const VECTOR *>(scaler_f));
  BondForceWithAtomEnergyAndVirialKernel<<<block_per_grid, thread_per_block, 0, stream>>>(
    bond_numbers, uint_crd, scaler, atom_a, atom_b, bond_k, bond_r0, frc, atom_energy, atom_v);
  return;
}

void BondForceWithAtomEnergyAndVirial(int bond_numbers, int atom_numbers, const unsigned int *uint_crd_f,
                                      const float *scaler_f, const int *atom_a, const int *atom_b, const float *bond_k,
                                      const float *bond_r0, float *frc_f, float *atom_energy, float *atom_v,
                                      cudaStream_t stream);

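The per-atom virial line in this fused kernel follows the pair form of the virial. Up to the code's sign convention, a sketch of the identity (with tempf = 2k(r - r_0), as in the source):

  W_i = -r\,\frac{\partial E}{\partial r} = -2k\,(r - r_0)\,r

which matches atomicAdd(&atom_virial[atom_i], -tempf * abs_r); the whole pair contribution is booked on atom_a rather than split between the two atoms, which is harmless once the per-atom virials are summed.
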
@@ -1,29 +0,0 @@
#ifndef MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_BOND_BOND_FORCE_WITH_ATOM_VIRIAL_IMPL_H_
#define MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_BOND_BOND_FORCE_WITH_ATOM_VIRIAL_IMPL_H_

#include <curand_kernel.h>
#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/cuda_common.h"
#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/cuda_device_info.h"

CUDA_LIB_EXPORT void BondForceWithAtomEnergyAndVirial(int bond_numbers, int atom_numbers,
                                                      const unsigned int *uint_crd_f, const float *scaler_f,
                                                      const int *atom_a, const int *atom_b, const float *bond_k,
                                                      const float *bond_r0, float *frc_f, float *atom_energy,
                                                      float *atom_v, cudaStream_t stream);
#endif

@@ -1,69 +0,0 @@
#include "plugin/device/gpu/kernel/cuda_impl/sponge/bond/bond_force_with_atom_energy_impl.cuh"
#include "plugin/device/gpu/kernel/cuda_impl/sponge/common_sponge.cuh"
#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/util.cuh"

__global__ void BondForceWithAtomEnergyKernel(int bond_numbers, const UNSIGNED_INT_VECTOR *uint_crd,
                                              const VECTOR *scaler, const int *atom_a, const int *atom_b,
                                              const float *bond_k, const float *bond_r0, VECTOR *frc,
                                              float *atom_energy) {
  int bond_i = blockDim.x * blockIdx.x + threadIdx.x;
  if (bond_i < bond_numbers) {
    int atom_i = atom_a[bond_i];
    int atom_j = atom_b[bond_i];

    float k = bond_k[bond_i];
    float r0 = bond_r0[bond_i];

    VECTOR dr = Get_Periodic_Displacement(uint_crd[atom_i], uint_crd[atom_j], scaler[0]);

    float abs_r = norm3df(dr.x, dr.y, dr.z);
    float r_1 = 1. / abs_r;
    float tempf = abs_r - r0;
    VECTOR f = 2 * tempf * r_1 * k * dr;

    atomicAdd(&frc[atom_i].x, -f.x);
    atomicAdd(&frc[atom_i].y, -f.y);
    atomicAdd(&frc[atom_i].z, -f.z);

    atomicAdd(&frc[atom_j].x, f.x);
    atomicAdd(&frc[atom_j].y, f.y);
    atomicAdd(&frc[atom_j].z, f.z);

    atomicAdd(&atom_energy[atom_i], k * tempf * tempf);
  }
}

void BondForceWithAtomEnergy(int bond_numbers, int atom_numbers, const int *uint_crd_f, const float *scaler_f,
                             const int *atom_a, const int *atom_b, const float *bond_k, const float *bond_r0,
                             float *frc_f, float *atom_e, cudaStream_t stream) {
  Reset_List<<<ceilf(static_cast<float>(3. * atom_numbers) / 128), 128, 0, stream>>>(3 * atom_numbers, frc_f, 0.);
  Reset_List<<<ceilf(static_cast<float>(atom_numbers) / 128), 128, 0, stream>>>(atom_numbers, atom_e, 0.);
  size_t thread_per_block = 128;
  size_t block_per_grid = ceilf(static_cast<float>(bond_numbers) / 128);
  UNSIGNED_INT_VECTOR *uint_crd =
    const_cast<UNSIGNED_INT_VECTOR *>(reinterpret_cast<const UNSIGNED_INT_VECTOR *>(uint_crd_f));
  VECTOR *frc = const_cast<VECTOR *>(reinterpret_cast<const VECTOR *>(frc_f));
  VECTOR *scaler = const_cast<VECTOR *>(reinterpret_cast<const VECTOR *>(scaler_f));
  BondForceWithAtomEnergyKernel<<<block_per_grid, thread_per_block, 0, stream>>>(bond_numbers, uint_crd, scaler, atom_a,
                                                                                 atom_b, bond_k, bond_r0, frc, atom_e);
  return;
}
void BondForceWithAtomEnergy(int bond_numbers, int atom_numbers, const int *uint_crd_f, const float *scaler_f,
                             const int *atom_a, const int *atom_b, const float *bond_k, const float *bond_r0,
                             float *frc_f, float *atom_e, cudaStream_t stream);

@@ -1,28 +0,0 @@
#ifndef MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_BOND_BOND_FORCE_WITH_ATOM_ENERGY_IMPL_H_
#define MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_BOND_BOND_FORCE_WITH_ATOM_ENERGY_IMPL_H_

#include <curand_kernel.h>
#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/cuda_common.h"
#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/cuda_device_info.h"

CUDA_LIB_EXPORT void BondForceWithAtomEnergy(int bond_numbers, int atom_numbers, const int *uint_crd_f,
                                             const float *scaler_f, const int *atom_a, const int *atom_b,
                                             const float *bond_k, const float *bond_r0, float *frc_f, float *atom_e,
                                             cudaStream_t stream);
#endif

@@ -1,69 +0,0 @@
#include "plugin/device/gpu/kernel/cuda_impl/sponge/bond/bond_force_with_atom_virial_impl.cuh"
#include "plugin/device/gpu/kernel/cuda_impl/sponge/common_sponge.cuh"
#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/util.cuh"

__global__ void BondForceWithAtomVirialKernel(int bond_numbers, const UNSIGNED_INT_VECTOR *uint_crd,
                                              const VECTOR *scaler, const int *atom_a, const int *atom_b,
                                              const float *bond_k, const float *bond_r0, VECTOR *frc,
                                              float *atom_virial) {
  int bond_i = blockDim.x * blockIdx.x + threadIdx.x;
  if (bond_i < bond_numbers) {
    int atom_i = atom_a[bond_i];
    int atom_j = atom_b[bond_i];

    float k = bond_k[bond_i];
    float r0 = bond_r0[bond_i];

    VECTOR dr = Get_Periodic_Displacement(uint_crd[atom_i], uint_crd[atom_j], scaler[0]);

    float abs_r = norm3df(dr.x, dr.y, dr.z);
    float r_1 = 1. / abs_r;
    float tempf = (abs_r - r0) * k;
    VECTOR f = 2 * tempf * r_1 * dr;

    atomicAdd(&frc[atom_i].x, -f.x);
    atomicAdd(&frc[atom_i].y, -f.y);
    atomicAdd(&frc[atom_i].z, -f.z);

    atomicAdd(&frc[atom_j].x, f.x);
    atomicAdd(&frc[atom_j].y, f.y);
    atomicAdd(&frc[atom_j].z, f.z);

    atomicAdd(&atom_virial[atom_i], abs_r * tempf);
  }
}

void BondForceWithAtomVirial(int bond_numbers, int atom_numbers, const int *uint_crd_f, const float *scaler_f,
                             const int *atom_a, const int *atom_b, const float *bond_k, const float *bond_r0,
                             float *frc_f, float *atom_v, cudaStream_t stream) {
  Reset_List<<<ceilf(static_cast<float>(3. * atom_numbers) / 128), 128, 0, stream>>>(3 * atom_numbers, frc_f, 0.);
  Reset_List<<<ceilf(static_cast<float>(atom_numbers) / 128), 128, 0, stream>>>(atom_numbers, atom_v, 0.);
  size_t thread_per_block = 128;
  size_t block_per_grid = ceilf(static_cast<float>(bond_numbers) / 128);
  UNSIGNED_INT_VECTOR *uint_crd =
    const_cast<UNSIGNED_INT_VECTOR *>(reinterpret_cast<const UNSIGNED_INT_VECTOR *>(uint_crd_f));
  VECTOR *frc = const_cast<VECTOR *>(reinterpret_cast<const VECTOR *>(frc_f));
  VECTOR *scaler = const_cast<VECTOR *>(reinterpret_cast<const VECTOR *>(scaler_f));
  BondForceWithAtomVirialKernel<<<block_per_grid, thread_per_block, 0, stream>>>(bond_numbers, uint_crd, scaler, atom_a,
                                                                                 atom_b, bond_k, bond_r0, frc, atom_v);
  return;
}
void BondForceWithAtomVirial(int bond_numbers, int atom_numbers, const int *uint_crd_f, const float *scaler_f,
                             const int *atom_a, const int *atom_b, const float *bond_k, const float *bond_r0,
                             float *frc_f, float *atom_v, cudaStream_t stream);

@@ -1,28 +0,0 @@
#ifndef MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_BOND_BOND_FORCE_WITH_ATOM_VIRIAL_IMPL_H_
#define MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_BOND_BOND_FORCE_WITH_ATOM_VIRIAL_IMPL_H_

#include <curand_kernel.h>
#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/cuda_common.h"
#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/cuda_device_info.h"

CUDA_LIB_EXPORT void BondForceWithAtomVirial(int bond_numbers, int atom_numbers, const int *uint_crd_f,
                                             const float *scaler_f, const int *atom_a, const int *atom_b,
                                             const float *bond_k, const float *bond_r0, float *frc_f, float *atom_v,
                                             cudaStream_t stream);
#endif

@@ -1,123 +0,0 @@
#include "plugin/device/gpu/kernel/cuda_impl/sponge/common/atomcrdtocv_impl.cuh"
#include "plugin/device/gpu/kernel/cuda_impl/sponge/common_sponge.cuh"
#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/util.cuh"

__device__ __host__ float fc(float Rij) {
  const float PI = 3.141592654;
  const float Rc = 1000.0;
  return 0.5 * cosf(PI / Rc * Rij) + 0.5;
}

__global__ void Record_Box_Map_Times(int atom_numbers, const float *crd, const float *old_crd, float *box,
                                     int *box_map_times) {
  float half_box[3] = {0.5F * box[0], 0.5F * box[1], 0.5F * box[2]};
  int i = blockDim.x * blockIdx.x + threadIdx.x;
  if (i < atom_numbers) {
    if (crd[3 * i + 0] - old_crd[3 * i + 0] > half_box[0]) {
      box_map_times[3 * i + 0] = box_map_times[3 * i + 0] - 1;
    } else if (crd[3 * i + 0] - old_crd[3 * i + 0] < -half_box[0]) {
      box_map_times[3 * i + 0] = box_map_times[3 * i + 0] + 1;
    }
    if (crd[3 * i + 1] - old_crd[3 * i + 1] > half_box[1]) {
      box_map_times[3 * i + 1] = box_map_times[3 * i + 1] - 1;
    } else if (crd[3 * i + 1] - old_crd[3 * i + 1] < -half_box[1]) {
      box_map_times[3 * i + 1] = box_map_times[3 * i + 1] + 1;
    }
    if (crd[3 * i + 2] - old_crd[3 * i + 2] > half_box[2]) {
      box_map_times[3 * i + 2] = box_map_times[3 * i + 2] - 1;
    } else if (crd[3 * i + 2] - old_crd[3 * i + 2] < -half_box[2]) {
      box_map_times[3 * i + 2] = box_map_times[3 * i + 2] + 1;
    }
  }
}

__global__ void gen_nowarp_crd(int atom_numbers, const float *crd, float *box, int *box_map_times, float *nowarp_crd) {
  int i = blockDim.x * blockIdx.x + threadIdx.x;
  if (i < atom_numbers) {
    nowarp_crd[3 * i + 0] = static_cast<float>(box_map_times[3 * i + 0]) * box[0] + crd[3 * i + 0];
    nowarp_crd[3 * i + 1] = static_cast<float>(box_map_times[3 * i + 1]) * box[1] + crd[3 * i + 1];
    nowarp_crd[3 * i + 2] = static_cast<float>(box_map_times[3 * i + 2]) * box[2] + crd[3 * i + 2];
  }
}

__global__ void G_Radial(const int start_serial, const int end_serial, const float *crd, float *g_radial) {
  const float Rs = 0.5, Eta = 0.5;
  int i = blockDim.x * blockIdx.x + threadIdx.x;
  if (i >= start_serial && i < end_serial) {
    float rij;
    float g_radial_lin = 0.;
    for (int j = start_serial; j < end_serial; j = j + 1) {
      if (j != i) {
        // rij = sqrtf((crd[3*i+0] - crd[j]) * (crd[i] - crd[j]));
        rij = sqrtf(normfloat(crd, crd, i, j));
        g_radial_lin = g_radial_lin + expf(-Eta * (rij - Rs) * (rij - Rs)) * fc(rij);
      } else {
        continue;
      }
    }
    g_radial[i] = g_radial_lin;
  }
}

__global__ void G_Angular(const int start_serial, const int end_serial, const float *crd, float *g_angular) {
  const float Rs = 0.5, Thetas = 3.14, Eta = 0.5, Zeta = 2.0;
  int i = blockDim.x * blockIdx.x + threadIdx.x;
  if (i >= start_serial && i < end_serial) {
    float rij, rik, rjk, theta_jik;
    float g_angular_lin = 0.;
    for (int j = start_serial; j < end_serial; j = j + 1) {
      if (j != i) {
        rij = sqrtf(normfloat(crd, crd, i, j));
        for (int k = j + 1; k < end_serial; k = k + 1) {
          if (k != i) {
            rik = sqrtf(normfloat(crd, crd, i, k));
            rjk = sqrtf(normfloat(crd, crd, j, k));
            theta_jik =
              acosf(fmaxf(fminf((rij * rij + rik * rik - rjk * rjk) / (2. * rij * rik), 0.999999), -0.999999));
            g_angular_lin = g_angular_lin + powf(1. + cosf(theta_jik - Thetas), Zeta) *
                                              expf(-Eta * powf(0.5 * (rij + rik) - Rs, 2.)) * fc(rij) * fc(rik);
          } else {
            continue;
          }
        }
      } else {
        continue;
      }
    }
    g_angular[i] = powf(2., 1. - Zeta) * g_angular_lin;
  }
}

void AtomCrdToCV(int atom_numbers, int start_serial, int end_serial, int number, const float *crd_f,
                 const float *old_crd, float *nowarp_crd, int *box_map_times, float *box, float *g_radial,
                 float *g_angular, cudaStream_t stream) {
  Reset_List<<<ceilf(static_cast<float>(3. * atom_numbers) / 128), 128, 0, stream>>>(3 * atom_numbers, box_map_times,
                                                                                     0);
  Record_Box_Map_Times<<<ceilf(static_cast<float>(3. * atom_numbers) / 128), 128, 0, stream>>>(
    atom_numbers, crd_f, old_crd, box, box_map_times);
  gen_nowarp_crd<<<ceilf(static_cast<float>(3. * atom_numbers) / 128), 128, 0, stream>>>(atom_numbers, crd_f, box,
                                                                                         box_map_times, nowarp_crd);
  G_Radial<<<1, number, 0, stream>>>(start_serial, end_serial, nowarp_crd, g_radial);
  G_Angular<<<1, number, 0, stream>>>(start_serial, end_serial, nowarp_crd, g_angular);
  return;
}

void AtomCrdToCV(int atom_numbers, int start_serial, int end_serial, int number, const float *crd_f,
                 const float *old_crd, float *nowarp_crd, int *box_map_times, float *box, float *g_radial,
                 float *g_angular, cudaStream_t stream);

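G_Radial and G_Angular are Behler-Parrinello-style symmetry functions evaluated on the unwrapped (nowarp) coordinates. Written out as equations (my paraphrase of the loops, with f_c the cutoff defined at the top of the file and normfloat assumed to return the squared distance between atoms i and j):

  G_i^{rad} = \sum_{j \neq i} e^{-\eta (r_{ij} - R_s)^2} f_c(r_{ij})

  G_i^{ang} = 2^{1-\zeta} \sum_{j \neq i} \sum_{\substack{k > j \\ k \neq i}} \bigl(1 + \cos(\theta_{jik} - \theta_s)\bigr)^{\zeta}\, e^{-\eta \left(\frac{r_{ij} + r_{ik}}{2} - R_s\right)^{2}} f_c(r_{ij})\, f_c(r_{ik})

with eta = 0.5, R_s = 0.5, theta_s = 3.14, and zeta = 2 hard-coded. Both kernels launch as <<<1, number>>>, a single block, which caps the usable CV region at the 1024-thread block limit of current GPUs.
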
@@ -1,27 +0,0 @@
#ifndef MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_COMMON_ATOMCRDTOCV_IMPL_H_
#define MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_COMMON_ATOMCRDTOCV_IMPL_H_

#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/cuda_common.h"
#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/cuda_device_info.h"

CUDA_LIB_EXPORT void AtomCrdToCV(int atom_numbers, int start_serial, int end_serial, int number, const float *crd_f,
                                 const float *old_crd, float *nowarp_crd, int *box_map_times, float *box,
                                 float *g_radial, float *g_angular, cudaStream_t stream);

#endif  // MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_COMMON_ATOMCRDTOCV_IMPL_H_

@ -1,51 +0,0 @@
#include "plugin/device/gpu/kernel/cuda_impl/sponge/common_sponge.cuh"
|
||||
#include "plugin/device/gpu/kernel/cuda_impl/sponge/common/crd_to_uint_crd_impl.cuh"
|
||||
|
||||
__global__ void Crd_To_Uint_Crd(const int atom_numbers, const VECTOR *scale_factor, const VECTOR *crd,
|
||||
UNSIGNED_INT_VECTOR *uint_crd) {
|
||||
int atom_i = blockDim.x * blockIdx.x + threadIdx.x;
|
||||
if (atom_i < atom_numbers) {
|
||||
uint_crd[atom_i].uint_x = crd[atom_i].x * scale_factor[0].x;
|
||||
uint_crd[atom_i].uint_y = crd[atom_i].y * scale_factor[0].y;
|
||||
uint_crd[atom_i].uint_z = crd[atom_i].z * scale_factor[0].z;
|
||||
/*uint_crd[atom_i].uint_x = 2 * uint_crd[atom_i].uint_x;
|
||||
uint_crd[atom_i].uint_y = 2 * uint_crd[atom_i].uint_y;
|
||||
uint_crd[atom_i].uint_z = 2 * uint_crd[atom_i].uint_z;*/
|
||||
uint_crd[atom_i].uint_x = uint_crd[atom_i].uint_x << 1;
|
||||
uint_crd[atom_i].uint_y = uint_crd[atom_i].uint_y << 1;
|
||||
uint_crd[atom_i].uint_z = uint_crd[atom_i].uint_z << 1;
|
||||
}
|
||||
}
|
||||
|
||||
void CrdToUintCrd(const int atom_numbers, const float *crd_to_uint_crd_cof_f, const float *crd_f,
|
||||
unsigned int *uint_crd_f, cudaStream_t stream) {
|
||||
VECTOR *crd = const_cast<VECTOR *>(reinterpret_cast<const VECTOR *>(crd_f));
|
||||
VECTOR *crd_to_uint_crd_cof = const_cast<VECTOR *>(reinterpret_cast<const VECTOR *>(crd_to_uint_crd_cof_f));
|
||||
|
||||
UNSIGNED_INT_VECTOR *uint_crd =
|
||||
const_cast<UNSIGNED_INT_VECTOR *>(reinterpret_cast<const UNSIGNED_INT_VECTOR *>(uint_crd_f));
|
||||
|
||||
Crd_To_Uint_Crd<<<ceilf(static_cast<float>(atom_numbers) / 128.0), 128, 0, stream>>>(
|
||||
atom_numbers, crd_to_uint_crd_cof, crd, uint_crd);
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
void CrdToUintCrd(const int atom_numbers, const float *crd_to_uint_crd_cof_f, const float *crd_f,
|
||||
unsigned int *uint_crd_f, cudaStream_t stream);
|
|
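A note on the representation, since it recurs in every sponge kernel below: storing fractional coordinates as unsigned integers makes the later minimum-image displacement branch-free, because unsigned wraparound followed by a signed reinterpretation picks the nearest periodic image automatically. A minimal 1-D host-side illustration (a sketch under the assumption that the scale factor is the unsigned range divided by the box length; not part of the patch):

  #include <cstdio>

  int main() {
    const float box = 64.0f;                                     // assumed box length
    const float scale = 4294967296.0f / box;                     // uint range / box
    unsigned int ua = static_cast<unsigned int>(1.0f * scale);   // atom near left edge
    unsigned int ub = static_cast<unsigned int>(63.0f * scale);  // atom near right edge
    // Unsigned subtraction wraps modulo 2^32; the int cast reinterprets the result
    // as the signed minimum-image offset, and dividing by scale restores a length.
    float dr = static_cast<int>(ua - ub) / scale;
    printf("dr = %f\n", dr);  // ~2.0, not -62.0: the nearer periodic image wins
    return 0;
  }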
@ -1,27 +0,0 @@
#ifndef MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_CRD_TO_UINT_CRD_IMPL_H_
#define MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_CRD_TO_UINT_CRD_IMPL_H_

#include <curand_kernel.h>
#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/cuda_common.h"
#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/cuda_device_info.h"

CUDA_LIB_EXPORT void CrdToUintCrd(const int atom_numbers, const float *crd_to_uint_crd_cof_f, const float *crd_f,
                                  unsigned int *uint_crd_f, cudaStream_t stream);

#endif  // MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_CRD_TO_UINT_CRD_IMPL_H_
@ -1,54 +0,0 @@
#include "plugin/device/gpu/kernel/cuda_impl/sponge/common/crd_to_uint_crd_quarter_impl.cuh"
|
||||
#include "plugin/device/gpu/kernel/cuda_impl/sponge/common_sponge.cuh"
|
||||
|
||||
__global__ void Crd_To_Uint_Crd_Quarter(const int atom_numbers, const VECTOR *scale_factor, const VECTOR *crd,
|
||||
UNSIGNED_INT_VECTOR *uint_crd) {
|
||||
int atom_i = blockDim.x * blockIdx.x + threadIdx.x;
|
||||
if (atom_i < atom_numbers) {
|
||||
INT_VECTOR tempi;
|
||||
VECTOR temp = crd[atom_i];
|
||||
temp.x *= scale_factor[0].x;
|
||||
temp.y *= scale_factor[0].y;
|
||||
temp.z *= scale_factor[0].z;
|
||||
|
||||
tempi.int_x = temp.x;
|
||||
tempi.int_y = temp.y;
|
||||
tempi.int_z = temp.z;
|
||||
|
||||
uint_crd[atom_i].uint_x = (tempi.int_x << 2);
|
||||
uint_crd[atom_i].uint_y = (tempi.int_y << 2);
|
||||
uint_crd[atom_i].uint_z = (tempi.int_z << 2);
|
||||
}
|
||||
}
|
||||
|
||||
void CrdToUintCrdQuarter(const int atom_numbers, const float *crd_to_uint_crd_cof_f, const float *crd_f,
|
||||
unsigned int *uint_crd_f, cudaStream_t stream) {
|
||||
VECTOR *crd = const_cast<VECTOR *>(reinterpret_cast<const VECTOR *>(crd_f));
|
||||
VECTOR *crd_to_uint_crd_cof = const_cast<VECTOR *>(reinterpret_cast<const VECTOR *>(crd_to_uint_crd_cof_f));
|
||||
|
||||
UNSIGNED_INT_VECTOR *uint_crd =
|
||||
const_cast<UNSIGNED_INT_VECTOR *>(reinterpret_cast<const UNSIGNED_INT_VECTOR *>(uint_crd_f));
|
||||
|
||||
Crd_To_Uint_Crd_Quarter<<<ceilf(static_cast<float>(atom_numbers) / 128.0), 128, 0, stream>>>(
|
||||
atom_numbers, crd_to_uint_crd_cof, crd, uint_crd);
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
void CrdToUintCrdQuarter(const int atom_numbers, const float *crd_to_uint_crd_cof_f, const float *crd_f,
|
||||
unsigned int *uint_crd_f, cudaStream_t stream);
|
|
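The only substantive differences from Crd_To_Uint_Crd above are the rounding path (through a signed INT_VECTOR) and the shift width: << 1 reserves half of the unsigned range for the box span, << 2 reserves a quarter. Reading the two kernels together (this is an interpretation, not a statement from the patch), the trade is coordinate resolution against wraparound headroom:

  \text{resolution} \approx \frac{L}{2^{32-s}}, \qquad s \in \{1, 2\}

where L is the box length and s the shift.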
@ -1,27 +0,0 @@
#ifndef MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_CRD_TO_UINT_CRD_QUARTER_IMPL_H_
#define MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_CRD_TO_UINT_CRD_QUARTER_IMPL_H_

#include <curand_kernel.h>
#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/cuda_common.h"
#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/cuda_device_info.h"

CUDA_LIB_EXPORT void CrdToUintCrdQuarter(const int atom_numbers, const float *crd_to_uint_crd_cof_f,
                                         const float *crd_f, unsigned int *uint_crd_f, cudaStream_t stream);

#endif  // MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_CRD_TO_UINT_CRD_QUARTER_IMPL_H_
@ -1,44 +0,0 @@
#include "plugin/device/gpu/kernel/cuda_impl/sponge/common_sponge.cuh"
|
||||
#include "plugin/device/gpu/kernel/cuda_impl/sponge/common/get_center_of_mass_impl.cuh"
|
||||
|
||||
__global__ void Get_Center_Of_Mass(int residue_numbers, int *start, int *end, VECTOR *crd, float *atom_mass,
|
||||
float *residue_mass_inverse, VECTOR *center_of_mass) {
|
||||
for (int residue_i = blockDim.x * blockIdx.x + threadIdx.x; residue_i < residue_numbers;
|
||||
residue_i += gridDim.x * blockDim.x) {
|
||||
VECTOR com_lin = {0.0f, 0.0f, 0.0f};
|
||||
for (int atom_i = start[residue_i]; atom_i < end[residue_i]; atom_i += 1) {
|
||||
com_lin = com_lin + atom_mass[atom_i] * crd[atom_i];
|
||||
}
|
||||
center_of_mass[residue_i] = residue_mass_inverse[residue_i] * com_lin;
|
||||
}
|
||||
}
|
||||
|
||||
void GetCenterOfMass(int residue_numbers, int *start, int *end, float *crd_f, float *atom_mass,
|
||||
float *residue_mass_inverse, float *center_of_mass_f, cudaStream_t stream) {
|
||||
Reset_List<<<ceilf(static_cast<float>(3. * residue_numbers) / 128), 128, 0, stream>>>(3 * residue_numbers,
|
||||
center_of_mass_f, 0.);
|
||||
VECTOR *crd = reinterpret_cast<VECTOR *>(crd_f);
|
||||
VECTOR *center_of_mass = reinterpret_cast<VECTOR *>(center_of_mass_f);
|
||||
Get_Center_Of_Mass<<<20, 32, 0, stream>>>(residue_numbers, start, end, crd, atom_mass, residue_mass_inverse,
|
||||
center_of_mass);
|
||||
return;
|
||||
}
|
||||
|
||||
void GetCenterOfMass(int residue_numbers, int *start, int *end, float *crd_f, float *atom_mass,
|
||||
float *residue_mass_inverse, float *center_of_mass_f, cudaStream_t stream);
|
|
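This is the standard per-residue center of mass, with residue_mass_inverse assumed to hold the precomputed reciprocal total mass of each residue:

  \mathbf{R}_i = \frac{1}{M_i} \sum_{a = \text{start}_i}^{\text{end}_i - 1} m_a \mathbf{r}_a, \qquad M_i = \sum_a m_a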
@ -1,27 +0,0 @@
#ifndef MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_COMMON_GETCENTEROFMASS_IMPL_H_
#define MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_COMMON_GETCENTEROFMASS_IMPL_H_

#include <curand_kernel.h>
#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/cuda_common.h"
#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/cuda_device_info.h"

CUDA_LIB_EXPORT void GetCenterOfMass(int residue_numbers, int *start, int *end, float *crd_f, float *atom_mass,
                                     float *residue_mass_inverse, float *center_of_mass_f, cudaStream_t stream);

#endif  // MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_COMMON_GETCENTEROFMASS_IMPL_H_
@ -1,43 +0,0 @@
#include "plugin/device/gpu/kernel/cuda_impl/sponge/common_sponge.cuh"
|
||||
#include "plugin/device/gpu/kernel/cuda_impl/sponge/common/getcenter_impl.cuh"
|
||||
|
||||
__global__ void GetCenterOfGeometryKernel(const int center_numbers, float center_numbers_inverse,
|
||||
const int *center_atoms, const VECTOR *crd, VECTOR *center_of_geometry) {
|
||||
int i = blockDim.x * blockIdx.x + threadIdx.x;
|
||||
if (i < center_numbers) {
|
||||
int atom_i = center_atoms[i];
|
||||
VECTOR temp = center_numbers_inverse * crd[atom_i];
|
||||
atomicAdd(¢er_of_geometry[0].x, temp.x);
|
||||
atomicAdd(¢er_of_geometry[0].y, temp.y);
|
||||
atomicAdd(¢er_of_geometry[0].z, temp.z);
|
||||
}
|
||||
}
|
||||
|
||||
void GetCenterOfGeometry(const int center_numbers, float center_numbers_inverse, const int *center_atoms,
|
||||
const float *crd_f, float *center_of_geometry_f, cudaStream_t stream) {
|
||||
VECTOR *crd = const_cast<VECTOR *>(reinterpret_cast<const VECTOR *>(crd_f));
|
||||
VECTOR *center_of_geometry = const_cast<VECTOR *>(reinterpret_cast<const VECTOR *>(center_of_geometry_f));
|
||||
GetCenterOfGeometryKernel<<<ceilf(static_cast<float>(center_numbers) / 32), 32, 0, stream>>>(
|
||||
center_numbers, center_numbers_inverse, center_atoms, crd, center_of_geometry);
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
void GetCenterOfGeometry(const int center_numbers, float center_numbers_inverse, const int *center_atoms, float *crd_f,
|
||||
float *center_of_geometry_f, cudaStream_t stream);
|
|
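One operational detail worth noting: the kernel only atomicAdds into center_of_geometry and, unlike the center-of-mass path above, the host wrapper performs no reset, so the output buffer must be zeroed by the caller before each launch. A minimal calling sketch under that assumption (the function and the d_* names are hypothetical placeholders; cudaMemsetAsync is the stock CUDA runtime call):

  // Hypothetical call site: zero the 3-float accumulator, then launch.
  void ComputeCenterOfGeometry(int center_numbers, const int *d_center_atoms, const float *d_crd,
                               float *d_cog, cudaStream_t stream) {
    cudaMemsetAsync(d_cog, 0, 3 * sizeof(float), stream);
    GetCenterOfGeometry(center_numbers, 1.0f / center_numbers, d_center_atoms, d_crd, d_cog, stream);
  }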
@ -1,28 +0,0 @@
#ifndef MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_COMMON_GETCENTER_IMPL_H_
#define MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_COMMON_GETCENTER_IMPL_H_

#include <curand_kernel.h>
#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/cuda_common.h"
#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/cuda_device_info.h"

CUDA_LIB_EXPORT void GetCenterOfGeometry(const int center_numbers, float center_numbers_inverse,
                                         const int *center_atoms, const float *crd_f, float *center_of_geometry_f,
                                         cudaStream_t stream);

#endif  // MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_COMMON_GETCENTER_IMPL_H_
@ -1,51 +0,0 @@
#include "plugin/device/gpu/kernel/cuda_impl/sponge/common_sponge.cuh"
|
||||
#include "plugin/device/gpu/kernel/cuda_impl/sponge/common/map_center_of_mass_impl.cuh"
|
||||
|
||||
__global__ void Map_Center_Of_Mass(int residue_numbers, int *start, int *end,
|
||||
float *scaler, VECTOR *center_of_mass, VECTOR *box_length, VECTOR *no_wrap_crd, VECTOR *crd) {
|
||||
VECTOR trans_vec;
|
||||
VECTOR com;
|
||||
for (int residue_i = blockDim.x*blockIdx.x + threadIdx.x; residue_i < residue_numbers;
|
||||
residue_i += gridDim.x * blockDim.x) {
|
||||
com = center_of_mass[residue_i];
|
||||
|
||||
trans_vec.x = com.x - floorf(com.x / box_length[0].x) * box_length[0].x;
|
||||
trans_vec.y = com.y - floorf(com.y / box_length[0].y) * box_length[0].y;
|
||||
trans_vec.z = com.z - floorf(com.z / box_length[0].z) * box_length[0].z;
|
||||
trans_vec = scaler[0] * trans_vec - com;
|
||||
|
||||
for (int atom_i = start[residue_i] + threadIdx.y; atom_i < end[residue_i]; atom_i += blockDim.y) {
|
||||
crd[atom_i] = no_wrap_crd[atom_i] + trans_vec;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void MapCenterOfMass(int residue_numbers, int *start, int *end, float *center_of_mass_f,
|
||||
float *box_length_f, float *no_wrap_crd_f, float *crd_f, float* scaler, cudaStream_t stream) {
|
||||
VECTOR *crd = reinterpret_cast<VECTOR *>(crd_f);
|
||||
VECTOR *no_wrap_crd = reinterpret_cast<VECTOR *>(no_wrap_crd_f);
|
||||
VECTOR *box_length = reinterpret_cast<VECTOR *>(box_length_f);
|
||||
VECTOR *center_of_mass = reinterpret_cast<VECTOR *>(center_of_mass_f);
|
||||
Map_Center_Of_Mass<<<20, { 32, 4 } , 0, stream>>>(residue_numbers, start, end, scaler, center_of_mass, box_length,
|
||||
no_wrap_crd, crd);
|
||||
return;
|
||||
}
|
||||
|
||||
void MapCenterOfMass(int residue_numbers, int *start, int *end, float *center_of_mass_f,
|
||||
float *box_length_f, float *no_wrap_crd_f, float *crd_f, float* scaler, cudaStream_t stream);
|
|
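The translation applied to each residue, read off the kernel (scaler is a per-call scale factor, 1 for a pure wrap), is

  \mathbf{t}_i = s \cdot \big(\mathbf{R}_i - \mathbf{L} \odot \lfloor \mathbf{R}_i / \mathbf{L} \rfloor\big) - \mathbf{R}_i

applied componentwise: the center of mass is wrapped back into the box and every atom of the residue is shifted rigidly by the same t_i, so intra-residue geometry is preserved.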
@ -1,28 +0,0 @@
#ifndef MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_COMMON_MAPCENTEROFMASS_IMPL_H_
#define MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_COMMON_MAPCENTEROFMASS_IMPL_H_

#include <curand_kernel.h>
#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/cuda_common.h"
#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/cuda_device_info.h"

CUDA_LIB_EXPORT void MapCenterOfMass(int residue_numbers, int *start, int *end, float *center_of_mass_f,
                                     float *box_length_f, float *no_wrap_crd_f, float *crd_f, float *scaler,
                                     cudaStream_t stream);

#endif  // MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_COMMON_MAPCENTEROFMASS_IMPL_H_
@ -1,49 +0,0 @@
#include "plugin/device/gpu/kernel/cuda_impl/sponge/common_sponge.cuh"
|
||||
#include "plugin/device/gpu/kernel/cuda_impl/sponge/common/mdtemperature_impl.cuh"
|
||||
|
||||
__global__ void MDTemperatureKernel(const int residue_numbers, const int *start, const int *end, const VECTOR *atom_vel,
|
||||
const float *atom_mass, float *ek) {
|
||||
int residue_i = blockDim.x * blockIdx.x + threadIdx.x;
|
||||
if (residue_i < residue_numbers) {
|
||||
VECTOR momentum = {0., 0., 0.};
|
||||
float res_mass = 0.;
|
||||
int s = start[residue_i];
|
||||
int e = end[residue_i];
|
||||
float mass_lin;
|
||||
for (int atom_i = s; atom_i < e; atom_i = atom_i + 1) {
|
||||
mass_lin = atom_mass[atom_i];
|
||||
|
||||
momentum.x = momentum.x + mass_lin * atom_vel[atom_i].x;
|
||||
momentum.y = momentum.y + mass_lin * atom_vel[atom_i].y;
|
||||
momentum.z = momentum.z + mass_lin * atom_vel[atom_i].z;
|
||||
res_mass = res_mass + mass_lin;
|
||||
}
|
||||
ek[residue_i] = 0.5 * (momentum.x * momentum.x + momentum.y * momentum.y + momentum.z * momentum.z) / res_mass *
|
||||
2. / 3. / CONSTANT_kB / residue_numbers;
|
||||
}
|
||||
}
|
||||
|
||||
void MDTemperature(const int residue_numbers, const int *start, const int *end, const float *atom_vel_f,
|
||||
const float *atom_mass, float *ek, cudaStream_t stream) {
|
||||
VECTOR *atom_vel = const_cast<VECTOR *>(reinterpret_cast<const VECTOR *>(atom_vel_f));
|
||||
MDTemperatureKernel<<<ceilf(static_cast<float>(residue_numbers) / 32), 32, 0, stream>>>(residue_numbers, start, end,
|
||||
atom_vel, atom_mass, ek);
|
||||
return;
|
||||
}
|
||||
void MDTemperature(const int residue_numbers, const int *start, const int *end, const float *atom_vel_f,
|
||||
const float *atom_mass, float *ek, cudaStream_t stream);
|
|
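What the kernel writes per residue is a temperature estimate built from the residue's total momentum (a reconstruction from the code; CONSTANT_kB is in kcal/(mol K) as defined in common_sponge.cuh):

  ek_i = \frac{\lVert \mathbf{p}_i \rVert^2}{2 M_i} \cdot \frac{2}{3 k_B N_{res}}, \qquad \mathbf{p}_i = \sum_a m_a \mathbf{v}_a

so summing ek over residues yields the translational temperature of the residue centers of mass, not the full kinetic temperature of all atoms.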
@ -1,26 +0,0 @@
#ifndef MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_COMMON_MDTEMPERATURE_IMPL_H_
#define MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_COMMON_MDTEMPERATURE_IMPL_H_

#include <curand_kernel.h>
#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/cuda_common.h"
#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/cuda_device_info.h"

CUDA_LIB_EXPORT void MDTemperature(const int residue_numbers, const int *start, const int *end,
                                   const float *atom_vel_f, const float *atom_mass, float *ek, cudaStream_t stream);
#endif  // MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_COMMON_MDTEMPERATURE_IMPL_H_
@ -1,48 +0,0 @@
#include "plugin/device/gpu/kernel/cuda_impl/sponge/common/total_c6_get_impl.cuh"
|
||||
#include "plugin/device/gpu/kernel/cuda_impl/sponge/common_sponge.cuh"
|
||||
|
||||
__global__ void Total_C6_Get(int atom_numbers, int *atom_lj_type, float *d_lj_b, float *d_factor) {
|
||||
int i, j;
|
||||
float temp_sum = 0;
|
||||
d_factor[0] = 0;
|
||||
int x, y;
|
||||
int itype, jtype, atom_pair_LJ_type;
|
||||
for (i = blockIdx.x * blockDim.x + threadIdx.x; i < atom_numbers; i += gridDim.x * blockDim.x) {
|
||||
itype = atom_lj_type[i];
|
||||
for (j = blockIdx.y * blockDim.y + threadIdx.y; j < atom_numbers; j += gridDim.y * blockDim.y) {
|
||||
jtype = atom_lj_type[j];
|
||||
y = (jtype - itype);
|
||||
x = y >> 31;
|
||||
y = (y ^ x) - x;
|
||||
x = jtype + itype;
|
||||
jtype = (x + y) >> 1;
|
||||
x = (x - y) >> 1;
|
||||
atom_pair_LJ_type = (jtype * (jtype + 1) >> 1) + x;
|
||||
temp_sum += d_lj_b[atom_pair_LJ_type];
|
||||
}
|
||||
}
|
||||
atomicAdd(d_factor, temp_sum);
|
||||
}
|
||||
|
||||
void total_c6_get(int atom_numbers, int *atom_lj_type, float *d_lj_b, float *d_factor, cudaStream_t stream) {
|
||||
Total_C6_Get<<<{4, 4}, {32, 32}, 0, stream>>>(atom_numbers, atom_lj_type, d_lj_b, d_factor);
|
||||
return;
|
||||
}
|
||||
|
||||
void total_c6_get(int atom_numbers, int *atom_lj_type, float *d_lj_b, float *d_factor, cudaStream_t stream);
|
|
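A plain restatement of what the six bit-twiddling lines in the inner loop compute (a hypothetical helper for illustration, not part of the patch):

  // Readable equivalent of the branchless index math in Total_C6_Get.
  int PairIndex(int itype, int jtype) {
    int lo = itype < jtype ? itype : jtype;
    int hi = itype < jtype ? jtype : itype;
    return hi * (hi + 1) / 2 + lo;  // row-major packed lower triangle
  }

The branchless form avoids warp divergence in the O(N^2) loop at the cost of readability; the result is identical for all non-negative type indices.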
@ -1,26 +0,0 @@
#ifndef MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_COMMON_TOTAL_C6_GET_IMPL_H_
#define MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_COMMON_TOTAL_C6_GET_IMPL_H_

#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/cuda_common.h"
#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/cuda_device_info.h"

CUDA_LIB_EXPORT void total_c6_get(int atom_numbers, int *atom_lj_type, float *d_lj_b, float *d_factor,
                                  cudaStream_t stream);

#endif  // MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_COMMON_TOTAL_C6_GET_IMPL_H_
@ -1,366 +0,0 @@
#ifndef MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_COMMON_SPONGE_H_
#define MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_COMMON_SPONGE_H_

#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include <time.h>

#include <curand_kernel.h>

#include <cuda_runtime.h>
#include <device_launch_parameters.h>
#include <cufft.h>
#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/cuda_common.h"

#define TWO_DIVIDED_BY_SQRT_PI 1.1283791670218446
#define CONSTANT_kB 0.00198716
#define CONSTANT_Pi 3.1415926535897932f
static dim3 thread_LJ(8, 32);

__constant__ float XRD3D_Ma[4] = {1.0 / 6.0, -0.5, 0.5, -1.0 / 6.0};
__constant__ float XRD3D_Mb[4] = {0, 0.5, -1, 0.5};
__constant__ float XRD3D_Mc[4] = {0, 0.5, 0, -0.5};
__constant__ float XRD3D_Md[4] = {0, 1.0 / 6.0, 4.0 / 6.0, 1.0 / 6.0};
__constant__ float XRD3D_dMa[4] = {0.5, -1.5, 1.5, -0.5};
__constant__ float XRD3D_dMb[4] = {0, 1, -2, 1};
__constant__ float XRD3D_dMc[4] = {0, 0.5, 0, -0.5};

struct VECTOR {
  float x;
  float y;
  float z;
};

struct INT_VECTOR {
  int int_x;
  int int_y;
  int int_z;
};

struct UNSIGNED_INT_VECTOR {
  unsigned int uint_x;
  unsigned int uint_y;
  unsigned int uint_z;
};

struct NEIGHBOR_LIST {
  int atom_numbers;
  int *atom_serial;
};
struct UINT_VECTOR_LJ_TYPE {
  unsigned int uint_x;
  unsigned int uint_y;
  unsigned int uint_z;
  int LJ_type;
  float charge;
};

struct ATOM_NEAR {
  int *atom_serial;
};

struct GRID_BUCKET {
  int *atom_serial;
};

struct GRID_POINTER {
  int *grid_serial;
};

struct VIRTUAL_TYPE_0 {
  float virtual_atom;
  float from_1;
  float h_double;
};

struct VIRTUAL_TYPE_1 {
  float virtual_atom;
  float from_1;
  float from_2;
  float a;
};

struct VIRTUAL_TYPE_2 {
  float virtual_atom;
  float from_1;
  float from_2;
  float from_3;
  float a;
  float b;
};

struct VIRTUAL_TYPE_3 {
  float virtual_atom;
  float from_1;
  float from_2;
  float from_3;
  float d;
  float k;
};

struct CONSTRAIN_PAIR {
  int atom_i_serial;
  int atom_j_serial;
  float constant_r;
  float constrain_k;
};

__device__ __host__ static inline VECTOR operator-(const VECTOR &veca, const VECTOR &vecb) {
  VECTOR vec;
  vec.x = veca.x - vecb.x;
  vec.y = veca.y - vecb.y;
  vec.z = veca.z - vecb.z;
  return vec;
}

__device__ __host__ static inline VECTOR Get_Periodic_Displacement(const UNSIGNED_INT_VECTOR uvec_a,
                                                                   const UNSIGNED_INT_VECTOR uvec_b,
                                                                   const VECTOR scaler) {
  VECTOR dr;
  dr.x = (static_cast<int>(uvec_a.uint_x - uvec_b.uint_x)) * scaler.x;
  dr.y = (static_cast<int>(uvec_a.uint_y - uvec_b.uint_y)) * scaler.y;
  dr.z = (static_cast<int>(uvec_a.uint_z - uvec_b.uint_z)) * scaler.z;
  return dr;
}

__device__ __host__ static inline VECTOR Get_Periodic_Displacement(const UINT_VECTOR_LJ_TYPE uvec_a,
                                                                   const UINT_VECTOR_LJ_TYPE uvec_b,
                                                                   const VECTOR scaler) {
  VECTOR dr;
  dr.x = (static_cast<int>(uvec_a.uint_x - uvec_b.uint_x)) * scaler.x;
  dr.y = (static_cast<int>(uvec_a.uint_y - uvec_b.uint_y)) * scaler.y;
  dr.z = (static_cast<int>(uvec_a.uint_z - uvec_b.uint_z)) * scaler.z;
  return dr;
}

__device__ __host__ static inline VECTOR Get_Periodic_Displacement(const VECTOR vec_a, const VECTOR vec_b,
                                                                   const VECTOR box_length) {
  VECTOR dr;
  dr = vec_a - vec_b;
  dr.x = dr.x - floorf(dr.x / box_length.x + 0.5) * box_length.x;
  dr.y = dr.y - floorf(dr.y / box_length.y + 0.5) * box_length.y;
  dr.z = dr.z - floorf(dr.z / box_length.z + 0.5) * box_length.z;
  return dr;
}

__device__ __host__ static inline VECTOR Get_Periodic_Displacement(const VECTOR vec_a, const VECTOR vec_b,
                                                                   const VECTOR box_length,
                                                                   const VECTOR box_length_inverse) {
  VECTOR dr;
  dr = vec_a - vec_b;
  dr.x = dr.x - floorf(dr.x * box_length_inverse.x + 0.5) * box_length.x;
  dr.y = dr.y - floorf(dr.y * box_length_inverse.y + 0.5) * box_length.y;
  dr.z = dr.z - floorf(dr.z * box_length_inverse.z + 0.5) * box_length.z;
  return dr;
}

__device__ __host__ static inline VECTOR operator+(const VECTOR &veca, const VECTOR &vecb) {
  VECTOR vec;
  vec.x = veca.x + vecb.x;
  vec.y = veca.y + vecb.y;
  vec.z = veca.z + vecb.z;
  return vec;
}

__device__ __host__ static inline float operator*(const VECTOR &veca, const VECTOR &vecb) {
  return veca.x * vecb.x + veca.y * vecb.y + veca.z * vecb.z;
}
__device__ __host__ static inline VECTOR operator*(const float &a, const VECTOR &vecb) {
  VECTOR vec;
  vec.x = a * vecb.x;
  vec.y = a * vecb.y;
  vec.z = a * vecb.z;
  return vec;
}

__device__ __host__ static inline VECTOR operator-(const VECTOR &vecb) {
  VECTOR vec;
  vec.x = -vecb.x;
  vec.y = -vecb.y;
  vec.z = -vecb.z;
  return vec;
}

__device__ __host__ static inline VECTOR operator^(const VECTOR &veca, const VECTOR &vecb) {
  VECTOR vec;
  vec.x = veca.y * vecb.z - veca.z * vecb.y;
  vec.y = veca.z * vecb.x - veca.x * vecb.z;
  vec.z = veca.x * vecb.y - veca.y * vecb.x;
  return vec;
}

__device__ __host__ static inline float normfloat(const float *x, const float *y, int i, int j) {
  float s = 0;
  s += (x[3 * i + 0] - y[3 * j + 0]) * (x[3 * i + 0] - y[3 * j + 0]);
  s += (x[3 * i + 1] - y[3 * j + 1]) * (x[3 * i + 1] - y[3 * j + 1]);
  s += (x[3 * i + 2] - y[3 * j + 2]) * (x[3 * i + 2] - y[3 * j + 2]);
  return s;
}

__global__ static void construct_neighbor_list_kernel(int atom_numbers, int max_neighbor_numbers, int *nl_atom_numbers,
                                                      int *nl_atom_serial, NEIGHBOR_LIST *nl) {
  for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < atom_numbers; i += gridDim.x * blockDim.x) {
    nl[i].atom_numbers = nl_atom_numbers[i];
    nl[i].atom_serial = nl_atom_serial + i * max_neighbor_numbers;
  }
}

__global__ static void construct_atom_near(int atom_numbers, int near_numbers, int *atom_serial, ATOM_NEAR *an) {
  for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < atom_numbers; i += gridDim.x * blockDim.x) {
    an[i].atom_serial = atom_serial + i * near_numbers;
  }
}

static inline bool Malloc_Safely(void **address, size_t size) {
  address[0] = NULL;
  address[0] = reinterpret_cast<void *>(malloc(size));
  if (address[0] != NULL) {
    return true;
  } else {
    printf("malloc failed!\n");
    getchar();
    return false;
  }
}

static inline bool Cuda_Malloc_Safely(void **address, size_t size) {
  cudaError_t cuda_error = cudaMalloc(&address[0], size);
  if (cuda_error == 0) {
    return true;
  } else {
    printf("cudaMalloc failed! error %d\n", cuda_error);
    getchar();
    return false;
  }
}

__global__ static void construct_constrain_pair(int constrain_pair_numbers, const int *atom_i_serials,
                                                const int *atom_j_serials, const float *constant_rs,
                                                const float *constrain_ks, CONSTRAIN_PAIR *constrain_pair) {
  int atom_i = blockDim.x * blockIdx.x + threadIdx.x;
  if (atom_i < constrain_pair_numbers) {
    constrain_pair[atom_i].atom_i_serial = atom_i_serials[atom_i];
    constrain_pair[atom_i].atom_j_serial = atom_j_serials[atom_i];
    constrain_pair[atom_i].constant_r = constant_rs[atom_i];
    constrain_pair[atom_i].constrain_k = constrain_ks[atom_i];
  }
}

__global__ static void Copy_Crd_To_New_Crd_Start(const int atom_numbers, const UNSIGNED_INT_VECTOR *crd,
                                                 UINT_VECTOR_LJ_TYPE *new_crd, const int *LJ_type,
                                                 const float *charge) {
  int atom_i = blockDim.x * blockIdx.x + threadIdx.x;
  if (atom_i < atom_numbers) {
    new_crd[atom_i].uint_x = crd[atom_i].uint_x;
    new_crd[atom_i].uint_y = crd[atom_i].uint_y;
    new_crd[atom_i].uint_z = crd[atom_i].uint_z;
    new_crd[atom_i].LJ_type = LJ_type[atom_i];
    new_crd[atom_i].charge = charge[atom_i];
  }
}

// void Constrain_Force_Cycle_With_Virial(int atom_numbers, int constrain_pair_numbers, const unsigned int *uint_crd_f,
//                                        const float *scaler_f, float *constrain_pair_f, const float *pair_dr_f,
//                                        const int *atom_i_serials, const int *atom_j_serials,
//                                        const float *constant_rs, const float *constrain_ks, float *test_frc_f,
//                                        float *d_atom_virial, cudaStream_t stream);

__global__ static void Rand_Normal(const int float4_numbers, curandStatePhilox4_32_10_t *rand_state,
                                   float4 *rand_float4) {
  int i = blockDim.x * blockIdx.x + threadIdx.x;
  if (i < float4_numbers) {
    rand_float4[i] = curand_normal4(&rand_state[i]);
  }
}

__global__ static void Setup_Rand_Normal_Kernel(const int float4_numbers, curandStatePhilox4_32_10_t *rand_state,
                                                const int seed) {
  int id = threadIdx.x + blockIdx.x * blockDim.x;
  /* Each thread gets same seed, a different sequence number, no offset */
  if (id < float4_numbers) {
    curand_init(seed, id, 0, &rand_state[id]);
  }
}

__global__ static void Reset_List(const int element_numbers, int *list, const int replace_element) {
  int i = blockDim.x * blockIdx.x + threadIdx.x;
  if (i < element_numbers) {
    list[i] = replace_element;
  }
}

__global__ static void Reset_List(const int element_numbers, float *list, const float replace_element) {
  int i = blockDim.x * blockIdx.x + threadIdx.x;
  if (i < element_numbers) {
    list[i] = replace_element;
  }
}

__global__ static void Sum_Of_List(const int element_numbers, const float *list, float *sum) {
  if (threadIdx.x == 0) {
    sum[0] = 0.;
  }
  __syncthreads();
  float lin = 0.;
  for (int i = threadIdx.x; i < element_numbers; i = i + blockDim.x) {
    lin = lin + list[i];
  }
  atomicAdd(sum, lin);
}

__global__ static void Scale_List(const int element_numbers, float *list, float scaler) {
  int i = blockDim.x * blockIdx.x + threadIdx.x;
  if (i < element_numbers) {
    list[i] = list[i] * scaler;
  }
}

__global__ static void Copy_List(const int element_numbers, const int *origin_list, int *list) {
  int i = blockDim.x * blockIdx.x + threadIdx.x;
  if (i < element_numbers) {
    list[i] = origin_list[i];
  }
}
__global__ static void Copy_List(const int element_numbers, const float *origin_list, float *list) {
  int i = blockDim.x * blockIdx.x + threadIdx.x;
  if (i < element_numbers) {
    list[i] = origin_list[i];
  }
}

__global__ static void Print(const size_t size, const float *input_x) {
  for (size_t i = 0; i < size; i++) {
    printf("%f\n", input_x[i]);
  }
  return;
}
__global__ static void Print(const size_t size, const int *input_x) {
  for (size_t i = 0; i < size; i++) {
    printf("%d\n", input_x[i]);
  }
  return;
}

__device__ static VECTOR Make_Vector_Not_Exceed_Value(VECTOR vector, const float value) {
  return fminf(1.0, value * rnorm3df(vector.x, vector.y, vector.z)) * vector;
}

#endif  // MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_COMMON_SPONGE_H_
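The four Get_Periodic_Displacement overloads are two implementations of the same minimum-image convention: the unsigned-integer pair relies on modular wraparound plus a signed cast (see the note after CrdToUintCrd above), while the float pair computes it explicitly, componentwise, as

  \Delta\mathbf{r} = \mathbf{r}_a - \mathbf{r}_b - \mathbf{L} \odot \left\lfloor \frac{\mathbf{r}_a - \mathbf{r}_b}{\mathbf{L}} + \tfrac{1}{2} \right\rfloor

with 1/L optionally precomputed as box_length_inverse so the division becomes a multiply.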
@ -1,46 +0,0 @@
#include "plugin/device/gpu/kernel/cuda_impl/sponge/crdmcmap/cal_no_wrap_crd_impl.cuh"
|
||||
#include "plugin/device/gpu/kernel/cuda_impl/sponge/common_sponge.cuh"
|
||||
#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/util.cuh"
|
||||
|
||||
__global__ void Calculate_No_Wrap_Crd(int atom_numbers, INT_VECTOR *box_map_times, VECTOR *box, VECTOR *crd,
|
||||
VECTOR *nowrap_crd) {
|
||||
for (int i = threadIdx.x; i < atom_numbers; i = i + blockDim.x) {
|
||||
nowrap_crd[i].x = static_cast<float>(box_map_times[i].int_x) * box[0].x + crd[i].x;
|
||||
nowrap_crd[i].y = static_cast<float>(box_map_times[i].int_y) * box[0].y + crd[i].y;
|
||||
nowrap_crd[i].z = static_cast<float>(box_map_times[i].int_z) * box[0].z + crd[i].z;
|
||||
}
|
||||
}
|
||||
|
||||
void calculatenowrapcrd(int atom_numbers, int *box_map_times_f, float *box_f, float *crd_f, float *nowrap_crd_f,
|
||||
cudaStream_t stream) {
|
||||
Reset_List<<<ceilf(static_cast<float>(3. * atom_numbers) / 128), 128, 0, stream>>>(3 * atom_numbers, nowrap_crd_f,
|
||||
0.);
|
||||
INT_VECTOR *box_map_times = reinterpret_cast<INT_VECTOR *>(box_map_times_f);
|
||||
VECTOR *box = reinterpret_cast<VECTOR *>(box_f);
|
||||
VECTOR *crd = reinterpret_cast<VECTOR *>(crd_f);
|
||||
VECTOR *nowrap_crd = reinterpret_cast<VECTOR *>(nowrap_crd_f);
|
||||
|
||||
Calculate_No_Wrap_Crd<<<20, 256, 0, stream>>>(atom_numbers, box_map_times, box, crd,
|
||||
nowrap_crd);
|
||||
return;
|
||||
}
|
||||
|
||||
void calculatenowrapcrd(int atom_numbers, int *box_map_times_f, float *box_f, float *crd_f, float *nowrap_crd_f,
|
||||
cudaStream_t stream);
|
|
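The unwrapped coordinate is reconstructed componentwise from the accumulated wrap counts:

  \mathbf{r}^{nowrap}_i = \mathbf{n}_i \odot \mathbf{L} + \mathbf{r}_i

where n_i is the integer box_map_times vector maintained by refresh_boxmaptimes below.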
@ -1,28 +0,0 @@
#ifndef MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_CRDMCMAP_CAL_NO_WRAP_CRD_IMPL_H_
#define MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_CRDMCMAP_CAL_NO_WRAP_CRD_IMPL_H_

#include <curand_kernel.h>
#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/cuda_common.h"
#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/cuda_device_info.h"

CUDA_LIB_EXPORT void calculatenowrapcrd(int atom_numbers, int *box_map_times_f, float *box_f, float *crd_f,
                                        float *nowrap_crd_f, cudaStream_t stream);

#endif  // MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_CRDMCMAP_CAL_NO_WRAP_CRD_IMPL_H_
@ -1,47 +0,0 @@
#include "plugin/device/gpu/kernel/cuda_impl/sponge/crdmcmap/refresh_boxmaptimes_impl.cuh"
|
||||
#include "plugin/device/gpu/kernel/cuda_impl/sponge/common_sponge.cuh"
|
||||
|
||||
__global__ void Refresh_BoxMapTimes_CUDA(int atom_numbers, VECTOR *box_length_inverse, VECTOR *crd,
|
||||
INT_VECTOR *box_map_times, VECTOR *old_crd) {
|
||||
VECTOR crd_i, old_crd_i;
|
||||
for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < atom_numbers; i += gridDim.x * blockDim.x) {
|
||||
crd_i = crd[i];
|
||||
old_crd_i = old_crd[i];
|
||||
box_map_times[i].int_x += floor((old_crd_i.x - crd_i.x) * box_length_inverse[0].x + 0.5);
|
||||
box_map_times[i].int_y += floor((old_crd_i.y - crd_i.y) * box_length_inverse[0].y + 0.5);
|
||||
box_map_times[i].int_z += floor((old_crd_i.z - crd_i.z) * box_length_inverse[0].z + 0.5);
|
||||
old_crd[i] = crd_i;
|
||||
}
|
||||
}
|
||||
|
||||
void refresh_boxmaptimes(int atom_numbers, float *box_length_inverse_f, float *crd_f, float *old_crd_f,
|
||||
int *box_map_times_f, cudaStream_t stream) {
|
||||
INT_VECTOR *box_map_times = reinterpret_cast<INT_VECTOR *>(box_map_times_f);
|
||||
VECTOR *box_length_inverse = reinterpret_cast<VECTOR *>(box_length_inverse_f);
|
||||
VECTOR *crd = reinterpret_cast<VECTOR *>(crd_f);
|
||||
VECTOR *old_crd = reinterpret_cast<VECTOR *>(old_crd_f);
|
||||
|
||||
Refresh_BoxMapTimes_CUDA<<<1, 256, 0, stream>>>(atom_numbers, box_length_inverse, crd,
|
||||
box_map_times, old_crd);
|
||||
return;
|
||||
}
|
||||
|
||||
void refresh_boxmaptimes(int atom_numbers, float *box_length_inverse, float *crd_f, float *old_crd_f,
|
||||
int *box_map_times_f, cudaStream_t stream);
|
|
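In symbols, each call updates the per-atom wrap counter componentwise as

  \mathbf{n}_i \mathrel{+}= \left\lfloor \frac{\mathbf{r}^{old}_i - \mathbf{r}_i}{\mathbf{L}} + \tfrac{1}{2} \right\rfloor

which only stays correct if no atom moves more than half a box between calls; that is also why old_crd must be refreshed inside the same kernel, as done above.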
@ -1,28 +0,0 @@
#ifndef MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_CRDMCMAP_REFRESH_BOXMAPTIMES_IMPL_H_
#define MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_CRDMCMAP_REFRESH_BOXMAPTIMES_IMPL_H_

#include <curand_kernel.h>
#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/cuda_common.h"
#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/cuda_device_info.h"

CUDA_LIB_EXPORT void refresh_boxmaptimes(int atom_numbers, float *box_length_inverse, float *crd_f, float *old_crd_f,
                                         int *box_map_times_f, cudaStream_t stream);

#endif  // MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_CRDMCMAP_REFRESH_BOXMAPTIMES_IMPL_H_
@ -1,84 +0,0 @@
#include "plugin/device/gpu/kernel/cuda_impl/sponge/dihedral/dihedral_atom_energy_impl.cuh"
|
||||
#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/util.cuh"
|
||||
#include "plugin/device/gpu/kernel/cuda_impl/sponge/common_sponge.cuh"
|
||||
|
||||
__global__ void DihedralAtomEnergyKernel(int dihedral_numbers, const UNSIGNED_INT_VECTOR *uint_crd,
|
||||
const VECTOR *scaler, const int *atom_a, const int *atom_b, const int *atom_c,
|
||||
const int *atom_d, const int *ipn, const float *pk, const float *gamc,
|
||||
const float *gams, const float *pn, float *ene) {
|
||||
int dihedral_i = blockDim.x * blockIdx.x + threadIdx.x;
|
||||
if (dihedral_i < dihedral_numbers) {
|
||||
int atom_i = atom_a[dihedral_i];
|
||||
int atom_j = atom_b[dihedral_i];
|
||||
int atom_k = atom_c[dihedral_i];
|
||||
int atom_l = atom_d[dihedral_i];
|
||||
|
||||
float temp_pk = pk[dihedral_i];
|
||||
float temp_pn = pn[dihedral_i];
|
||||
float temp_gamc = gamc[dihedral_i];
|
||||
float temp_gams = gams[dihedral_i];
|
||||
|
||||
VECTOR drij = Get_Periodic_Displacement(uint_crd[atom_i], uint_crd[atom_j], scaler[0]);
|
||||
VECTOR drkj = Get_Periodic_Displacement(uint_crd[atom_k], uint_crd[atom_j], scaler[0]);
|
||||
VECTOR drkl = Get_Periodic_Displacement(uint_crd[atom_k], uint_crd[atom_l], scaler[0]);
|
||||
|
||||
VECTOR r1 = drij ^ drkj;
|
||||
VECTOR r2 = drkl ^ drkj;
|
||||
|
||||
float r1_1 = rnorm3df(r1.x, r1.y, r1.z);
|
||||
float r2_1 = rnorm3df(r2.x, r2.y, r2.z);
|
||||
float r1_1_r2_1 = r1_1 * r2_1;
|
||||
|
||||
float phi = r1 * r2 * r1_1_r2_1;
|
||||
phi = fmaxf(-0.999999, fminf(phi, 0.999999));
|
||||
phi = acosf(phi);
|
||||
|
||||
float sign = (r2 ^ r1) * drkj;
|
||||
copysignf(phi, sign);
|
||||
|
||||
phi = CONSTANT_Pi - phi;
|
||||
|
||||
float nphi = temp_pn * phi;
|
||||
|
||||
float cos_nphi = cosf(nphi);
|
||||
float sin_nphi = sinf(nphi);
|
||||
|
||||
atomicAdd(&ene[atom_i], (temp_pk + cos_nphi * temp_gamc + sin_nphi * temp_gams));
|
||||
}
|
||||
}
|
||||
|
||||
void DihedralAtomEnergy(int dihedral_numbers, int atom_numbers, const int *uint_crd_f, const float *scaler_f,
|
||||
const int *atom_a, const int *atom_b, const int *atom_c, const int *atom_d, const int *ipn,
|
||||
const float *pk, const float *gamc, const float *gams, const float *pn, float *ene,
|
||||
cudaStream_t stream) {
|
||||
Reset_List<<<ceilf(static_cast<float>(atom_numbers) / 128), 128, 0, stream>>>(atom_numbers, ene, 0.);
|
||||
size_t thread_per_block = 128;
|
||||
size_t block_per_grid = ceilf(static_cast<float>(dihedral_numbers) / 128);
|
||||
UNSIGNED_INT_VECTOR *uint_crd =
|
||||
const_cast<UNSIGNED_INT_VECTOR *>(reinterpret_cast<const UNSIGNED_INT_VECTOR *>(uint_crd_f));
|
||||
VECTOR *scaler = const_cast<VECTOR *>(reinterpret_cast<const VECTOR *>(scaler_f));
|
||||
|
||||
DihedralAtomEnergyKernel<<<block_per_grid, thread_per_block, 0, stream>>>(
|
||||
dihedral_numbers, uint_crd, scaler, atom_a, atom_b, atom_c, atom_d, ipn, pk, gamc, gams, pn, ene);
|
||||
return;
|
||||
}
|
||||
void DihedralAtomEnergy(int dihedral_numbers, int atom_numbers, const int *uint_crd_f, const float *scaler_f,
|
||||
const int *atom_a, const int *atom_b, const int *atom_c, const int *atom_d, const int *ipn,
|
||||
const float *pk, const float *gamc, const float *gams, const float *pn, float *ene,
|
||||
cudaStream_t stream);
|
|
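For reference, the torsion term deposited on atom_i is the usual AMBER-style cosine series (one term per record; gamc = pk cos(n phi_0) and gams = pk sin(n phi_0) are assumed precomputed, which is the standard factorization):

  E(\phi) = p_k + \gamma_c \cos(n\phi) + \gamma_s \sin(n\phi) = p_k \left(1 + \cos(n\phi - n\phi_0)\right)

with phi obtained from the two plane normals r1 = r_ij x r_kj and r2 = r_kl x r_kj, clamped before acosf and sign-restored via copysignf. The same angle computation recurs in DihedralEnergy and DihedralForce below.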
@ -1,28 +0,0 @@
#ifndef MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_DIHEDRAL_DIHEDRAL_ATOM_ENERGY_IMPL_H_
#define MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_DIHEDRAL_DIHEDRAL_ATOM_ENERGY_IMPL_H_

#include <curand_kernel.h>
#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/cuda_common.h"
#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/cuda_device_info.h"

CUDA_LIB_EXPORT void DihedralAtomEnergy(int dihedral_numbers, int atom_numbers, const int *uint_crd_f,
                                        const float *scaler_f, const int *atom_a, const int *atom_b, const int *atom_c,
                                        const int *atom_d, const int *ipn, const float *pk, const float *gamc,
                                        const float *gams, const float *pn, float *ene, cudaStream_t stream);
#endif  // MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_DIHEDRAL_DIHEDRAL_ATOM_ENERGY_IMPL_H_
@ -1,81 +0,0 @@
#include "plugin/device/gpu/kernel/cuda_impl/sponge/dihedral/dihedral_energy_impl.cuh"
|
||||
#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/util.cuh"
|
||||
#include "plugin/device/gpu/kernel/cuda_impl/sponge/common_sponge.cuh"
|
||||
|
||||
__global__ void DihedralEnergyKernel(int dihedral_numbers, const UNSIGNED_INT_VECTOR *uint_crd, const VECTOR *scaler,
|
||||
const int *atom_a, const int *atom_b, const int *atom_c, const int *atom_d,
|
||||
const int *ipn, const float *pk, const float *gamc, const float *gams,
|
||||
const float *pn, float *ene) {
|
||||
int dihedral_i = blockDim.x * blockIdx.x + threadIdx.x;
|
||||
if (dihedral_i < dihedral_numbers) {
|
||||
int atom_i = atom_a[dihedral_i];
|
||||
int atom_j = atom_b[dihedral_i];
|
||||
int atom_k = atom_c[dihedral_i];
|
||||
int atom_l = atom_d[dihedral_i];
|
||||
|
||||
float temp_pk = pk[dihedral_i];
|
||||
float temp_pn = pn[dihedral_i];
|
||||
float temp_gamc = gamc[dihedral_i];
|
||||
float temp_gams = gams[dihedral_i];
|
||||
|
||||
VECTOR drij = Get_Periodic_Displacement(uint_crd[atom_i], uint_crd[atom_j], scaler[0]);
|
||||
VECTOR drkj = Get_Periodic_Displacement(uint_crd[atom_k], uint_crd[atom_j], scaler[0]);
|
||||
VECTOR drkl = Get_Periodic_Displacement(uint_crd[atom_k], uint_crd[atom_l], scaler[0]);
|
||||
|
||||
VECTOR r1 = drij ^ drkj;
|
||||
VECTOR r2 = drkl ^ drkj;
|
||||
|
||||
float r1_1 = rnorm3df(r1.x, r1.y, r1.z);
|
||||
float r2_1 = rnorm3df(r2.x, r2.y, r2.z);
|
||||
float r1_1_r2_1 = r1_1 * r2_1;
|
||||
|
||||
float phi = r1 * r2 * r1_1_r2_1;
|
||||
phi = fmaxf(-0.999999, fminf(phi, 0.999999));
|
||||
phi = acosf(phi);
|
||||
|
||||
float sign = (r2 ^ r1) * drkj;
|
||||
copysignf(phi, sign);
|
||||
|
||||
phi = CONSTANT_Pi - phi;
|
||||
|
||||
float nphi = temp_pn * phi;
|
||||
|
||||
float cos_nphi = cosf(nphi);
|
||||
float sin_nphi = sinf(nphi);
|
||||
|
||||
ene[dihedral_i] = (temp_pk + cos_nphi * temp_gamc + sin_nphi * temp_gams);
|
||||
}
|
||||
}
|
||||
|
||||
void DihedralEnergy(int dihedral_numbers, const int *uint_crd_f, const float *scaler_f, const int *atom_a,
|
||||
const int *atom_b, const int *atom_c, const int *atom_d, const int *ipn, const float *pk,
|
||||
const float *gamc, const float *gams, const float *pn, float *ene, cudaStream_t stream) {
|
||||
size_t thread_per_block = 128;
|
||||
size_t block_per_grid = ceilf(static_cast<float>(dihedral_numbers) / 128);
|
||||
UNSIGNED_INT_VECTOR *uint_crd =
|
||||
const_cast<UNSIGNED_INT_VECTOR *>(reinterpret_cast<const UNSIGNED_INT_VECTOR *>(uint_crd_f));
|
||||
VECTOR *scaler = const_cast<VECTOR *>(reinterpret_cast<const VECTOR *>(scaler_f));
|
||||
|
||||
DihedralEnergyKernel<<<block_per_grid, thread_per_block, 0, stream>>>(
|
||||
dihedral_numbers, uint_crd, scaler, atom_a, atom_b, atom_c, atom_d, ipn, pk, gamc, gams, pn, ene);
|
||||
return;
|
||||
}
|
||||
void DihedralEnergy(int dihedral_numbers, const int *uint_crd_f, const float *scaler_f, const int *atom_a,
|
||||
const int *atom_b, const int *atom_c, const int *atom_d, const int *ipn, const float *pk,
|
||||
const float *gamc, const float *gams, const float *pn, float *ene, cudaStream_t stream);
|
|
@ -1,28 +0,0 @@
#ifndef MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_DIHEDRAL_DIHEDRAL_ENERGY_IMPL_H_
|
||||
#define MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_DIHEDRAL_DIHEDRAL_ENERGY_IMPL_H_
|
||||
|
||||
#include <curand_kernel.h>
|
||||
#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/cuda_common.h"
|
||||
#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/cuda_device_info.h"
|
||||
|
||||
CUDA_LIB_EXPORT void DihedralEnergy(int dihedral_numbers, const int *uint_crd_f, const float *scaler_f,
|
||||
const int *atom_a, const int *atom_b, const int *atom_c, const int *atom_d,
|
||||
const int *ipn, const float *pk, const float *gamc, const float *gams,
|
||||
const float *pn, float *ene, cudaStream_t stream);
|
||||
#endif
|
|
@@ -1,121 +0,0 @@
/* Copyright 2021 Huawei Technologies Co., Ltd. Licensed under the Apache License, Version 2.0 (full text as in the headers above). */

#include "plugin/device/gpu/kernel/cuda_impl/sponge/dihedral/dihedral_force_impl.cuh"
#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/util.cuh"
#include "plugin/device/gpu/kernel/cuda_impl/sponge/common_sponge.cuh"

__global__ void DihedralForceKernel(int dihedral_numbers, const UNSIGNED_INT_VECTOR *uint_crd, const VECTOR *scaler,
                                    const int *atom_a, const int *atom_b, const int *atom_c, const int *atom_d,
                                    const int *ipn, const float *pk, const float *gamc, const float *gams,
                                    const float *pn, VECTOR *frc) {
  int dihedral_i = blockDim.x * blockIdx.x + threadIdx.x;
  if (dihedral_i < dihedral_numbers) {
    int atom_i = atom_a[dihedral_i];
    int atom_j = atom_b[dihedral_i];
    int atom_k = atom_c[dihedral_i];
    int atom_l = atom_d[dihedral_i];

    int temp_ipn = ipn[dihedral_i];

    float temp_pn = pn[dihedral_i];
    float temp_gamc = gamc[dihedral_i];
    float temp_gams = gams[dihedral_i];

    VECTOR drij = Get_Periodic_Displacement(uint_crd[atom_i], uint_crd[atom_j], scaler[0]);
    VECTOR drkj = Get_Periodic_Displacement(uint_crd[atom_k], uint_crd[atom_j], scaler[0]);
    VECTOR drkl = Get_Periodic_Displacement(uint_crd[atom_k], uint_crd[atom_l], scaler[0]);

    VECTOR r1 = drij ^ drkj;
    VECTOR r2 = drkl ^ drkj;

    float r1_1 = rnorm3df(r1.x, r1.y, r1.z);
    float r2_1 = rnorm3df(r2.x, r2.y, r2.z);
    float r1_2 = r1_1 * r1_1;
    float r2_2 = r2_1 * r2_1;
    float r1_1_r2_1 = r1_1 * r2_1;

    float phi = r1 * r2 * r1_1_r2_1;
    phi = fmaxf(-0.999999, fminf(phi, 0.999999));
    phi = acosf(phi);

    float sign = (r2 ^ r1) * drkj;
    phi = copysignf(phi, sign);  // copysignf returns the signed value; the original bare call discarded it

    phi = CONSTANT_Pi - phi;

    float nphi = temp_pn * phi;

    float cos_phi = cosf(phi);
    float sin_phi = sinf(phi);
    float cos_nphi = cosf(nphi);
    float sin_nphi = sinf(nphi);

    float dE_dphi;
    if (fabsf(sin_phi) < 1e-6) {
      temp_ipn *= temp_ipn % 2;  // (((temp_ipn - 1) & 1) ^ 1)
      dE_dphi = temp_gamc * (temp_pn - temp_ipn + temp_ipn * cos_phi);
    } else {
      dE_dphi = temp_pn * (temp_gamc * sin_nphi - temp_gams * cos_nphi) / sin_phi;
    }

    VECTOR dphi_dr1 = r1_1_r2_1 * r2 + cos_phi * r1_2 * r1;
    VECTOR dphi_dr2 = r1_1_r2_1 * r1 + cos_phi * r2_2 * r2;

    VECTOR dE_dri = dE_dphi * drkj ^ dphi_dr1;
    VECTOR dE_drl = dE_dphi * dphi_dr2 ^ drkj;
    VECTOR dE_drj_part = dE_dphi * ((drij ^ dphi_dr1) + (drkl ^ dphi_dr2));

    VECTOR fi = dE_dri;
    VECTOR fj = dE_drj_part - dE_dri;
    VECTOR fk = -dE_drl - dE_drj_part;
    VECTOR fl = dE_drl;

    atomicAdd(&frc[atom_i].x, fi.x);
    atomicAdd(&frc[atom_i].y, fi.y);
    atomicAdd(&frc[atom_i].z, fi.z);
    atomicAdd(&frc[atom_j].x, fj.x);
    atomicAdd(&frc[atom_j].y, fj.y);
    atomicAdd(&frc[atom_j].z, fj.z);
    atomicAdd(&frc[atom_k].x, fk.x);
    atomicAdd(&frc[atom_k].y, fk.y);
    atomicAdd(&frc[atom_k].z, fk.z);
    atomicAdd(&frc[atom_l].x, fl.x);
    atomicAdd(&frc[atom_l].y, fl.y);
    atomicAdd(&frc[atom_l].z, fl.z);
  }
}

void DihedralForce(int dihedral_numbers, int atom_numbers, const int *uint_crd_f, const float *scaler_f,
                   const int *atom_a, const int *atom_b, const int *atom_c, const int *atom_d, const int *ipn,
                   const float *pk, const float *gamc, const float *gams, const float *pn, float *frc_f,
                   cudaStream_t stream) {
  Reset_List<<<ceilf(static_cast<float>(3. * atom_numbers) / 128), 128, 0, stream>>>(3 * atom_numbers, frc_f, 0.);
  size_t thread_per_block = 128;
  size_t block_per_grid = ceilf(static_cast<float>(dihedral_numbers) / 128);
  UNSIGNED_INT_VECTOR *uint_crd =
    const_cast<UNSIGNED_INT_VECTOR *>(reinterpret_cast<const UNSIGNED_INT_VECTOR *>(uint_crd_f));
  VECTOR *frc = const_cast<VECTOR *>(reinterpret_cast<const VECTOR *>(frc_f));
  VECTOR *scaler = const_cast<VECTOR *>(reinterpret_cast<const VECTOR *>(scaler_f));

  DihedralForceKernel<<<block_per_grid, thread_per_block, 0, stream>>>(
    dihedral_numbers, uint_crd, scaler, atom_a, atom_b, atom_c, atom_d, ipn, pk, gamc, gams, pn, frc);
  return;
}
void DihedralForce(int dihedral_numbers, int atom_numbers, const int *uint_crd_f, const float *scaler_f,
                   const int *atom_a, const int *atom_b, const int *atom_c, const int *atom_d, const int *ipn,
                   const float *pk, const float *gamc, const float *gams, const float *pn, float *frc_f,
                   cudaStream_t stream);
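A note on the singular branch above: away from sin(phi) = 0 the kernel applies the chain rule dE/dphi = pn*(gamc*sin(pn*phi) - gams*cos(pn*phi))/sin(phi); as sin(phi) -> 0 that quotient is replaced by its analytic limit, and `temp_ipn *= temp_ipn % 2` zeroes the ipn correction for even periodicities. A hedged restatement of the fallback branch, with illustrative names:

// Limit form of dE/dphi used when fabsf(sin_phi) < 1e-6.
// For even ipn the correction term vanishes (ipn % 2 == 0); for odd ipn it survives,
// matching the kernel's temp_ipn *= temp_ipn % 2 for non-negative ipn.
float DEDPhiNearSingularity(float pn, float gamc, float cos_phi, int ipn) {
  int ipn_odd = ipn * (ipn % 2);
  return gamc * (pn - ipn_odd + ipn_odd * cos_phi);
}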
@@ -1,28 +0,0 @@
/* Copyright 2021 Huawei Technologies Co., Ltd. Licensed under the Apache License, Version 2.0 (full text as in the headers above). */

#ifndef MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_DIHEDRAL_DIHEDRAL_FORCE_IMPL_H_
#define MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_DIHEDRAL_DIHEDRAL_FORCE_IMPL_H_

#include <curand_kernel.h>
#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/cuda_common.h"
#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/cuda_device_info.h"

CUDA_LIB_EXPORT void DihedralForce(int dihedral_numbers, int atom_numbers, const int *uint_crd_f, const float *scaler_f,
                                   const int *atom_a, const int *atom_b, const int *atom_c, const int *atom_d,
                                   const int *ipn, const float *pk, const float *gamc, const float *gams,
                                   const float *pn, float *frc_f, cudaStream_t stream);
#endif
@@ -1,125 +0,0 @@
/* Copyright 2021 Huawei Technologies Co., Ltd. Licensed under the Apache License, Version 2.0 (full text as in the headers above). */

#include "plugin/device/gpu/kernel/cuda_impl/sponge/dihedral/dihedral_force_with_atom_energy_impl.cuh"
#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/util.cuh"
#include "plugin/device/gpu/kernel/cuda_impl/sponge/common_sponge.cuh"

__global__ void DihedralForceWithAtomEnergyKernel(int dihedral_numbers, const UNSIGNED_INT_VECTOR *uint_crd,
                                                  const VECTOR *scaler, const int *atom_a, const int *atom_b,
                                                  const int *atom_c, const int *atom_d, const int *ipn, const float *pk,
                                                  const float *gamc, const float *gams, const float *pn, VECTOR *frc,
                                                  float *ene) {
  int dihedral_i = blockDim.x * blockIdx.x + threadIdx.x;
  if (dihedral_i < dihedral_numbers) {
    int atom_i = atom_a[dihedral_i];
    int atom_j = atom_b[dihedral_i];
    int atom_k = atom_c[dihedral_i];
    int atom_l = atom_d[dihedral_i];

    int temp_ipn = ipn[dihedral_i];

    float temp_pk = pk[dihedral_i];
    float temp_pn = pn[dihedral_i];
    float temp_gamc = gamc[dihedral_i];
    float temp_gams = gams[dihedral_i];

    VECTOR drij = Get_Periodic_Displacement(uint_crd[atom_i], uint_crd[atom_j], scaler[0]);
    VECTOR drkj = Get_Periodic_Displacement(uint_crd[atom_k], uint_crd[atom_j], scaler[0]);
    VECTOR drkl = Get_Periodic_Displacement(uint_crd[atom_k], uint_crd[atom_l], scaler[0]);

    VECTOR r1 = drij ^ drkj;
    VECTOR r2 = drkl ^ drkj;

    float r1_1 = rnorm3df(r1.x, r1.y, r1.z);
    float r2_1 = rnorm3df(r2.x, r2.y, r2.z);
    float r1_2 = r1_1 * r1_1;
    float r2_2 = r2_1 * r2_1;
    float r1_1_r2_1 = r1_1 * r2_1;

    float phi = r1 * r2 * r1_1_r2_1;
    phi = fmaxf(-0.999999, fminf(phi, 0.999999));
    phi = acosf(phi);

    float sign = (r2 ^ r1) * drkj;
    phi = copysignf(phi, sign);  // copysignf returns the signed value; the original bare call discarded it

    phi = CONSTANT_Pi - phi;

    float nphi = temp_pn * phi;

    float cos_phi = cosf(phi);
    float sin_phi = sinf(phi);
    float cos_nphi = cosf(nphi);
    float sin_nphi = sinf(nphi);

    float dE_dphi;
    if (fabsf(sin_phi) < 1e-6) {
      temp_ipn *= (((temp_ipn - 1) & 1) ^ 1);
      dE_dphi = temp_gamc * (temp_pn - temp_ipn + temp_ipn * cos_phi);
    } else {
      dE_dphi = temp_pn * (temp_gamc * sin_nphi - temp_gams * cos_nphi) / sin_phi;
    }

    VECTOR dphi_dr1 = r1_1_r2_1 * r2 + cos_phi * r1_2 * r1;
    VECTOR dphi_dr2 = r1_1_r2_1 * r1 + cos_phi * r2_2 * r2;

    VECTOR dE_dri = dE_dphi * drkj ^ dphi_dr1;
    VECTOR dE_drl = dE_dphi * dphi_dr2 ^ drkj;
    VECTOR dE_drj_part = dE_dphi * ((drij ^ dphi_dr1) + (drkl ^ dphi_dr2));

    VECTOR fi = dE_dri;
    VECTOR fj = dE_drj_part - dE_dri;
    VECTOR fk = -dE_drl - dE_drj_part;
    VECTOR fl = dE_drl;

    atomicAdd(&frc[atom_i].x, fi.x);
    atomicAdd(&frc[atom_i].y, fi.y);
    atomicAdd(&frc[atom_i].z, fi.z);
    atomicAdd(&frc[atom_j].x, fj.x);
    atomicAdd(&frc[atom_j].y, fj.y);
    atomicAdd(&frc[atom_j].z, fj.z);
    atomicAdd(&frc[atom_k].x, fk.x);
    atomicAdd(&frc[atom_k].y, fk.y);
    atomicAdd(&frc[atom_k].z, fk.z);
    atomicAdd(&frc[atom_l].x, fl.x);
    atomicAdd(&frc[atom_l].y, fl.y);
    atomicAdd(&frc[atom_l].z, fl.z);

    atomicAdd(&ene[atom_i], (temp_pk + cos_nphi * temp_gamc + sin_nphi * temp_gams));
  }
}

void DihedralForceWithAtomEnergy(int dihedral_numbers, int atom_numbers, const int *uint_crd_f, const float *scaler_f,
                                 const int *atom_a, const int *atom_b, const int *atom_c, const int *atom_d,
                                 const int *ipn, const float *pk, const float *gamc, const float *gams, const float *pn,
                                 float *frc_f, float *ene, cudaStream_t stream) {
  Reset_List<<<ceilf(static_cast<float>(3. * atom_numbers) / 128), 128, 0, stream>>>(3 * atom_numbers, frc_f, 0.);
  size_t thread_per_block = 128;
  size_t block_per_grid = ceilf(static_cast<float>(dihedral_numbers) / 128);
  UNSIGNED_INT_VECTOR *uint_crd =
    const_cast<UNSIGNED_INT_VECTOR *>(reinterpret_cast<const UNSIGNED_INT_VECTOR *>(uint_crd_f));
  VECTOR *frc = const_cast<VECTOR *>(reinterpret_cast<const VECTOR *>(frc_f));
  VECTOR *scaler = const_cast<VECTOR *>(reinterpret_cast<const VECTOR *>(scaler_f));

  DihedralForceWithAtomEnergyKernel<<<block_per_grid, thread_per_block, 0, stream>>>(
    dihedral_numbers, uint_crd, scaler, atom_a, atom_b, atom_c, atom_d, ipn, pk, gamc, gams, pn, frc, ene);
  return;
}
void DihedralForceWithAtomEnergy(int dihedral_numbers, int atom_numbers, const int *uint_crd_f, const float *scaler_f,
                                 const int *atom_a, const int *atom_b, const int *atom_c, const int *atom_d,
                                 const int *ipn, const float *pk, const float *gamc, const float *gams, const float *pn,
                                 float *frc_f, float *ene, cudaStream_t stream);
@@ -1,29 +0,0 @@
/* Copyright 2021 Huawei Technologies Co., Ltd. Licensed under the Apache License, Version 2.0 (full text as in the headers above). */

#ifndef MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_DIHEDRAL_DIHEDRAL_FORCE_WITH_ATOM_ENERGY_IMPL_H_
#define MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_DIHEDRAL_DIHEDRAL_FORCE_WITH_ATOM_ENERGY_IMPL_H_

#include <curand_kernel.h>
#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/cuda_common.h"
#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/cuda_device_info.h"

CUDA_LIB_EXPORT void DihedralForceWithAtomEnergy(int dihedral_numbers, int atom_numbers, const int *uint_crd_f,
                                                 const float *scaler_f, const int *atom_a, const int *atom_b,
                                                 const int *atom_c, const int *atom_d, const int *ipn, const float *pk,
                                                 const float *gamc, const float *gams, const float *pn, float *frc_f,
                                                 float *ene, cudaStream_t stream);
#endif
@@ -1,144 +0,0 @@
/* Copyright 2021 Huawei Technologies Co., Ltd. Licensed under the Apache License, Version 2.0 (full text as in the headers above). */
/**
 * Note:
 *  LJForce. This is an experimental interface that is subject to change and/or deletion.
 */

#include "plugin/device/gpu/kernel/cuda_impl/sponge/lj/lj_direct_cf_force_with_lj_virial_direct_cf_energy_impl.cuh"
#include "plugin/device/gpu/kernel/cuda_impl/sponge/common_sponge.cuh"

__global__ void LJ_Direct_CF_Force_With_LJ_Virial_Direct_CF_Energy_CUDA(
  const int atom_numbers, const NEIGHBOR_LIST *nl, const UINT_VECTOR_LJ_TYPE *uint_crd, const VECTOR *boxlength,
  const float *LJ_type_A, const float *LJ_type_B, const float cutoff, VECTOR *frc, const float pme_beta,
  const float sqrt_pi, float *atom_lj_virial, float *atom_direct_cf_energy) {
  int atom_i = blockDim.x * blockIdx.x + threadIdx.x;
  if (atom_i < atom_numbers) {
    NEIGHBOR_LIST nl_i = nl[atom_i];
    int N = nl_i.atom_numbers;
    int atom_j;
    int int_x;
    int int_y;
    int int_z;
    UINT_VECTOR_LJ_TYPE r1 = uint_crd[atom_i], r2;
    VECTOR dr;
    float dr_2;
    float dr_4;
    float dr_8;
    float dr_6;
    float frc_abs = 0.;
    VECTOR frc_lin;
    VECTOR frc_record = {0., 0., 0.};
    float charge_i = r1.charge;
    float charge_j;
    float dr_abs;
    float dr_1;
    float beta_dr;
    float frc_cf_abs;
    float virial_lin = 0.;
    float energy_lin = 0.;

    int x, y;
    int atom_pair_LJ_type;
    for (int j = threadIdx.y; j < N; j = j + blockDim.y) {
      atom_j = nl_i.atom_serial[j];
      r2 = uint_crd[atom_j];
      charge_j = r2.charge;

      int_x = r2.uint_x - r1.uint_x;
      int_y = r2.uint_y - r1.uint_y;
      int_z = r2.uint_z - r1.uint_z;
      dr.x = boxlength[0].x * int_x;
      dr.y = boxlength[0].y * int_y;
      dr.z = boxlength[0].z * int_z;
      dr_abs = norm3df(dr.x, dr.y, dr.z);
      if (dr_abs < cutoff) {
        dr_1 = 1. / dr_abs;
        dr_2 = dr_1 * dr_1;
        dr_4 = dr_2 * dr_2;
        dr_8 = dr_4 * dr_4;
        dr_6 = dr_4 * dr_2;

        y = (r2.LJ_type - r1.LJ_type);
        x = y >> 31;
        y = (y ^ x) - x;
        x = r2.LJ_type + r1.LJ_type;
        r2.LJ_type = (x + y) >> 1;
        x = (x - y) >> 1;
        atom_pair_LJ_type = (r2.LJ_type * (r2.LJ_type + 1) >> 1) + x;

        frc_abs = (-LJ_type_A[atom_pair_LJ_type] * dr_6 + LJ_type_B[atom_pair_LJ_type]) * dr_8;
        beta_dr = pme_beta * dr_abs;
        frc_cf_abs = beta_dr * sqrt_pi * expf(-beta_dr * beta_dr) + erfcf(beta_dr);
        frc_cf_abs = frc_cf_abs * dr_2 * dr_1;
        frc_cf_abs = charge_i * charge_j * frc_cf_abs;
        energy_lin = energy_lin + charge_i * charge_j * erfcf(beta_dr) * dr_1;
        virial_lin = virial_lin - frc_abs * dr_abs * dr_abs;
        frc_abs = frc_abs - frc_cf_abs;
        frc_lin.x = frc_abs * dr.x;
        frc_lin.y = frc_abs * dr.y;
        frc_lin.z = frc_abs * dr.z;
        frc_record.x = frc_record.x + frc_lin.x;
        frc_record.y = frc_record.y + frc_lin.y;
        frc_record.z = frc_record.z + frc_lin.z;
        atomicAdd(&frc[atom_j].x, -frc_lin.x);
        atomicAdd(&frc[atom_j].y, -frc_lin.y);
        atomicAdd(&frc[atom_j].z, -frc_lin.z);
      }
    }
    atomicAdd(&frc[atom_i].x, frc_record.x);
    atomicAdd(&frc[atom_i].y, frc_record.y);
    atomicAdd(&frc[atom_i].z, frc_record.z);

    atomicAdd(&atom_direct_cf_energy[atom_i], energy_lin);
    atomicAdd(&atom_lj_virial[atom_i], virial_lin);
  }
}

void LJ_Direct_CF_Force_With_LJ_Virial_Direct_CF_Energy(
  const int atom_numbers, const float cutoff, const float pme_beta, const unsigned int *uint_crd_f, const int *LJtype,
  const float *charge, const float *scaler_f, float *uint_crd_with_LJ, int *nl_atom_numbers, int *nl_atom_serial,
  int *nl, const float *d_LJ_A, const float *d_LJ_B, float *frc_f, float *atom_lj_virial, float *atom_energy,
  int max_neighbor_numbers, cudaStream_t stream) {
  Reset_List<<<ceilf(static_cast<float>(3. * atom_numbers) / 128), 128, 0, stream>>>(3 * atom_numbers, frc_f, 0.);
  Reset_List<<<ceilf(static_cast<float>(atom_numbers) / 128), 128, 0, stream>>>(atom_numbers, atom_energy, 0.);
  Reset_List<<<ceilf(static_cast<float>(atom_numbers) / 128), 128, 0, stream>>>(atom_numbers, atom_lj_virial, 0.);
  VECTOR *frc = reinterpret_cast<VECTOR *>(frc_f);
  VECTOR *scaler = const_cast<VECTOR *>(reinterpret_cast<const VECTOR *>(scaler_f));
  NEIGHBOR_LIST *nl_a = reinterpret_cast<NEIGHBOR_LIST *>(nl);
  construct_neighbor_list_kernel<<<ceilf(static_cast<float>(atom_numbers) / 128), 128, 0, stream>>>(
    atom_numbers, max_neighbor_numbers, nl_atom_numbers, nl_atom_serial, nl_a);

  UINT_VECTOR_LJ_TYPE *uint_crd_with_LJ_a = reinterpret_cast<UINT_VECTOR_LJ_TYPE *>(uint_crd_with_LJ);

  UNSIGNED_INT_VECTOR *uint_crd =
    const_cast<UNSIGNED_INT_VECTOR *>(reinterpret_cast<const UNSIGNED_INT_VECTOR *>(uint_crd_f));

  Copy_Crd_To_New_Crd_Start<<<ceilf(static_cast<float>(atom_numbers) / 32), 32, 0, stream>>>(
    atom_numbers, uint_crd, uint_crd_with_LJ_a, LJtype, charge);

  LJ_Direct_CF_Force_With_LJ_Virial_Direct_CF_Energy_CUDA<<<ceilf(static_cast<float>(atom_numbers) / 8), thread_LJ, 0,
                                                            stream>>>(
    atom_numbers, nl_a, uint_crd_with_LJ_a, scaler, d_LJ_A, d_LJ_B, cutoff, frc, pme_beta, TWO_DIVIDED_BY_SQRT_PI,
    atom_lj_virial, atom_energy);
  return;
}

void LJ_Direct_CF_Force_With_LJ_Virial_Direct_CF_Energy(
  const int atom_numbers, const float cutoff, const float pme_beta, const unsigned int *uint_crd_f, const int *LJtype,
  const float *charge, const float *scaler_f, float *uint_crd_with_LJ, int *nl_atom_numbers, int *nl_atom_serial,
  int *nl, const float *d_LJ_A, const float *d_LJ_B, float *frc_f, float *atom_lj_virial, float *atom_energy,
  int max_neighbor_numbers, cudaStream_t stream);
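The sign-mask sequence in the kernels above (y >> 31, (y ^ x) - x, and the shifted sums) is a branch-free way to index the lower triangle of the symmetric LJ pair table. A readable equivalent, assuming non-negative type indices (PairTypeIndex is an illustrative name):

// A and B are stored as a lower-triangular table over unordered type pairs:
// index(max, min) = max * (max + 1) / 2 + min, which is what the bit twiddling computes.
int PairTypeIndex(int type_i, int type_j) {
  int hi = type_i > type_j ? type_i : type_j;
  int lo = type_i > type_j ? type_j : type_i;
  return hi * (hi + 1) / 2 + lo;
}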
@@ -1,34 +0,0 @@
/* Copyright 2021 Huawei Technologies Co., Ltd. Licensed under the Apache License, Version 2.0 (full text as in the headers above). */
/**
 * Note:
 *  LJForce. This is an experimental interface that is subject to change and/or deletion.
 */

#ifndef MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_LJ_DIRECT_CF_FORCE_WITH_LJ_VIRIAL_DIRECT_CF_ENERGY_IMPL_H_
#define MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_LJ_DIRECT_CF_FORCE_WITH_LJ_VIRIAL_DIRECT_CF_ENERGY_IMPL_H_

#include <curand_kernel.h>
#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/cuda_common.h"
#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/cuda_device_info.h"

CUDA_LIB_EXPORT void LJ_Direct_CF_Force_With_LJ_Virial_Direct_CF_Energy(
  const int atom_numbers, const float cutoff, const float pme_beta, const unsigned int *uint_crd_f, const int *LJtype,
  const float *charge, const float *scaler_f, float *uint_crd_with_LJ, int *nl_atom_numbers, int *nl_atom_serial,
  int *nl, const float *d_LJ_A, const float *d_LJ_B, float *frc_f, float *atom_lj_virial, float *atom_energy,
  int max_neighbor_numbers, cudaStream_t stream);

#endif
@@ -1,102 +0,0 @@
/* Copyright 2021 Huawei Technologies Co., Ltd. Licensed under the Apache License, Version 2.0 (full text as in the headers above). */

#include "plugin/device/gpu/kernel/cuda_impl/sponge/lj/lj_energy_impl.cuh"
#include "plugin/device/gpu/kernel/cuda_impl/sponge/common_sponge.cuh"

__global__ void LJ_Energy_CUDA(const int atom_numbers, const NEIGHBOR_LIST *nl, const UINT_VECTOR_LJ_TYPE *uint_crd,
                               const VECTOR *boxlength, const float *LJ_type_A, const float *LJ_type_B,
                               const float cutoff_square, float *lj_ene) {
  int atom_i = blockDim.x * blockIdx.x + threadIdx.x;
  if (atom_i < atom_numbers) {
    NEIGHBOR_LIST nl_i = nl[atom_i];
    int N = nl_i.atom_numbers;
    int atom_j;
    int int_x;
    int int_y;
    int int_z;
    UINT_VECTOR_LJ_TYPE r1 = uint_crd[atom_i], r2;
    VECTOR dr;
    float dr2;
    float dr_2;
    float dr_4;
    float dr_6;
    float ene_lin = 0.;

    int x, y;
    int atom_pair_LJ_type;
    for (int j = threadIdx.y; j < N; j = j + blockDim.y) {
      atom_j = nl_i.atom_serial[j];
      r2 = uint_crd[atom_j];

      int_x = r2.uint_x - r1.uint_x;
      int_y = r2.uint_y - r1.uint_y;
      int_z = r2.uint_z - r1.uint_z;
      dr.x = boxlength[0].x * int_x;
      dr.y = boxlength[0].y * int_y;
      dr.z = boxlength[0].z * int_z;

      dr2 = dr.x * dr.x + dr.y * dr.y + dr.z * dr.z;
      if (dr2 < cutoff_square) {
        dr_2 = 1. / dr2;
        dr_4 = dr_2 * dr_2;
        dr_6 = dr_4 * dr_2;

        y = (r2.LJ_type - r1.LJ_type);
        x = y >> 31;
        y = (y ^ x) - x;
        x = r2.LJ_type + r1.LJ_type;
        r2.LJ_type = (x + y) >> 1;
        x = (x - y) >> 1;
        atom_pair_LJ_type = (r2.LJ_type * (r2.LJ_type + 1) >> 1) + x;

        dr_2 = (0.083333333 * LJ_type_A[atom_pair_LJ_type] * dr_6 - 0.166666666 * LJ_type_B[atom_pair_LJ_type]) * dr_6;
        ene_lin = ene_lin + dr_2;
      }
    }
    atomicAdd(&lj_ene[atom_i], ene_lin);
  }
}

void LJEnergy(const int atom_numbers, const float cutoff_square, const int *uint_crd_f, const int *LJtype,
              const float *charge, const float *scaler_f, float *uint_crd_with_LJ, int *nl_atom_numbers,
              int *nl_atom_serial, int *nl, const float *d_LJ_A, const float *d_LJ_B, float *d_LJ_energy_atom,
              cudaStream_t stream) {
  VECTOR *scaler = const_cast<VECTOR *>(reinterpret_cast<const VECTOR *>(scaler_f));
  int max_neighbor_numbers = 800;
  NEIGHBOR_LIST *nl_a = reinterpret_cast<NEIGHBOR_LIST *>(nl);
  construct_neighbor_list_kernel<<<ceilf(static_cast<float>(atom_numbers) / 128), 128, 0, stream>>>(
    atom_numbers, max_neighbor_numbers, nl_atom_numbers, nl_atom_serial, nl_a);

  UINT_VECTOR_LJ_TYPE *uint_crd_with_LJ_a = reinterpret_cast<UINT_VECTOR_LJ_TYPE *>(uint_crd_with_LJ);

  UNSIGNED_INT_VECTOR *uint_crd =
    const_cast<UNSIGNED_INT_VECTOR *>(reinterpret_cast<const UNSIGNED_INT_VECTOR *>(uint_crd_f));

  Copy_Crd_To_New_Crd_Start<<<ceilf(static_cast<float>(atom_numbers) / 32), 32, 0, stream>>>(
    atom_numbers, uint_crd, uint_crd_with_LJ_a, LJtype, charge);

  Reset_List<<<ceilf(static_cast<float>(atom_numbers) / 32), 32, 0, stream>>>(atom_numbers, d_LJ_energy_atom, 0.);

  LJ_Energy_CUDA<<<ceilf(static_cast<float>(atom_numbers) / 8), thread_LJ, 0, stream>>>(
    atom_numbers, nl_a, uint_crd_with_LJ_a, scaler, d_LJ_A, d_LJ_B, cutoff_square, d_LJ_energy_atom);

  return;
}
void LJEnergy(const int atom_numbers, const float cutoff_square, const int *uint_crd_f, const int *LJtype,
              const float *charge, const float *scaler_f, float *uint_crd_with_LJ, int *nl_atom_numbers,
              int *nl_atom_serial, int *nl, const float *d_LJ_A, const float *d_LJ_B, float *d_LJ_energy_atom,
              cudaStream_t stream);
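The constants 0.083333333 and 0.166666666 in the energy kernel are 1/12 and 1/6: SPONGE stores A and B so that the pair energy is E(r) = A/12 * r^-12 - B/6 * r^-6, which makes the force factor used by the force kernels exactly (dE/dr)/r = -A * r^-14 + B * r^-8 with no extra coefficients. A one-function restatement (LJPairEnergy is an illustrative name):

// Pair energy in SPONGE's A/B convention; r2_inv = 1 / r^2.
float LJPairEnergy(float A, float B, float r2_inv) {
  float r6_inv = r2_inv * r2_inv * r2_inv;
  return (A / 12.0f * r6_inv - B / 6.0f) * r6_inv;  // A/12 r^-12 - B/6 r^-6
}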
@@ -1,28 +0,0 @@
/* Copyright 2021 Huawei Technologies Co., Ltd. Licensed under the Apache License, Version 2.0 (full text as in the headers above). */

#ifndef MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_LJ_LJ_ENERGY_IMPL_H_
#define MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_LJ_LJ_ENERGY_IMPL_H_

#include <curand_kernel.h>
#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/cuda_common.h"
#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/cuda_device_info.h"

CUDA_LIB_EXPORT void LJEnergy(const int atom_numbers, const float cutoff_square, const int *uint_crd_f,
                              const int *LJtype, const float *charge, const float *scaler_f, float *uint_crd_with_LJ,
                              int *nl_atom_numbers, int *nl_atom_serial, int *nl, const float *d_LJ_A,
                              const float *d_LJ_B, float *d_LJ_energy_atom, cudaStream_t stream);
#endif
@@ -1,117 +0,0 @@
/* Copyright 2021 Huawei Technologies Co., Ltd. Licensed under the Apache License, Version 2.0 (full text as in the headers above). */

#include "plugin/device/gpu/kernel/cuda_impl/sponge/lj/lj_force_impl.cuh"
#include "plugin/device/gpu/kernel/cuda_impl/sponge/common_sponge.cuh"

__global__ void LJ_Force_CUDA(const int atom_numbers, const NEIGHBOR_LIST *nl, const UINT_VECTOR_LJ_TYPE *uint_crd,
                              const VECTOR *boxlength, const float *LJ_type_A, const float *LJ_type_B,
                              const float cutoff_square, VECTOR *frc) {
  int atom_i = blockDim.x * blockIdx.x + threadIdx.x;
  if (atom_i < atom_numbers) {
    NEIGHBOR_LIST nl_i = nl[atom_i];
    int N = nl_i.atom_numbers;
    int B = ceilf(static_cast<float>(N) / blockDim.y);
    int atom_j;
    int int_x;
    int int_y;
    int int_z;
    UINT_VECTOR_LJ_TYPE r1 = uint_crd[atom_i], r2;
    VECTOR dr;
    float dr2;
    float dr_2;
    float dr_4;
    float dr_8;
    float dr_14;
    float frc_abs = 0.;
    VECTOR frc_lin;
    VECTOR frc_record = {0., 0., 0.};

    int x, y;
    int atom_pair_LJ_type;
    for (int j = threadIdx.y * B; j < (threadIdx.y + 1) * B; j = j + 1) {
      if (j < N) {
        atom_j = nl_i.atom_serial[j];
        r2 = uint_crd[atom_j];
        int_x = r2.uint_x - r1.uint_x;
        int_y = r2.uint_y - r1.uint_y;
        int_z = r2.uint_z - r1.uint_z;
        dr.x = boxlength[0].x * int_x;
        dr.y = boxlength[0].y * int_y;
        dr.z = boxlength[0].z * int_z;
        dr2 = dr.x * dr.x + dr.y * dr.y + dr.z * dr.z;
        if (dr2 < cutoff_square) {
          dr_2 = 1. / dr2;
          dr_4 = dr_2 * dr_2;
          dr_8 = dr_4 * dr_4;
          dr_14 = dr_8 * dr_4 * dr_2;

          y = (r2.LJ_type - r1.LJ_type);
          x = y >> 31;
          y = (y ^ x) - x;
          x = r2.LJ_type + r1.LJ_type;
          r2.LJ_type = (x + y) >> 1;
          x = (x - y) >> 1;
          atom_pair_LJ_type = (r2.LJ_type * (r2.LJ_type + 1) >> 1) + x;

          frc_abs = -LJ_type_A[atom_pair_LJ_type] * dr_14 + LJ_type_B[atom_pair_LJ_type] * dr_8;
          frc_lin.x = frc_abs * dr.x;
          frc_lin.y = frc_abs * dr.y;
          frc_lin.z = frc_abs * dr.z;

          frc_record.x = frc_record.x + frc_lin.x;
          frc_record.y = frc_record.y + frc_lin.y;
          frc_record.z = frc_record.z + frc_lin.z;

          atomicAdd(&frc[atom_j].x, -frc_lin.x);
          atomicAdd(&frc[atom_j].y, -frc_lin.y);
          atomicAdd(&frc[atom_j].z, -frc_lin.z);
        }
      }
    }
    atomicAdd(&frc[atom_i].x, frc_record.x);
    atomicAdd(&frc[atom_i].y, frc_record.y);
    atomicAdd(&frc[atom_i].z, frc_record.z);
  }
}

void LJForce(const int atom_numbers, const float cutoff_square, const int *uint_crd_f, const int *LJtype,
             const float *charge, const float *scaler_f, float *uint_crd_with_LJ, int *nl_atom_numbers,
             int *nl_atom_serial, int *nl, const float *d_LJ_A, const float *d_LJ_B, float *frc_f,
             cudaStream_t stream) {
  Reset_List<<<ceilf(static_cast<float>(3. * atom_numbers) / 128), 128, 0, stream>>>(3 * atom_numbers, frc_f, 0.);
  VECTOR *frc = reinterpret_cast<VECTOR *>(frc_f);
  VECTOR *scaler = const_cast<VECTOR *>(reinterpret_cast<const VECTOR *>(scaler_f));
  int max_neighbor_numbers = 800;
  NEIGHBOR_LIST *nl_a = reinterpret_cast<NEIGHBOR_LIST *>(nl);
  construct_neighbor_list_kernel<<<ceilf(static_cast<float>(atom_numbers) / 128), 128, 0, stream>>>(
    atom_numbers, max_neighbor_numbers, nl_atom_numbers, nl_atom_serial, nl_a);

  UINT_VECTOR_LJ_TYPE *uint_crd_with_LJ_a = reinterpret_cast<UINT_VECTOR_LJ_TYPE *>(uint_crd_with_LJ);

  UNSIGNED_INT_VECTOR *uint_crd =
    const_cast<UNSIGNED_INT_VECTOR *>(reinterpret_cast<const UNSIGNED_INT_VECTOR *>(uint_crd_f));

  Copy_Crd_To_New_Crd_Start<<<ceilf(static_cast<float>(atom_numbers) / 32), 32, 0, stream>>>(
    atom_numbers, uint_crd, uint_crd_with_LJ_a, LJtype, charge);

  LJ_Force_CUDA<<<ceilf(static_cast<float>(atom_numbers) / 8), thread_LJ, 0, stream>>>(
    atom_numbers, nl_a, uint_crd_with_LJ_a, scaler, d_LJ_A, d_LJ_B, cutoff_square, frc);
  return;
}
void LJForce(const int atom_numbers, const float cutoff_square, const int *uint_crd_f, const int *LJtype,
             const float *charge, const float *scaler_f, float *uint_crd_with_LJ, int *nl_atom_numbers,
             int *nl_atom_serial, int *nl, const float *d_LJ_A, const float *d_LJ_B, float *frc_f, cudaStream_t stream);
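Unlike the other LJ kernels, LJ_Force_CUDA splits each atom's neighbor list into contiguous chunks per threadIdx.y (B = ceil(N / blockDim.y)) instead of striding. Both schemes visit every neighbor exactly once; a host-side sketch of the two index patterns (function names are illustrative):

#include <vector>

std::vector<int> ChunkedIndices(int N, int dim_y, int ty) {  // LJ_Force_CUDA's pattern
  std::vector<int> out;
  int B = (N + dim_y - 1) / dim_y;  // ceil(N / blockDim.y)
  for (int j = ty * B; j < (ty + 1) * B; ++j)
    if (j < N) out.push_back(j);
  return out;
}

std::vector<int> StridedIndices(int N, int dim_y, int ty) {  // the other LJ kernels' pattern
  std::vector<int> out;
  for (int j = ty; j < N; j += dim_y) out.push_back(j);
  return out;
}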
@@ -1,29 +0,0 @@
/* Copyright 2021 Huawei Technologies Co., Ltd. Licensed under the Apache License, Version 2.0 (full text as in the headers above). */

#ifndef MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_LJ_LJ_FORCE_IMPL_H_
#define MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_LJ_LJ_FORCE_IMPL_H_

#include <curand_kernel.h>
#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/cuda_common.h"
#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/cuda_device_info.h"

CUDA_LIB_EXPORT void LJForce(const int atom_numbers, const float cutoff_square, const int *uint_crd_f,
                             const int *LJtype, const float *charge, const float *scaler_f, float *uint_crd_with_LJ,
                             int *nl_atom_numbers, int *nl_atom_serial, int *nl, const float *d_LJ_A,
                             const float *d_LJ_B, float *frc_f, cudaStream_t stream);

#endif
@@ -1,133 +0,0 @@
/* Copyright 2021 Huawei Technologies Co., Ltd. Licensed under the Apache License, Version 2.0 (full text as in the headers above). */

#include "plugin/device/gpu/kernel/cuda_impl/sponge/lj/lj_force_with_pme_direct_force_impl.cuh"
#include "plugin/device/gpu/kernel/cuda_impl/sponge/common_sponge.cuh"

__global__ void LJ_Force_With_Direct_CF_CUDA(const int atom_numbers, const NEIGHBOR_LIST *nl,
                                             const UINT_VECTOR_LJ_TYPE *uint_crd, const VECTOR *boxlength,
                                             const float *LJ_type_A, const float *LJ_type_B, const float cutoff,
                                             VECTOR *frc, const float pme_beta, const float sqrt_pi) {
  int atom_i = blockDim.x * blockIdx.x + threadIdx.x;
  if (atom_i < atom_numbers) {
    NEIGHBOR_LIST nl_i = nl[atom_i];
    int N = nl_i.atom_numbers;
    int atom_j;
    int int_x;
    int int_y;
    int int_z;
    UINT_VECTOR_LJ_TYPE r1 = uint_crd[atom_i], r2;
    VECTOR dr;
    float dr_2;
    float dr_4;
    float dr_8;
    float dr_6;
    float frc_abs = 0.;
    VECTOR frc_lin;
    VECTOR frc_record = {0., 0., 0.};

    float charge_i = r1.charge;
    float charge_j;
    float dr_abs;
    float dr_1;
    float beta_dr;
    float frc_cf_abs;

    int x, y;
    int atom_pair_LJ_type;
    for (int j = threadIdx.y; j < N; j = j + blockDim.y) {
      atom_j = nl_i.atom_serial[j];
      r2 = uint_crd[atom_j];
      charge_j = r2.charge;

      int_x = r2.uint_x - r1.uint_x;
      int_y = r2.uint_y - r1.uint_y;
      int_z = r2.uint_z - r1.uint_z;
      dr.x = boxlength[0].x * int_x;
      dr.y = boxlength[0].y * int_y;
      dr.z = boxlength[0].z * int_z;
      dr_abs = norm3df(dr.x, dr.y, dr.z);
      if (dr_abs < cutoff) {
        dr_1 = 1. / dr_abs;
        dr_2 = dr_1 * dr_1;
        dr_4 = dr_2 * dr_2;
        dr_8 = dr_4 * dr_4;
        dr_6 = dr_4 * dr_2;

        y = (r2.LJ_type - r1.LJ_type);
        x = y >> 31;
        y = (y ^ x) - x;
        x = r2.LJ_type + r1.LJ_type;
        r2.LJ_type = (x + y) >> 1;
        x = (x - y) >> 1;
        atom_pair_LJ_type = (r2.LJ_type * (r2.LJ_type + 1) >> 1) + x;

        frc_abs = (-LJ_type_A[atom_pair_LJ_type] * dr_6 + LJ_type_B[atom_pair_LJ_type]) * dr_8;
        beta_dr = pme_beta * dr_abs;
        frc_cf_abs = beta_dr * sqrt_pi * expf(-beta_dr * beta_dr) + erfcf(beta_dr);
        frc_cf_abs = frc_cf_abs * dr_2 * dr_1;
        frc_cf_abs = charge_i * charge_j * frc_cf_abs;

        frc_abs = frc_abs - frc_cf_abs;

        frc_lin.x = frc_abs * dr.x;
        frc_lin.y = frc_abs * dr.y;
        frc_lin.z = frc_abs * dr.z;

        frc_record.x = frc_record.x + frc_lin.x;
        frc_record.y = frc_record.y + frc_lin.y;
        frc_record.z = frc_record.z + frc_lin.z;

        atomicAdd(&frc[atom_j].x, -frc_lin.x);
        atomicAdd(&frc[atom_j].y, -frc_lin.y);
        atomicAdd(&frc[atom_j].z, -frc_lin.z);
      }
    }
    atomicAdd(&frc[atom_i].x, frc_record.x);
    atomicAdd(&frc[atom_i].y, frc_record.y);
    atomicAdd(&frc[atom_i].z, frc_record.z);
  }
}

void LJForceWithPMEDirectForce(const int atom_numbers, const float cutoff, const float pme_beta, const int *uint_crd_f,
                               const int *LJtype, const float *charge, const float *scaler_f, float *uint_crd_with_LJ,
                               int *nl_atom_numbers, int *nl_atom_serial, int *nl, const float *d_LJ_A,
                               const float *d_LJ_B, float *frc_f, cudaStream_t stream) {
  Reset_List<<<ceilf(static_cast<float>(3. * atom_numbers) / 128), 128, 0, stream>>>(3 * atom_numbers, frc_f, 0.);
  VECTOR *frc = reinterpret_cast<VECTOR *>(frc_f);
  VECTOR *scaler = const_cast<VECTOR *>(reinterpret_cast<const VECTOR *>(scaler_f));
  int max_neighbor_numbers = 800;
  NEIGHBOR_LIST *nl_a = reinterpret_cast<NEIGHBOR_LIST *>(nl);
  construct_neighbor_list_kernel<<<ceilf(static_cast<float>(atom_numbers) / 128), 128, 0, stream>>>(
    atom_numbers, max_neighbor_numbers, nl_atom_numbers, nl_atom_serial, nl_a);

  UINT_VECTOR_LJ_TYPE *uint_crd_with_LJ_a = reinterpret_cast<UINT_VECTOR_LJ_TYPE *>(uint_crd_with_LJ);

  UNSIGNED_INT_VECTOR *uint_crd =
    const_cast<UNSIGNED_INT_VECTOR *>(reinterpret_cast<const UNSIGNED_INT_VECTOR *>(uint_crd_f));

  Copy_Crd_To_New_Crd_Start<<<ceilf(static_cast<float>(atom_numbers) / 32), 32, 0, stream>>>(
    atom_numbers, uint_crd, uint_crd_with_LJ_a, LJtype, charge);

  LJ_Force_With_Direct_CF_CUDA<<<ceilf(static_cast<float>(atom_numbers) / 8), thread_LJ, 0, stream>>>(
    atom_numbers, nl_a, uint_crd_with_LJ_a, scaler, d_LJ_A, d_LJ_B, cutoff, frc, pme_beta, TWO_DIVIDED_BY_SQRT_PI);
  return;
}

void LJForceWithPMEDirectForce(const int atom_numbers, const float cutoff, const float pme_beta, const int *uint_crd_f,
                               const int *LJtype, const float *charge, const float *scaler_f, float *uint_crd_with_LJ,
                               int *nl_atom_numbers, int *nl_atom_serial, int *nl, const float *d_LJ_A,
                               const float *d_LJ_B, float *frc_f, cudaStream_t stream);
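The `sqrt_pi` argument as passed at the launch sites is TWO_DIVIDED_BY_SQRT_PI, i.e. 2/sqrt(pi), so the electrostatic factor above is -(1/r) d/dr of the screened Coulomb energy qi*qj*erfc(beta*r)/r, the direct-space piece of Ewald/PME. A host-side restatement of both pieces, assuming only <cmath> (function names are illustrative):

#include <cmath>

const float kTwoOverSqrtPi = 1.1283792f;  // 2 / sqrt(pi)

float PMEDirectEnergy(float qi, float qj, float beta, float r) {
  return qi * qj * std::erfc(beta * r) / r;  // screened Coulomb pair energy
}

// -(1/r) dE/dr: multiply by the displacement vector to get the force contribution.
float PMEDirectForceFactor(float qi, float qj, float beta, float r) {
  float br = beta * r;
  return qi * qj * (std::erfc(br) + kTwoOverSqrtPi * br * std::exp(-br * br)) / (r * r * r);
}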
@@ -1,30 +0,0 @@
/* Copyright 2021 Huawei Technologies Co., Ltd. Licensed under the Apache License, Version 2.0 (full text as in the headers above). */

#ifndef MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_LJ_LJ_FORCE_WITH_PME_DIRECT_FORCE_IMPL_H_
#define MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_LJ_LJ_FORCE_WITH_PME_DIRECT_FORCE_IMPL_H_

#include <curand_kernel.h>
#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/cuda_common.h"
#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/cuda_device_info.h"

CUDA_LIB_EXPORT void LJForceWithPMEDirectForce(const int atom_numbers, const float cutoff, const float pme_beta,
                                               const int *uint_crd_f, const int *LJtype, const float *charge,
                                               const float *scaler_f, float *uint_crd_with_LJ, int *nl_atom_numbers,
                                               int *nl_atom_serial, int *nl, const float *d_LJ_A, const float *d_LJ_B,
                                               float *frc_f, cudaStream_t stream);

#endif
@@ -1,147 +0,0 @@
/* Copyright 2021 Huawei Technologies Co., Ltd. Licensed under the Apache License, Version 2.0 (full text as in the headers above). */

#include "plugin/device/gpu/kernel/cuda_impl/sponge/lj/lj_pme_direct_force_with_atom_energy_impl.cuh"
#include "plugin/device/gpu/kernel/cuda_impl/sponge/common_sponge.cuh"

__global__ void LJ_Direct_CF_Force_With_Atom_Energy_CUDA(const int atom_numbers, const NEIGHBOR_LIST *nl,
                                                         const UINT_VECTOR_LJ_TYPE *uint_crd, const VECTOR *boxlength,
                                                         const float *LJ_type_A, const float *LJ_type_B,
                                                         const float cutoff, VECTOR *frc, const float pme_beta,
                                                         const float sqrt_pi, float *atom_energy) {
  int atom_i = blockDim.x * blockIdx.x + threadIdx.x;
  if (atom_i < atom_numbers) {
    NEIGHBOR_LIST nl_i = nl[atom_i];
    int N = nl_i.atom_numbers;
    int atom_j;
    int int_x;
    int int_y;
    int int_z;
    UINT_VECTOR_LJ_TYPE r1 = uint_crd[atom_i], r2;
    VECTOR dr;
    float dr_2;
    float dr_4;
    float dr_8;
    float dr_6;
    float frc_abs = 0.;
    VECTOR frc_lin;
    VECTOR frc_record = {0., 0., 0.};

    float charge_i = r1.charge;
    float charge_j;
    float dr_abs;
    float dr_1;
    float beta_dr;
    float frc_cf_abs;

    float ene_lin = 0.;
    float ene_lin2 = 0.;

    int x, y;
    int atom_pair_LJ_type;
    for (int j = threadIdx.y; j < N; j = j + blockDim.y) {
      atom_j = nl_i.atom_serial[j];
      r2 = uint_crd[atom_j];
      charge_j = r2.charge;

      int_x = r2.uint_x - r1.uint_x;
      int_y = r2.uint_y - r1.uint_y;
      int_z = r2.uint_z - r1.uint_z;
      dr.x = boxlength[0].x * int_x;
      dr.y = boxlength[0].y * int_y;
      dr.z = boxlength[0].z * int_z;
      dr_abs = norm3df(dr.x, dr.y, dr.z);
      if (dr_abs < cutoff) {
        dr_1 = 1. / dr_abs;
        dr_2 = dr_1 * dr_1;
        dr_4 = dr_2 * dr_2;
        dr_8 = dr_4 * dr_4;
        dr_6 = dr_4 * dr_2;

        y = (r2.LJ_type - r1.LJ_type);
        x = y >> 31;
        y = (y ^ x) - x;
        x = r2.LJ_type + r1.LJ_type;
        r2.LJ_type = (x + y) >> 1;
        x = (x - y) >> 1;
        atom_pair_LJ_type = (r2.LJ_type * (r2.LJ_type + 1) >> 1) + x;

        frc_abs = (-LJ_type_A[atom_pair_LJ_type] * dr_6 + LJ_type_B[atom_pair_LJ_type]) * dr_8;
        beta_dr = pme_beta * dr_abs;
        frc_cf_abs = beta_dr * sqrt_pi * expf(-beta_dr * beta_dr) + erfcf(beta_dr);
        frc_cf_abs = frc_cf_abs * dr_2 * dr_1;
        frc_cf_abs = charge_i * charge_j * frc_cf_abs;

        ene_lin2 = ene_lin2 + charge_i * charge_j * erfcf(beta_dr) * dr_1;
        ene_lin =
          ene_lin +
          (0.083333333 * LJ_type_A[atom_pair_LJ_type] * dr_6 - 0.166666666 * LJ_type_B[atom_pair_LJ_type]) * dr_6;

        frc_abs = frc_abs - frc_cf_abs;

        frc_lin.x = frc_abs * dr.x;
        frc_lin.y = frc_abs * dr.y;
        frc_lin.z = frc_abs * dr.z;

        frc_record.x = frc_record.x + frc_lin.x;
        frc_record.y = frc_record.y + frc_lin.y;
        frc_record.z = frc_record.z + frc_lin.z;

        atomicAdd(&frc[atom_j].x, -frc_lin.x);
        atomicAdd(&frc[atom_j].y, -frc_lin.y);
        atomicAdd(&frc[atom_j].z, -frc_lin.z);
      }
    }
    atomicAdd(&frc[atom_i].x, frc_record.x);
    atomicAdd(&frc[atom_i].y, frc_record.y);
    atomicAdd(&frc[atom_i].z, frc_record.z);

    atomicAdd(&atom_energy[atom_i], ene_lin + ene_lin2);
  }
}

void LJDirectCFForceWithAtomEnergy(const int atom_numbers, const float cutoff, const float pme_beta,
                                   const int *uint_crd_f, const int *LJtype, const float *charge, const float *scaler_f,
                                   float *uint_crd_with_LJ, int *nl_atom_numbers, int *nl_atom_serial, int *nl,
                                   const float *d_LJ_A, const float *d_LJ_B, float *frc_f, float *atom_energy,
                                   cudaStream_t stream) {
  Reset_List<<<ceilf(static_cast<float>(3. * atom_numbers) / 128), 128, 0, stream>>>(3 * atom_numbers, frc_f, 0.);
  VECTOR *frc = reinterpret_cast<VECTOR *>(frc_f);
  VECTOR *scaler = const_cast<VECTOR *>(reinterpret_cast<const VECTOR *>(scaler_f));
  int max_neighbor_numbers = 800;
  NEIGHBOR_LIST *nl_a = reinterpret_cast<NEIGHBOR_LIST *>(nl);
  construct_neighbor_list_kernel<<<ceilf(static_cast<float>(atom_numbers) / 128), 128, 0, stream>>>(
    atom_numbers, max_neighbor_numbers, nl_atom_numbers, nl_atom_serial, nl_a);

  UINT_VECTOR_LJ_TYPE *uint_crd_with_LJ_a = reinterpret_cast<UINT_VECTOR_LJ_TYPE *>(uint_crd_with_LJ);

  UNSIGNED_INT_VECTOR *uint_crd =
    const_cast<UNSIGNED_INT_VECTOR *>(reinterpret_cast<const UNSIGNED_INT_VECTOR *>(uint_crd_f));

  Copy_Crd_To_New_Crd_Start<<<ceilf(static_cast<float>(atom_numbers) / 32), 32, 0, stream>>>(
    atom_numbers, uint_crd, uint_crd_with_LJ_a, LJtype, charge);

  LJ_Direct_CF_Force_With_Atom_Energy_CUDA<<<ceilf(static_cast<float>(atom_numbers) / 8), thread_LJ, 0, stream>>>(
    atom_numbers, nl_a, uint_crd_with_LJ_a, scaler, d_LJ_A, d_LJ_B, cutoff, frc, pme_beta, TWO_DIVIDED_BY_SQRT_PI,
    atom_energy);
  return;
}

void LJDirectCFForceWithAtomEnergy(const int atom_numbers, const float cutoff, const float pme_beta,
                                   const int *uint_crd_f, const int *LJtype, const float *charge, const float *scaler_f,
                                   float *uint_crd_with_LJ, int *nl_atom_numbers, int *nl_atom_serial, int *nl,
                                   const float *d_LJ_A, const float *d_LJ_B, float *frc_f, float *atom_energy,
                                   cudaStream_t stream);
@@ -1,31 +0,0 @@
/* Copyright 2021 Huawei Technologies Co., Ltd. Licensed under the Apache License, Version 2.0 (full text as in the headers above). */

#ifndef MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_LJ_LJ_PME_DIRECT_FORCE_WITH_ATOM_ENERGY_IMPL_H_
#define MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_LJ_LJ_PME_DIRECT_FORCE_WITH_ATOM_ENERGY_IMPL_H_

#include <curand_kernel.h>
#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/cuda_common.h"
#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/cuda_device_info.h"

CUDA_LIB_EXPORT void LJDirectCFForceWithAtomEnergy(const int atom_numbers, const float cutoff, const float pme_beta,
                                                   const int *uint_crd_f, const int *LJtype, const float *charge,
                                                   const float *scaler_f, float *uint_crd_with_LJ, int *nl_atom_numbers,
                                                   int *nl_atom_serial, int *nl, const float *d_LJ_A,
                                                   const float *d_LJ_B, float *frc_f, float *atom_energy,
                                                   cudaStream_t stream);

#endif
@@ -1,78 +0,0 @@
/* Copyright 2021 Huawei Technologies Co., Ltd. Licensed under the Apache License, Version 2.0 (full text as in the headers above). */

#include "plugin/device/gpu/kernel/cuda_impl/sponge/nb14/dihedral_14_cf_atom_energy_impl.cuh"
#include "plugin/device/gpu/kernel/cuda_impl/sponge/common_sponge.cuh"

__global__ void Dihedral14CFAtomEnergyKernel(const int dihedral_14_numbers, const UINT_VECTOR_LJ_TYPE *uint_crd,
                                             const VECTOR *boxlength, const int *a_14, const int *b_14,
                                             const float *cf_scale_factor, float *ene) {
  int dihedral_14_i = blockDim.x * blockIdx.x + threadIdx.x;
  if (dihedral_14_i < dihedral_14_numbers) {
    int atom_i = a_14[dihedral_14_i];
    int atom_j = b_14[dihedral_14_i];

    UINT_VECTOR_LJ_TYPE r1 = uint_crd[atom_i];
    UINT_VECTOR_LJ_TYPE r2 = uint_crd[atom_j];

    int int_x;
    int int_y;
    int int_z;
    VECTOR dr;
    float r_1;
    float ene_lin = 0.;

    int_x = r2.uint_x - r1.uint_x;
    int_y = r2.uint_y - r1.uint_y;
    int_z = r2.uint_z - r1.uint_z;
    dr.x = boxlength[0].x * int_x;
    dr.y = boxlength[0].y * int_y;
    dr.z = boxlength[0].z * int_z;
    r_1 = rnorm3df(dr.x, dr.y, dr.z);

    ene_lin = r1.charge * r2.charge * r_1;

    ene_lin *= cf_scale_factor[dihedral_14_i];

    atomicAdd(&ene[atom_i], ene_lin);
  }
}

void Dihedral14CFAtomEnergy(const int dihedral_14_numbers, const int atom_numbers, const int *uint_crd_f,
                            const int *LJtype, const float *charge, const float *boxlength_f, const int *a_14,
                            const int *b_14, const float *cf_scale_factor, float *ene, cudaStream_t stream) {
  size_t thread_per_block = 128;
  // size the grid by the kernel's iteration count (dihedral_14_numbers); the original used atom_numbers,
  // which underlaunches whenever there are more 1-4 pairs than atoms
  size_t block_per_grid = ceilf(static_cast<float>(dihedral_14_numbers) / 128);
  UINT_VECTOR_LJ_TYPE *uint_crd_with_LJ = NULL;
  // note: this scratch buffer is allocated on every call and not freed here
  Cuda_Malloc_Safely(reinterpret_cast<void **>(&uint_crd_with_LJ), sizeof(UINT_VECTOR_LJ_TYPE) * atom_numbers);

  UNSIGNED_INT_VECTOR *uint_crd =
    const_cast<UNSIGNED_INT_VECTOR *>(reinterpret_cast<const UNSIGNED_INT_VECTOR *>(uint_crd_f));

  Copy_Crd_To_New_Crd_Start<<<ceilf(static_cast<float>(atom_numbers) / 32), 32, 0, stream>>>(
    atom_numbers, uint_crd, uint_crd_with_LJ, LJtype, charge);

  VECTOR *boxlength = const_cast<VECTOR *>(reinterpret_cast<const VECTOR *>(boxlength_f));
  // ene holds atom_numbers entries, so the reset grid only needs to cover atom_numbers threads
  Reset_List<<<ceilf(static_cast<float>(atom_numbers) / 128), 128, 0, stream>>>(atom_numbers, ene, 0.);
  Dihedral14CFAtomEnergyKernel<<<block_per_grid, thread_per_block, 0, stream>>>(
    dihedral_14_numbers, uint_crd_with_LJ, boxlength, a_14, b_14, cf_scale_factor, ene);

  return;
}

void Dihedral14CFAtomEnergy(const int dihedral_14_numbers, const int atom_numbers, const int *uint_crd_f,
                            const int *LJtype, const float *charge, const float *boxlength_f, const int *a_14,
                            const int *b_14, const float *cf_scale_factor, float *ene, cudaStream_t stream);
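The 1-4 "CF" (Coulomb factor) term above is an ordinary Coulomb pair energy damped by a per-dihedral scale factor, E = cf_scale * qi * qj / r; a one-function restatement (the name is illustrative):

float Dihedral14CFEnergyRef(float qi, float qj, float cf_scale, float r) {
  return cf_scale * qi * qj / r;  // scaled 1-4 Coulomb interaction
}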
@@ -1,27 +0,0 @@
/* Copyright 2021 Huawei Technologies Co., Ltd. Licensed under the Apache License, Version 2.0 (full text as in the headers above). */
#ifndef MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_NB14_DIHEDRAL_14_CF_ATOM_ENERGY_IMPL_H
#define MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_NB14_DIHEDRAL_14_CF_ATOM_ENERGY_IMPL_H

#include <curand_kernel.h>
#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/cuda_common.h"
#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/cuda_device_info.h"

CUDA_LIB_EXPORT void Dihedral14CFAtomEnergy(const int dihedral_14_numbers, const int atom_numbers,
                                            const int *uint_crd_f, const int *LJtype, const float *charge,
                                            const float *boxlength_f, const int *a_14, const int *b_14,
                                            const float *cf_scale_factor, float *ene, cudaStream_t stream);
#endif  // MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_NB14_DIHEDRAL_14_CF_ATOM_ENERGY_IMPL_H
@@ -1,78 +0,0 @@
/**
 * Copyright 2021 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "plugin/device/gpu/kernel/cuda_impl/sponge/nb14/dihedral_14_cf_energy_impl.cuh"
#include "plugin/device/gpu/kernel/cuda_impl/sponge/common_sponge.cuh"

__global__ void Dihedral14CFEnergyKernel(const int dihedral_14_numbers, const UINT_VECTOR_LJ_TYPE *uint_crd,
                                         const VECTOR *boxlength, const int *a_14, const int *b_14,
                                         const float *cf_scale_factor, float *ene) {
  int dihedral_14_i = blockDim.x * blockIdx.x + threadIdx.x;
  if (dihedral_14_i < dihedral_14_numbers) {
    int atom_i = a_14[dihedral_14_i];
    int atom_j = b_14[dihedral_14_i];

    UINT_VECTOR_LJ_TYPE r1 = uint_crd[atom_i];
    UINT_VECTOR_LJ_TYPE r2 = uint_crd[atom_j];

    int int_x;
    int int_y;
    int int_z;
    VECTOR dr;
    float r_1;
    float ene_lin = 0.;

    int_x = r2.uint_x - r1.uint_x;
    int_y = r2.uint_y - r1.uint_y;
    int_z = r2.uint_z - r1.uint_z;
    dr.x = boxlength[0].x * int_x;
    dr.y = boxlength[0].y * int_y;
    dr.z = boxlength[0].z * int_z;
    r_1 = rnorm3df(dr.x, dr.y, dr.z);

    ene_lin = r1.charge * r2.charge * r_1;

    ene_lin *= cf_scale_factor[dihedral_14_i];

    ene[dihedral_14_i] = ene_lin;
  }
}

void Dihedral14CFEnergy(const int dihedral_14_numbers, const int atom_numbers, const int *uint_crd_f,
                        const int *LJtype, const float *charge, float *uint_crd_with_LJ_f, const float *boxlength_f,
                        const int *a_14, const int *b_14, const float *cf_scale_factor, float *ene,
                        cudaStream_t stream) {
  size_t thread_per_block = 32;
  size_t block_per_grid = ceilf(static_cast<float>(dihedral_14_numbers) / 32);

  UNSIGNED_INT_VECTOR *uint_crd =
    const_cast<UNSIGNED_INT_VECTOR *>(reinterpret_cast<const UNSIGNED_INT_VECTOR *>(uint_crd_f));

  UINT_VECTOR_LJ_TYPE *uint_crd_with_LJ = reinterpret_cast<UINT_VECTOR_LJ_TYPE *>(uint_crd_with_LJ_f);

  Copy_Crd_To_New_Crd_Start<<<ceilf(static_cast<float>(atom_numbers) / 32), 32, 0, stream>>>(
    atom_numbers, uint_crd, uint_crd_with_LJ, LJtype, charge);

  VECTOR *boxlength = const_cast<VECTOR *>(reinterpret_cast<const VECTOR *>(boxlength_f));

  Dihedral14CFEnergyKernel<<<block_per_grid, thread_per_block, 0, stream>>>(
    dihedral_14_numbers, uint_crd_with_LJ, boxlength, a_14, b_14, cf_scale_factor, ene);

  return;
}

void Dihedral14CFEnergy(const int dihedral_14_numbers, const int atom_numbers, const int *uint_crd_f,
                        const int *LJtype, const float *charge, float *uint_crd_with_LJ_f, const float *boxlength_f,
                        const int *a_14, const int *b_14, const float *cf_scale_factor, float *ene,
                        cudaStream_t stream);
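Note: the `r2.uint_x - r1.uint_x` subtraction above is where the periodic boundary handling actually happens. Coordinates are stored as 32-bit unsigned fractions of the box, so the two's-complement wrap-around of the unsigned difference, read back as a signed int and scaled by boxlength, is exactly the minimum-image displacement. A hedged CPU sketch of the trick, with an arbitrary example box size:

// Minimal sketch, assuming coordinates span the full 32-bit range across the box.
#include <cstdint>
#include <cstdio>

int main() {
  const float box = 60.0f;                       // example box edge length
  const float scale = box / 4294967296.0f;       // box / 2^32: one uint step in Angstrom
  uint32_t xi = 0xFF000000u;                     // atom i near the +x face
  uint32_t xj = 0x01000000u;                     // atom j near the -x face
  int32_t ix = static_cast<int32_t>(xj - xi);    // wrap-around gives a small positive delta
  printf("dx = %f\n", ix * scale);               // shortest path across the boundary
  return 0;
}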
@@ -1,27 +0,0 @@
/**
 * Copyright 2021 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#ifndef MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_NB14_DIHEDRAL_14_CF_ENERGY_IMPL_H
#define MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_NB14_DIHEDRAL_14_CF_ENERGY_IMPL_H

#include <curand_kernel.h>
#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/cuda_common.h"
#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/cuda_device_info.h"

CUDA_LIB_EXPORT void Dihedral14CFEnergy(const int dihedral_14_numbers, const int atom_numbers, const int *uint_crd_f,
                                        const int *LJtype, const float *charge, float *uint_crd_with_LJ_f,
                                        const float *boxlength_f, const int *a_14, const int *b_14,
                                        const float *cf_scale_factor, float *ene, cudaStream_t stream);
#endif  // MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_NB14_DIHEDRAL_14_CF_ENERGY_IMPL_H
@@ -1,102 +0,0 @@
/**
 * Copyright 2021 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "plugin/device/gpu/kernel/cuda_impl/sponge/nb14/dihedral_14_lj_atom_energy_impl.cuh"
#include "plugin/device/gpu/kernel/cuda_impl/sponge/common_sponge.cuh"

__global__ void Dihedral14LJAtomEnergyKernel(const int dihedral_14_numbers, const UINT_VECTOR_LJ_TYPE *uint_crd,
                                             const VECTOR *boxlength, const int *a_14, const int *b_14,
                                             const float *lj_scale_factor, const float *LJ_type_A,
                                             const float *LJ_type_B, float *ene) {
  int dihedral_14_i = blockDim.x * blockIdx.x + threadIdx.x;
  if (dihedral_14_i < dihedral_14_numbers) {
    int atom_i = a_14[dihedral_14_i];
    int atom_j = b_14[dihedral_14_i];

    UINT_VECTOR_LJ_TYPE r1 = uint_crd[atom_i];
    UINT_VECTOR_LJ_TYPE r2 = uint_crd[atom_j];

    int int_x;
    int int_y;
    int int_z;
    VECTOR dr;
    float dr2;
    float dr_2;
    float dr_4;
    float dr_6;
    float dr_12;
    float ene_lin = 0.;
    int x, y;
    int atom_pair_LJ_type;

    int_x = r2.uint_x - r1.uint_x;
    int_y = r2.uint_y - r1.uint_y;
    int_z = r2.uint_z - r1.uint_z;
    dr.x = boxlength[0].x * int_x;
    dr.y = boxlength[0].y * int_y;
    dr.z = boxlength[0].z * int_z;
    dr2 = dr.x * dr.x + dr.y * dr.y + dr.z * dr.z;

    dr_2 = 1. / dr2;
    dr_4 = dr_2 * dr_2;
    dr_6 = dr_4 * dr_2;
    dr_12 = dr_6 * dr_6;

    y = (r2.LJ_type - r1.LJ_type);
    x = y >> 31;
    y = (y ^ x) - x;
    x = r2.LJ_type + r1.LJ_type;
    r2.LJ_type = (x + y) >> 1;
    x = (x - y) >> 1;
    atom_pair_LJ_type = (r2.LJ_type * (r2.LJ_type + 1) >> 1) + x;

    ene_lin = 0.08333333 * LJ_type_A[atom_pair_LJ_type] * dr_12 -
              0.1666666 * LJ_type_B[atom_pair_LJ_type] * dr_6;  // the LJ A and B coefficients are pre-multiplied by 12 and 6, so divide that back out here
    ene_lin *= lj_scale_factor[dihedral_14_i];

    atomicAdd(&ene[atom_i], ene_lin);
  }
}

void Dihedral14LJAtomEnergy(const int dihedral_14_numbers, const int atom_numbers, const int *uint_crd_f,
                            const int *LJtype, const float *charge, const float *boxlength_f, const int *a_14,
                            const int *b_14, const float *lj_scale_factor, const float *LJ_type_A,
                            const float *LJ_type_B, float *ene, cudaStream_t stream) {
  size_t thread_per_block = 128;
  size_t block_per_grid = ceilf(static_cast<float>(atom_numbers) / 128);
  UINT_VECTOR_LJ_TYPE *uint_crd_with_LJ = NULL;
  Cuda_Malloc_Safely(reinterpret_cast<void **>(&uint_crd_with_LJ), sizeof(UINT_VECTOR_LJ_TYPE) * atom_numbers);

  UNSIGNED_INT_VECTOR *uint_crd =
    const_cast<UNSIGNED_INT_VECTOR *>(reinterpret_cast<const UNSIGNED_INT_VECTOR *>(uint_crd_f));

  Copy_Crd_To_New_Crd_Start<<<ceilf(static_cast<float>(atom_numbers) / 32), 32, 0, stream>>>(
    atom_numbers, uint_crd, uint_crd_with_LJ, LJtype, charge);

  VECTOR *boxlength = const_cast<VECTOR *>(reinterpret_cast<const VECTOR *>(boxlength_f));
  Reset_List<<<ceilf(static_cast<float>(3. * atom_numbers) / 128), 128, 0, stream>>>(atom_numbers, ene, 0.);
  Dihedral14LJAtomEnergyKernel<<<block_per_grid, thread_per_block, 0, stream>>>(
    dihedral_14_numbers, uint_crd_with_LJ, boxlength, a_14, b_14, lj_scale_factor, LJ_type_A, LJ_type_B, ene);

  cudaStreamSynchronize(stream);

  return;
}

void Dihedral14LJAtomEnergy(const int dihedral_14_numbers, const int atom_numbers, const int *uint_crd_f,
                            const int *LJtype, const float *charge, const float *boxlength_f, const int *a_14,
                            const int *b_14, const float *lj_scale_factor, const float *LJ_type_A,
                            const float *LJ_type_B, float *ene, cudaStream_t stream);
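Note: the shift-and-xor block in the kernel above is a branchless way to order the two LJ type indices and map the pair to a row-major slot in a lower-triangular coefficient table: `y >> 31` broadcasts the sign bit, `(y ^ s) - s` is `|y|`, and half of sum plus/minus `|diff|` recovers max and min. A hedged CPU sketch checking it against the obvious formulation:

// Minimal equivalence check; assumes 32-bit ints with arithmetic right shift.
#include <algorithm>
#include <cassert>

int pair_index_branchless(int ti, int tj) {
  int y = tj - ti;
  int s = y >> 31;         // 0 or -1 (sign broadcast)
  y = (y ^ s) - s;         // |tj - ti|
  int x = tj + ti;
  int hi = (x + y) >> 1;   // max(ti, tj)
  int lo = (x - y) >> 1;   // min(ti, tj)
  return (hi * (hi + 1) >> 1) + lo;  // row-major index into triangular table
}

int pair_index_plain(int ti, int tj) {
  int hi = std::max(ti, tj), lo = std::min(ti, tj);
  return hi * (hi + 1) / 2 + lo;
}

int main() {
  for (int i = 0; i < 16; ++i)
    for (int j = 0; j < 16; ++j) assert(pair_index_branchless(i, j) == pair_index_plain(i, j));
  return 0;
}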
@@ -1,28 +0,0 @@
/**
 * Copyright 2021 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#ifndef MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_NB14_DIHEDRAL_14_LJ_ATOM_ENERGY_IMPL_H
#define MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_NB14_DIHEDRAL_14_LJ_ATOM_ENERGY_IMPL_H

#include <curand_kernel.h>
#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/cuda_common.h"
#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/cuda_device_info.h"

CUDA_LIB_EXPORT void Dihedral14LJAtomEnergy(const int dihedral_14_numbers, const int atom_numbers,
                                            const int *uint_crd_f, const int *LJtype, const float *charge,
                                            const float *boxlength_f, const int *a_14, const int *b_14,
                                            const float *lj_scale_factor, const float *LJ_type_A,
                                            const float *LJ_type_B, float *ene, cudaStream_t stream);
#endif  // MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_NB14_DIHEDRAL_14_LJ_ATOM_ENERGY_IMPL_H
@@ -1,139 +0,0 @@
/**
 * Copyright 2021 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "plugin/device/gpu/kernel/cuda_impl/sponge/nb14/dihedral_14_lj_cf_force_with_atom_energy_and_virial_impl.cuh"
#include "plugin/device/gpu/kernel/cuda_impl/sponge/common_sponge.cuh"

__global__ void Dihedral14LJCFForceWithAtomEnergyAndVirialKernel(
  const int dihedral_14_numbers, const UINT_VECTOR_LJ_TYPE *uint_crd, const VECTOR *scaler, const int *a_14,
  const int *b_14, const float *lj_scale_factor, const float *cf_scale_factor, const float *LJ_type_A,
  const float *LJ_type_B, VECTOR *frc, float *atom_energy, float *atom_virial) {
  int dihedral_14_i = blockDim.x * blockIdx.x + threadIdx.x;
  if (dihedral_14_i < dihedral_14_numbers) {
    UINT_VECTOR_LJ_TYPE r1, r2;
    VECTOR dr;
    float dr_abs;
    float dr2;
    float dr_1;
    float dr_2;
    float dr_4;
    float dr_8;
    float dr_14;
    float frc_abs = 0.;
    VECTOR temp_frc;

    float ene_lin;
    float ene_lin2;

    int x, y;
    int atom_pair_LJ_type;

    int atom_i = a_14[dihedral_14_i];
    int atom_j = b_14[dihedral_14_i];

    r1 = uint_crd[atom_i];
    r2 = uint_crd[atom_j];

    dr = Get_Periodic_Displacement(r2, r1, scaler[0]);

    dr2 = dr.x * dr.x + dr.y * dr.y + dr.z * dr.z;

    dr_2 = 1.0 / dr2;
    dr_4 = dr_2 * dr_2;
    dr_8 = dr_4 * dr_4;
    dr_14 = dr_8 * dr_4 * dr_2;
    dr_abs = norm3df(dr.x, dr.y, dr.z);
    dr_1 = 1. / dr_abs;

    // CF
    float charge_i = r1.charge;
    float charge_j = r2.charge;
    float frc_cf_abs;
    frc_cf_abs = cf_scale_factor[dihedral_14_i] * dr_2 * dr_1;
    frc_cf_abs = -charge_i * charge_j * frc_cf_abs;
    // LJ
    y = (r2.LJ_type - r1.LJ_type);
    x = y >> 31;
    y = (y ^ x) - x;
    x = r2.LJ_type + r1.LJ_type;
    r2.LJ_type = (x + y) >> 1;
    x = (x - y) >> 1;
    atom_pair_LJ_type = (r2.LJ_type * (r2.LJ_type + 1) >> 1) + x;

    frc_abs = -LJ_type_A[atom_pair_LJ_type] * dr_14 + LJ_type_B[atom_pair_LJ_type] * dr_8;
    frc_abs *= lj_scale_factor[dihedral_14_i];

    frc_abs += frc_cf_abs;
    temp_frc.x = frc_abs * dr.x;
    temp_frc.y = frc_abs * dr.y;
    temp_frc.z = frc_abs * dr.z;

    atomicAdd(&frc[atom_j].x, -temp_frc.x);
    atomicAdd(&frc[atom_j].y, -temp_frc.y);
    atomicAdd(&frc[atom_j].z, -temp_frc.z);
    atomicAdd(&frc[atom_i].x, temp_frc.x);
    atomicAdd(&frc[atom_i].y, temp_frc.y);
    atomicAdd(&frc[atom_i].z, temp_frc.z);

    ene_lin = r1.charge * r2.charge * dr_1;
    ene_lin *= cf_scale_factor[dihedral_14_i];
    ene_lin2 = 0.08333333 * LJ_type_A[atom_pair_LJ_type] * dr_4 * dr_8 -
               0.1666666 * LJ_type_B[atom_pair_LJ_type] * dr_4 * dr_2;  // the LJ A and B coefficients are pre-multiplied by 12 and 6, so divide that back out here
    ene_lin2 *= lj_scale_factor[dihedral_14_i];

    atomicAdd(&atom_energy[atom_i], ene_lin + ene_lin2);

    atomicAdd(&atom_virial[atom_i], -temp_frc * dr);
  }
}

void Dihedral14LJCFForceWithAtomEnergyAndVirial(const int dihedral_14_numbers, const int atom_numbers,
                                                const int *uint_crd_f, const int *LJtype, const float *charge,
                                                float *uint_crd_with_LJ_f, const float *boxlength_f, const int *a_14,
                                                const int *b_14, const float *lj_scale_factor,
                                                const float *cf_scale_factor, const float *LJ_type_A,
                                                const float *LJ_type_B, float *frc_f, float *atom_energy,
                                                float *atom_virial, cudaStream_t stream) {
  size_t thread_per_block = 128;
  size_t block_per_grid = ceilf(static_cast<float>(atom_numbers) / 128);
  UNSIGNED_INT_VECTOR *uint_crd =
    const_cast<UNSIGNED_INT_VECTOR *>(reinterpret_cast<const UNSIGNED_INT_VECTOR *>(uint_crd_f));

  UINT_VECTOR_LJ_TYPE *uint_crd_with_LJ = reinterpret_cast<UINT_VECTOR_LJ_TYPE *>(uint_crd_with_LJ_f);

  Copy_Crd_To_New_Crd_Start<<<ceilf(static_cast<float>(atom_numbers) / 32), 32, 0, stream>>>(
    atom_numbers, uint_crd, uint_crd_with_LJ, LJtype, charge);

  Reset_List<<<ceilf(static_cast<float>(3. * atom_numbers) / 128), 128, 0, stream>>>(3 * atom_numbers, frc_f, 0.);
  Reset_List<<<ceilf(static_cast<float>(3. * atom_numbers) / 128), 128, 0, stream>>>(atom_numbers, atom_energy, 0.);
  Reset_List<<<ceilf(static_cast<float>(3. * atom_numbers) / 128), 128, 0, stream>>>(atom_numbers, atom_virial, 0.);
  VECTOR *boxlength = const_cast<VECTOR *>(reinterpret_cast<const VECTOR *>(boxlength_f));
  VECTOR *frc = const_cast<VECTOR *>(reinterpret_cast<const VECTOR *>(frc_f));

  Dihedral14LJCFForceWithAtomEnergyAndVirialKernel<<<block_per_grid, thread_per_block, 0, stream>>>(
    dihedral_14_numbers, uint_crd_with_LJ, boxlength, a_14, b_14, lj_scale_factor, cf_scale_factor, LJ_type_A,
    LJ_type_B, frc, atom_energy, atom_virial);

  return;
}

void Dihedral14LJCFForceWithAtomEnergyAndVirial(const int dihedral_14_numbers, const int atom_numbers,
                                                const int *uint_crd_f, const int *LJtype, const float *charge,
                                                float *uint_crd_with_LJ_f, const float *boxlength_f, const int *a_14,
                                                const int *b_14, const float *lj_scale_factor,
                                                const float *cf_scale_factor, const float *LJ_type_A,
                                                const float *LJ_type_B, float *frc_f, float *atom_energy,
                                                float *atom_virial, cudaStream_t stream);
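Note: the last atomicAdd in the kernel above books the pair's virial contribution onto atom i only. For a pair force applied as +F on atom i and -F on atom j with dr = r_j - r_i, the scalar virial of the pair is W = -F . dr, which is exactly `-temp_frc * dr` with the overloaded VECTOR dot product. A hedged standalone restatement:

// Minimal sketch of the per-pair virial bookkeeping, with a local Vec3 stand-in
// for the file's VECTOR type (an assumption for self-containedness).
struct Vec3 { float x, y, z; };
float dot(Vec3 a, Vec3 b) { return a.x * b.x + a.y * b.y + a.z * b.z; }

float pair_virial(Vec3 force_on_i, Vec3 dr_ij) {
  return -dot(force_on_i, dr_ij);  // accumulated onto atom i only, as in the kernel
}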
@@ -1,28 +0,0 @@
/**
 * Copyright 2021 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#ifndef MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_NB14_DIHEDRAL_14_LJ_CF_FORCE_WITH_ATOM_ENERGY_AND_VIRIAL_IMPL_H
#define MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_NB14_DIHEDRAL_14_LJ_CF_FORCE_WITH_ATOM_ENERGY_AND_VIRIAL_IMPL_H

#include <curand_kernel.h>
#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/cuda_common.h"
#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/cuda_device_info.h"

CUDA_LIB_EXPORT void Dihedral14LJCFForceWithAtomEnergyAndVirial(
  const int dihedral_14_numbers, const int atom_numbers, const int *uint_crd_f, const int *LJtype, const float *charge,
  float *uint_crd_with_LJ_f, const float *boxlength_f, const int *a_14, const int *b_14, const float *lj_scale_factor,
  const float *cf_scale_factor, const float *LJ_type_A, const float *LJ_type_B, float *frc_f, float *atom_energy,
  float *atom_virial, cudaStream_t stream);
#endif  // MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_NB14_DIHEDRAL_14_LJ_CF_FORCE_WITH_ATOM_ENERGY_AND_VIRIAL_IMPL_H
@@ -1,140 +0,0 @@
/**
 * Copyright 2021 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "plugin/device/gpu/kernel/cuda_impl/sponge/nb14/dihedral_14_lj_cf_force_with_atom_energy_impl.cuh"
#include "plugin/device/gpu/kernel/cuda_impl/sponge/common_sponge.cuh"

__global__ void Dihedral14LJCFForceWithAtomEnergyKernel(const int dihedral_14_numbers,
                                                        const UINT_VECTOR_LJ_TYPE *uint_crd, const VECTOR *boxlength,
                                                        const int *a_14, const int *b_14, const float *lj_scale_factor,
                                                        const float *cf_scale_factor, const float *LJ_type_A,
                                                        const float *LJ_type_B, VECTOR *frc, float *atom_energy) {
  int dihedral_14_i = blockDim.x * blockIdx.x + threadIdx.x;
  if (dihedral_14_i < dihedral_14_numbers) {
    int int_x;
    int int_y;
    int int_z;
    UINT_VECTOR_LJ_TYPE r1, r2;
    VECTOR dr;
    float dr_abs;
    float dr2;
    float dr_1;
    float dr_2;
    float dr_4;
    float dr_8;
    float dr_14;
    float frc_abs = 0.;
    VECTOR temp_frc;

    float ene_lin;
    float ene_lin2;

    int x, y;
    int atom_pair_LJ_type;

    int atom_i = a_14[dihedral_14_i];
    int atom_j = b_14[dihedral_14_i];

    r1 = uint_crd[atom_i];
    r2 = uint_crd[atom_j];
    int_x = r2.uint_x - r1.uint_x;
    int_y = r2.uint_y - r1.uint_y;
    int_z = r2.uint_z - r1.uint_z;
    dr.x = boxlength[0].x * int_x;
    dr.y = boxlength[0].y * int_y;
    dr.z = boxlength[0].z * int_z;
    dr2 = dr.x * dr.x + dr.y * dr.y + dr.z * dr.z;

    dr_2 = 1.0 / dr2;
    dr_4 = dr_2 * dr_2;
    dr_8 = dr_4 * dr_4;
    dr_14 = dr_8 * dr_4 * dr_2;
    dr_abs = norm3df(dr.x, dr.y, dr.z);
    dr_1 = 1. / dr_abs;

    float charge_i = r1.charge;
    float charge_j = r2.charge;
    float frc_cf_abs;
    frc_cf_abs = cf_scale_factor[dihedral_14_i] * dr_2 * dr_1;
    frc_cf_abs = -charge_i * charge_j * frc_cf_abs;

    y = (r2.LJ_type - r1.LJ_type);
    x = y >> 31;
    y = (y ^ x) - x;
    x = r2.LJ_type + r1.LJ_type;
    r2.LJ_type = (x + y) >> 1;
    x = (x - y) >> 1;
    atom_pair_LJ_type = (r2.LJ_type * (r2.LJ_type + 1) >> 1) + x;

    frc_abs = -LJ_type_A[atom_pair_LJ_type] * dr_14 + LJ_type_B[atom_pair_LJ_type] * dr_8;
    frc_abs *= lj_scale_factor[dihedral_14_i];

    frc_abs += frc_cf_abs;
    temp_frc.x = frc_abs * dr.x;
    temp_frc.y = frc_abs * dr.y;
    temp_frc.z = frc_abs * dr.z;

    atomicAdd(&frc[atom_j].x, -temp_frc.x);
    atomicAdd(&frc[atom_j].y, -temp_frc.y);
    atomicAdd(&frc[atom_j].z, -temp_frc.z);
    atomicAdd(&frc[atom_i].x, temp_frc.x);
    atomicAdd(&frc[atom_i].y, temp_frc.y);
    atomicAdd(&frc[atom_i].z, temp_frc.z);

    ene_lin = r1.charge * r2.charge * dr_1;
    ene_lin *= cf_scale_factor[dihedral_14_i];
    ene_lin2 = 0.08333333 * LJ_type_A[atom_pair_LJ_type] * dr_4 * dr_8 -
               0.1666666 * LJ_type_B[atom_pair_LJ_type] * dr_4 * dr_2;  // the LJ A and B coefficients are pre-multiplied by 12 and 6, so divide that back out here
    ene_lin2 *= lj_scale_factor[dihedral_14_i];

    atomicAdd(&atom_energy[atom_i], ene_lin + ene_lin2);
  }
}

void Dihedral14LJCFForceWithAtomEnergy(const int dihedral_14_numbers, const int atom_numbers, const int *uint_crd_f,
                                       const int *LJtype, const float *charge, float *uint_crd_with_LJ_f,
                                       const float *boxlength_f, const int *a_14, const int *b_14,
                                       const float *lj_scale_factor, const float *cf_scale_factor,
                                       const float *LJ_type_A, const float *LJ_type_B, float *frc_f, float *atom_energy,
                                       cudaStream_t stream) {
  size_t thread_per_block = 128;
  size_t block_per_grid = ceilf(static_cast<float>(dihedral_14_numbers) / 128);
  UNSIGNED_INT_VECTOR *uint_crd =
    const_cast<UNSIGNED_INT_VECTOR *>(reinterpret_cast<const UNSIGNED_INT_VECTOR *>(uint_crd_f));

  UINT_VECTOR_LJ_TYPE *uint_crd_with_LJ = reinterpret_cast<UINT_VECTOR_LJ_TYPE *>(uint_crd_with_LJ_f);

  Copy_Crd_To_New_Crd_Start<<<ceilf(static_cast<float>(atom_numbers) / 32), 32, 0, stream>>>(
    atom_numbers, uint_crd, uint_crd_with_LJ, LJtype, charge);

  Reset_List<<<ceilf(static_cast<float>(3. * atom_numbers) / 128), 128, 0, stream>>>(3 * atom_numbers, frc_f, 0.);
  Reset_List<<<ceilf(static_cast<float>(atom_numbers) / 128), 128, 0, stream>>>(atom_numbers, atom_energy, 0.);
  VECTOR *boxlength = const_cast<VECTOR *>(reinterpret_cast<const VECTOR *>(boxlength_f));
  VECTOR *frc = const_cast<VECTOR *>(reinterpret_cast<const VECTOR *>(frc_f));

  Dihedral14LJCFForceWithAtomEnergyKernel<<<block_per_grid, thread_per_block, 0, stream>>>(
    dihedral_14_numbers, uint_crd_with_LJ, boxlength, a_14, b_14, lj_scale_factor, cf_scale_factor, LJ_type_A,
    LJ_type_B, frc, atom_energy);

  return;
}

void Dihedral14LJCFForceWithAtomEnergy(const int dihedral_14_numbers, const int atom_numbers, const int *uint_crd_f,
                                       const int *LJtype, const float *charge, float *uint_crd_with_LJ_f,
                                       const float *boxlength_f, const int *a_14, const int *b_14,
                                       const float *lj_scale_factor, const float *cf_scale_factor,
                                       const float *LJ_type_A, const float *LJ_type_B, float *frc_f, float *atom_energy,
                                       cudaStream_t stream);
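Note: the `frc_abs` and `frc_cf_abs` terms above follow from differentiating the pair energy. With the table entries pre-multiplied (A = 12a, B = 6b), E = A/12 r^-12 - B/6 r^-6 + q_i q_j / r, so the force magnitude divided by r is (dE/dr)/r = -A r^-14 + B r^-8 - q_i q_j r^-3; multiplying by the dr vector gives the Cartesian force. A hedged CPU reference:

// Minimal sketch under the pre-multiplied-coefficient assumption stated above.
#include <cmath>

float frc_over_r(float A, float B, float qi, float qj, float r, float lj_s, float cf_s) {
  float r2 = r * r;
  float inv2 = 1.0f / r2;
  float inv8 = inv2 * inv2 * inv2 * inv2;
  float inv14 = inv8 * inv2 * inv2 * inv2;
  float lj = (-A * inv14 + B * inv8) * lj_s;  // matches frc_abs in the kernel
  float cf = -qi * qj * cf_s / (r2 * r);      // matches frc_cf_abs in the kernel
  return lj + cf;                             // multiply by dr to get the vector force
}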
@@ -1,30 +0,0 @@
/**
 * Copyright 2021 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#ifndef MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_NB14_DIHEDRAL_14_LJ_CF_FORCE_WITH_ATOM_ENERGY_IMPL_H
#define MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_NB14_DIHEDRAL_14_LJ_CF_FORCE_WITH_ATOM_ENERGY_IMPL_H

#include <curand_kernel.h>
#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/cuda_common.h"
#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/cuda_device_info.h"

CUDA_LIB_EXPORT void Dihedral14LJCFForceWithAtomEnergy(const int dihedral_14_numbers, const int atom_numbers,
                                                       const int *uint_crd_f, const int *LJtype, const float *charge,
                                                       float *uint_crd_with_LJ_f, const float *boxlength_f,
                                                       const int *a_14, const int *b_14, const float *lj_scale_factor,
                                                       const float *cf_scale_factor, const float *LJ_type_A,
                                                       const float *LJ_type_B, float *frc_f, float *atom_energy,
                                                       cudaStream_t stream);
#endif  // MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_NB14_DIHEDRAL_14_LJ_CF_FORCE_WITH_ATOM_ENERGY_IMPL_H
@@ -1,98 +0,0 @@
/**
 * Copyright 2021 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "plugin/device/gpu/kernel/cuda_impl/sponge/nb14/dihedral_14_lj_energy_impl.cuh"
#include "plugin/device/gpu/kernel/cuda_impl/sponge/common_sponge.cuh"

__global__ void Dihedral14LJEnergyKernel(const int dihedral_14_numbers, const UINT_VECTOR_LJ_TYPE *uint_crd,
                                         const VECTOR *boxlength, const int *a_14, const int *b_14,
                                         const float *lj_scale_factor, const float *LJ_type_A, const float *LJ_type_B,
                                         float *ene) {
  int dihedral_14_i = blockDim.x * blockIdx.x + threadIdx.x;
  if (dihedral_14_i < dihedral_14_numbers) {
    int atom_i = a_14[dihedral_14_i];
    int atom_j = b_14[dihedral_14_i];

    UINT_VECTOR_LJ_TYPE r1 = uint_crd[atom_i];
    UINT_VECTOR_LJ_TYPE r2 = uint_crd[atom_j];

    int int_x;
    int int_y;
    int int_z;
    VECTOR dr;
    float dr2;
    float dr_2;
    float dr_4;
    float dr_6;
    float dr_12;
    float ene_lin = 0.;
    int x, y;
    int atom_pair_LJ_type;

    int_x = r2.uint_x - r1.uint_x;
    int_y = r2.uint_y - r1.uint_y;
    int_z = r2.uint_z - r1.uint_z;
    dr.x = boxlength[0].x * int_x;
    dr.y = boxlength[0].y * int_y;
    dr.z = boxlength[0].z * int_z;
    dr2 = dr.x * dr.x + dr.y * dr.y + dr.z * dr.z;

    dr_2 = 1. / dr2;
    dr_4 = dr_2 * dr_2;
    dr_6 = dr_4 * dr_2;
    dr_12 = dr_6 * dr_6;

    y = (r2.LJ_type - r1.LJ_type);
    x = y >> 31;
    y = (y ^ x) - x;
    x = r2.LJ_type + r1.LJ_type;
    r2.LJ_type = (x + y) >> 1;
    x = (x - y) >> 1;
    atom_pair_LJ_type = (r2.LJ_type * (r2.LJ_type + 1) >> 1) + x;

    ene_lin = 0.08333333 * LJ_type_A[atom_pair_LJ_type] * dr_12 -
              0.1666666 * LJ_type_B[atom_pair_LJ_type] * dr_6;  // the LJ A and B coefficients are pre-multiplied by 12 and 6, so divide that back out here
    ene_lin *= lj_scale_factor[dihedral_14_i];

    ene[dihedral_14_i] = ene_lin;
  }
}

void Dihedral14LJEnergy(const int dihedral_14_numbers, const int atom_numbers, const int *uint_crd_f,
                        const int *LJtype, const float *charge, float *uint_crd_with_LJ_f, const float *boxlength_f,
                        const int *a_14, const int *b_14, const float *lj_scale_factor, const float *LJ_type_A,
                        const float *LJ_type_B, float *ene, cudaStream_t stream) {
  size_t thread_per_block = 32;
  size_t block_per_grid = ceilf(static_cast<float>(dihedral_14_numbers) / 32);
  UNSIGNED_INT_VECTOR *uint_crd =
    const_cast<UNSIGNED_INT_VECTOR *>(reinterpret_cast<const UNSIGNED_INT_VECTOR *>(uint_crd_f));

  UINT_VECTOR_LJ_TYPE *uint_crd_with_LJ = reinterpret_cast<UINT_VECTOR_LJ_TYPE *>(uint_crd_with_LJ_f);

  Copy_Crd_To_New_Crd_Start<<<ceilf(static_cast<float>(atom_numbers) / 32), 32, 0, stream>>>(
    atom_numbers, uint_crd, uint_crd_with_LJ, LJtype, charge);
  VECTOR *boxlength = const_cast<VECTOR *>(reinterpret_cast<const VECTOR *>(boxlength_f));

  Dihedral14LJEnergyKernel<<<block_per_grid, thread_per_block, 0, stream>>>(
    dihedral_14_numbers, uint_crd_with_LJ, boxlength, a_14, b_14, lj_scale_factor, LJ_type_A, LJ_type_B, ene);

  return;
}

void Dihedral14LJEnergy(const int dihedral_14_numbers, const int atom_numbers, const int *uint_crd_f,
                        const int *LJtype, const float *charge, float *uint_crd_with_LJ_f, const float *boxlength_f,
                        const int *a_14, const int *b_14, const float *lj_scale_factor, const float *LJ_type_A,
                        const float *LJ_type_B, float *ene, cudaStream_t stream);
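Note: the constants 0.08333333 and 0.1666666 in the energy expressions above are 1/12 and 1/6, undoing the pre-multiplication of the table coefficients that the force kernels depend on. A hedged check that the convention round-trips, with arbitrary example coefficients:

// Minimal sketch, assuming tables store A = 12*a and B = 6*b.
#include <cassert>
#include <cmath>

float lj14_energy(float A, float B, float r, float scale) {
  float inv6 = 1.0f / (r * r * r * r * r * r);
  return scale * (A / 12.0f * inv6 * inv6 - B / 6.0f * inv6);  // A/12 r^-12 - B/6 r^-6
}

int main() {
  float a = 2.0f, b = 3.0f, r = 1.5f;  // hypothetical example values
  float direct = a * std::pow(r, -12.0f) - b * std::pow(r, -6.0f);
  assert(std::fabs(lj14_energy(12.0f * a, 6.0f * b, r, 1.0f) - direct) < 1e-5f);
  return 0;
}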
@@ -1,29 +0,0 @@
/**
 * Copyright 2021 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#ifndef MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_NB14_DIHEDRAL_14_LJ_ENERGY_IMPL_H
#define MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_NB14_DIHEDRAL_14_LJ_ENERGY_IMPL_H

#include <curand_kernel.h>
#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/cuda_common.h"
#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/cuda_device_info.h"

CUDA_LIB_EXPORT void Dihedral14LJEnergy(const int dihedral_14_numbers, const int atom_numbers, const int *uint_crd_f,
                                        const int *LJtype, const float *charge, float *uint_crd_with_LJ_f,
                                        const float *boxlength_f, const int *a_14, const int *b_14,
                                        const float *lj_scale_factor, const float *LJ_type_A, const float *LJ_type_B,
                                        float *ene, cudaStream_t stream);

#endif  // MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_NB14_DIHEDRAL_14_LJ_ENERGY_IMPL_H
@@ -1,111 +0,0 @@
/**
 * Copyright 2021 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "plugin/device/gpu/kernel/cuda_impl/sponge/nb14/dihedral_14_lj_force_impl.cuh"
#include "plugin/device/gpu/kernel/cuda_impl/sponge/common_sponge.cuh"

__global__ void Dihedral14LJForceKernel(const int dihedral_14_numbers, const UINT_VECTOR_LJ_TYPE *uint_crd,
                                        const VECTOR *boxlength, const int *a_14, const int *b_14,
                                        const float *lj_scale_factor, const float *LJ_type_A, const float *LJ_type_B,
                                        VECTOR *frc) {
  int dihedral_14_i = blockDim.x * blockIdx.x + threadIdx.x;
  if (dihedral_14_i < dihedral_14_numbers) {
    int int_x;
    int int_y;
    int int_z;
    UINT_VECTOR_LJ_TYPE r1, r2;
    VECTOR dr;
    float dr2;
    float dr_2;
    float dr_4;
    float dr_8;
    float dr_14;
    float frc_abs = 0.;
    VECTOR temp_frc;
    int x, y;
    int atom_pair_LJ_type;

    int atom_i = a_14[dihedral_14_i];
    int atom_j = b_14[dihedral_14_i];

    r1 = uint_crd[atom_i];
    r2 = uint_crd[atom_j];

    int_x = r2.uint_x - r1.uint_x;
    int_y = r2.uint_y - r1.uint_y;
    int_z = r2.uint_z - r1.uint_z;
    dr.x = boxlength[0].x * int_x;
    dr.y = boxlength[0].y * int_y;
    dr.z = boxlength[0].z * int_z;
    dr2 = dr.x * dr.x + dr.y * dr.y + dr.z * dr.z;

    dr_2 = 1.0 / dr2;
    dr_4 = dr_2 * dr_2;
    dr_8 = dr_4 * dr_4;
    dr_14 = dr_8 * dr_4 * dr_2;

    y = (r2.LJ_type - r1.LJ_type);
    x = y >> 31;
    y = (y ^ x) - x;
    x = r2.LJ_type + r1.LJ_type;
    r2.LJ_type = (x + y) >> 1;
    x = (x - y) >> 1;
    atom_pair_LJ_type = (r2.LJ_type * (r2.LJ_type + 1) >> 1) + x;

    frc_abs = -LJ_type_A[atom_pair_LJ_type] * dr_14 + LJ_type_B[atom_pair_LJ_type] * dr_8;
    frc_abs *= lj_scale_factor[dihedral_14_i];
    temp_frc.x = frc_abs * dr.x;
    temp_frc.y = frc_abs * dr.y;
    temp_frc.z = frc_abs * dr.z;

    atomicAdd(&frc[atom_j].x, -temp_frc.x);
    atomicAdd(&frc[atom_j].y, -temp_frc.y);
    atomicAdd(&frc[atom_j].z, -temp_frc.z);
    atomicAdd(&frc[atom_i].x, temp_frc.x);
    atomicAdd(&frc[atom_i].y, temp_frc.y);
    atomicAdd(&frc[atom_i].z, temp_frc.z);
  }
}

void Dihedral14LJForce(const int dihedral_14_numbers, const int atom_numbers, const int *uint_crd_f, const int *LJtype,
                       const float *charge, const float *boxlength_f, const int *a_14, const int *b_14,
                       const float *lj_scale_factor, const float *LJ_type_A, const float *LJ_type_B, float *frc_f,
                       cudaStream_t stream) {
  size_t thread_per_block = 128;
  size_t block_per_grid = ceilf(static_cast<float>(atom_numbers) / 128);
  UINT_VECTOR_LJ_TYPE *uint_crd_with_LJ = NULL;
  Cuda_Malloc_Safely(reinterpret_cast<void **>(&uint_crd_with_LJ), sizeof(UINT_VECTOR_LJ_TYPE) * atom_numbers);

  UNSIGNED_INT_VECTOR *uint_crd =
    const_cast<UNSIGNED_INT_VECTOR *>(reinterpret_cast<const UNSIGNED_INT_VECTOR *>(uint_crd_f));

  Copy_Crd_To_New_Crd_Start<<<ceilf(static_cast<float>(atom_numbers) / 32), 32, 0, stream>>>(
    atom_numbers, uint_crd, uint_crd_with_LJ, LJtype, charge);
  cudaStreamSynchronize(stream);
  Reset_List<<<ceilf(static_cast<float>(3. * atom_numbers) / 128), 128, 0, stream>>>(3 * atom_numbers, frc_f, 0.);
  VECTOR *boxlength = const_cast<VECTOR *>(reinterpret_cast<const VECTOR *>(boxlength_f));
  VECTOR *frc = const_cast<VECTOR *>(reinterpret_cast<const VECTOR *>(frc_f));

  Dihedral14LJForceKernel<<<block_per_grid, thread_per_block, 0, stream>>>(
    dihedral_14_numbers, uint_crd_with_LJ, boxlength, a_14, b_14, lj_scale_factor, LJ_type_A, LJ_type_B, frc);
  cudaStreamSynchronize(stream);
  return;
}

void Dihedral14LJForce(const int dihedral_14_numbers, const int atom_numbers, const int *uint_crd_f, const int *LJtype,
                       const float *charge, const float *boxlength_f, const int *a_14, const int *b_14,
                       const float *lj_scale_factor, const float *LJ_type_A, const float *LJ_type_B, float *frc_f,
                       cudaStream_t stream);
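Note: throughout these wrappers, Reset_List zeroes a flat float list, and force buffers are float3 arrays viewed as 3*N flat floats, which is why both the element count and the grid size are computed from 3*atom_numbers. A hedged sketch of the same convention with plain integer ceiling division instead of the ceilf round trip:

// Minimal sketch; ResetList here is a local stand-in for the file's Reset_List.
#include <cuda_runtime.h>

__global__ void ResetList(int n, float *list, float value) {
  int i = blockDim.x * blockIdx.x + threadIdx.x;
  if (i < n) list[i] = value;
}

void zero_forces(float *frc_f, int atom_numbers, cudaStream_t stream) {
  int n = 3 * atom_numbers;             // x, y, z components per atom
  int block = 128;
  int grid = (n + block - 1) / block;   // integer ceiling division
  ResetList<<<grid, block, 0, stream>>>(n, frc_f, 0.0f);
}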
@@ -1,28 +0,0 @@
/**
 * Copyright 2021 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#ifndef MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_NB14_DIHEDRAL_14_LJ_FORCE_IMPL_H
#define MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_NB14_DIHEDRAL_14_LJ_FORCE_IMPL_H

#include <curand_kernel.h>
#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/cuda_common.h"
#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/cuda_device_info.h"

CUDA_LIB_EXPORT void Dihedral14LJForce(const int dihedral_14_numbers, const int atom_numbers, const int *uint_crd_f,
                                       const int *LJtype, const float *charge, const float *boxlength_f,
                                       const int *a_14, const int *b_14, const float *lj_scale_factor,
                                       const float *LJ_type_A, const float *LJ_type_B, float *frc_f,
                                       cudaStream_t stream);
#endif  // MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_NB14_DIHEDRAL_14_LJ_FORCE_IMPL_H
@@ -1,124 +0,0 @@
/**
 * Copyright 2021 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "plugin/device/gpu/kernel/cuda_impl/sponge/nb14/dihedral_14_lj_force_with_direct_cf_impl.cuh"
#include "plugin/device/gpu/kernel/cuda_impl/sponge/common_sponge.cuh"

__global__ void Dihedral14LJForceWithDirectCFKernel(const int dihedral_14_numbers, const UINT_VECTOR_LJ_TYPE *uint_crd,
                                                    const VECTOR *boxlength, const int *a_14, const int *b_14,
                                                    const float *lj_scale_factor, const float *cf_scale_factor,
                                                    const float *LJ_type_A, const float *LJ_type_B, VECTOR *frc) {
  int dihedral_14_i = blockDim.x * blockIdx.x + threadIdx.x;
  if (dihedral_14_i < dihedral_14_numbers) {
    int int_x;
    int int_y;
    int int_z;
    UINT_VECTOR_LJ_TYPE r1, r2;
    VECTOR dr;
    float dr_abs;
    float dr2;
    float dr_1;
    float dr_2;
    float dr_4;
    float dr_8;
    float dr_14;
    float frc_abs = 0.;
    VECTOR temp_frc;

    int x, y;
    int atom_pair_LJ_type;

    int atom_i = a_14[dihedral_14_i];
    int atom_j = b_14[dihedral_14_i];

    r1 = uint_crd[atom_i];
    r2 = uint_crd[atom_j];
    int_x = r2.uint_x - r1.uint_x;
    int_y = r2.uint_y - r1.uint_y;
    int_z = r2.uint_z - r1.uint_z;
    dr.x = boxlength[0].x * int_x;
    dr.y = boxlength[0].y * int_y;
    dr.z = boxlength[0].z * int_z;
    dr2 = dr.x * dr.x + dr.y * dr.y + dr.z * dr.z;

    dr_2 = 1.0 / dr2;
    dr_4 = dr_2 * dr_2;
    dr_8 = dr_4 * dr_4;
    dr_14 = dr_8 * dr_4 * dr_2;
    dr_abs = norm3df(dr.x, dr.y, dr.z);
    dr_1 = 1. / dr_abs;

    float charge_i = r1.charge;
    float charge_j = r2.charge;
    float frc_cf_abs;
    frc_cf_abs = cf_scale_factor[dihedral_14_i] * dr_2 * dr_1;
    frc_cf_abs = -charge_i * charge_j * frc_cf_abs;
    // LJ
    y = (r2.LJ_type - r1.LJ_type);
    x = y >> 31;
    y = (y ^ x) - x;
    x = r2.LJ_type + r1.LJ_type;
    r2.LJ_type = (x + y) >> 1;
    x = (x - y) >> 1;
    atom_pair_LJ_type = (r2.LJ_type * (r2.LJ_type + 1) >> 1) + x;

    frc_abs = -LJ_type_A[atom_pair_LJ_type] * dr_14 + LJ_type_B[atom_pair_LJ_type] * dr_8;
    frc_abs *= lj_scale_factor[dihedral_14_i];

    frc_abs += frc_cf_abs;
    temp_frc.x = frc_abs * dr.x;
    temp_frc.y = frc_abs * dr.y;
    temp_frc.z = frc_abs * dr.z;

    atomicAdd(&frc[atom_j].x, -temp_frc.x);
    atomicAdd(&frc[atom_j].y, -temp_frc.y);
    atomicAdd(&frc[atom_j].z, -temp_frc.z);
    atomicAdd(&frc[atom_i].x, temp_frc.x);
    atomicAdd(&frc[atom_i].y, temp_frc.y);
    atomicAdd(&frc[atom_i].z, temp_frc.z);
  }
}

void Dihedral14LJForceWithDirectCF(const int dihedral_14_numbers, const int atom_numbers, const int *uint_crd_f,
                                   const int *LJtype, const float *charge, const float *boxlength_f, const int *a_14,
                                   const int *b_14, const float *lj_scale_factor, const float *cf_scale_factor,
                                   const float *LJ_type_A, const float *LJ_type_B, float *frc_f, cudaStream_t stream) {
  size_t thread_per_block = 128;
  size_t block_per_grid = ceilf(static_cast<float>(atom_numbers) / 128);
  UINT_VECTOR_LJ_TYPE *uint_crd_with_LJ = NULL;
  Cuda_Malloc_Safely(reinterpret_cast<void **>(&uint_crd_with_LJ), sizeof(UINT_VECTOR_LJ_TYPE) * atom_numbers);

  UNSIGNED_INT_VECTOR *uint_crd =
    const_cast<UNSIGNED_INT_VECTOR *>(reinterpret_cast<const UNSIGNED_INT_VECTOR *>(uint_crd_f));

  Copy_Crd_To_New_Crd_Start<<<ceilf(static_cast<float>(atom_numbers) / 32), 32, 0, stream>>>(
    atom_numbers, uint_crd, uint_crd_with_LJ, LJtype, charge);
  cudaStreamSynchronize(stream);
  VECTOR *boxlength = const_cast<VECTOR *>(reinterpret_cast<const VECTOR *>(boxlength_f));
  Reset_List<<<ceilf(static_cast<float>(3. * atom_numbers) / 128), 128, 0, stream>>>(3 * atom_numbers, frc_f, 0.);
  VECTOR *frc = const_cast<VECTOR *>(reinterpret_cast<const VECTOR *>(frc_f));

  Dihedral14LJForceWithDirectCFKernel<<<block_per_grid, thread_per_block, 0, stream>>>(
    dihedral_14_numbers, uint_crd_with_LJ, boxlength, a_14, b_14, lj_scale_factor, cf_scale_factor, LJ_type_A,
    LJ_type_B, frc);

  return;
}

void Dihedral14LJForceWithDirectCF(const int dihedral_14_numbers, const int atom_numbers, const int *uint_crd_f,
                                   const int *LJtype, const float *charge, const float *boxlength_f, const int *a_14,
                                   const int *b_14, const float *lj_scale_factor, const float *cf_scale_factor,
                                   const float *LJ_type_A, const float *LJ_type_B, float *frc_f, cudaStream_t stream);
@@ -1,29 +0,0 @@
/**
 * Copyright 2021 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#ifndef MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_NB14_DIHEDRAL_14_LJ_FORCE_WITH_DIRECT_CF_IMPL_H
#define MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_NB14_DIHEDRAL_14_LJ_FORCE_WITH_DIRECT_CF_IMPL_H

#include <curand_kernel.h>
#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/cuda_common.h"
#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/cuda_device_info.h"

CUDA_LIB_EXPORT void Dihedral14LJForceWithDirectCF(const int dihedral_14_numbers, const int atom_numbers,
                                                   const int *uint_crd_f, const int *LJtype, const float *charge,
                                                   const float *boxlength_f, const int *a_14, const int *b_14,
                                                   const float *lj_scale_factor, const float *cf_scale_factor,
                                                   const float *LJ_type_A, const float *LJ_type_B, float *frc_f,
                                                   cudaStream_t stream);
#endif  // MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_NB14_DIHEDRAL_14_LJ_FORCE_WITH_DIRECT_CF_IMPL_H
@@ -1,644 +0,0 @@
/**
 * Copyright 2021 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/**
 * Note:
 * NeighborListUpdate. This is an experimental interface that is subject to change and/or deletion.
 */

#include "plugin/device/gpu/kernel/cuda_impl/sponge/neighbor_list/neighbor_list_impl.cuh"
#include <vector>

// common functions

static __global__ void Copy_List(const int element_numbers, const float *origin_list, float *list) {
  int i = blockDim.x * blockIdx.x + threadIdx.x;
  if (i < element_numbers) {
    list[i] = origin_list[i];
  }
}

static __global__ void Crd_To_Uint_Crd(const int atom_numbers, float *scale_factor, const VECTOR *crd,
                                       UNSIGNED_INT_VECTOR *uint_crd) {
  int atom_i = blockDim.x * blockIdx.x + threadIdx.x;
  if (atom_i < atom_numbers) {
    INT_VECTOR tempi;
    VECTOR temp = crd[atom_i];

    temp.x *= scale_factor[0];
    temp.y *= scale_factor[1];
    temp.z *= scale_factor[2];

    tempi.int_x = temp.x;
    tempi.int_y = temp.y;
    tempi.int_z = temp.z;

    uint_crd[atom_i].uint_x = (tempi.int_x << 2);
    uint_crd[atom_i].uint_y = (tempi.int_y << 2);
    uint_crd[atom_i].uint_z = (tempi.int_z << 2);
  }
}
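Note: Crd_To_Uint_Crd quantizes float coordinates into unsigned fixed-point through a signed intermediate, then shifts left by 2 so the value spans the full 32-bit range in steps of 4. A hedged standalone sketch; the 2^30/box scale here is an assumption inferred from the shift and the Mul_quarter helper below, not a value stated in this diff:

// Minimal sketch, assuming scale_factor ~ 2^30 / box_length per dimension.
#include <cstdint>

uint32_t to_uint_crd(float x, float box) {
  const float scale = 1073741824.0f / box;       // 2^30 / box (assumed convention)
  int32_t xi = static_cast<int32_t>(x * scale);  // signed intermediate stays in range
  return static_cast<uint32_t>(xi) << 2;         // spread over the full uint range
}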
static __global__ void Crd_Periodic_Map(const int atom_numbers, VECTOR *crd, const float *box_length) {
  int atom_i = blockDim.x * blockIdx.x + threadIdx.x;
  if (atom_i < atom_numbers) {
    // wrap each coordinate back into [0, box) by at most one box length
    if (crd[atom_i].x >= box_length[0]) {
      crd[atom_i].x = crd[atom_i].x - box_length[0];
    } else if (crd[atom_i].x < 0) {
      crd[atom_i].x = crd[atom_i].x + box_length[0];
    }

    if (crd[atom_i].y >= box_length[1]) {
      crd[atom_i].y = crd[atom_i].y - box_length[1];
    } else if (crd[atom_i].y < 0) {
      crd[atom_i].y = crd[atom_i].y + box_length[1];
    }

    if (crd[atom_i].z >= box_length[2]) {
      crd[atom_i].z = crd[atom_i].z - box_length[2];
    } else if (crd[atom_i].z < 0) {
      crd[atom_i].z = crd[atom_i].z + box_length[2];
    }
  }
}

static __global__ void Clear_Grid_Bucket(const int grid_numbers, int *atom_numbers_in_grid_bucket,
                                         GRID_BUCKET *bucket) {
  int grid_serial = blockDim.x * blockIdx.x + threadIdx.x;
  if (grid_serial < grid_numbers) {
    GRID_BUCKET bucket_i = bucket[grid_serial];
    for (int i = 0; i < atom_numbers_in_grid_bucket[grid_serial]; i = i + 1) {
      bucket_i.atom_serial[i] = -1;
    }
    atom_numbers_in_grid_bucket[grid_serial] = 0;
  }
}

static __global__ void Find_Atom_In_Grid_Serial(const int atom_numbers, const float *grid_length_inverse,
                                                const VECTOR *crd, const int *grid_N, const int gridxy,
                                                int *atom_in_grid_serial) {
  int atom_i = blockDim.x * blockIdx.x + threadIdx.x;
  if (atom_i < atom_numbers) {
    int Nx = static_cast<float>(crd[atom_i].x) * grid_length_inverse[0];
    int Ny = static_cast<float>(crd[atom_i].y) * grid_length_inverse[1];
    int Nz = static_cast<float>(crd[atom_i].z) * grid_length_inverse[2];
    Nx = Nx & ((Nx - grid_N[0]) >> 31);
    Ny = Ny & ((Ny - grid_N[1]) >> 31);
    Nz = Nz & ((Nz - grid_N[2]) >> 31);
    atom_in_grid_serial[atom_i] = Nz * gridxy + Ny * grid_N[0] + Nx;
  }
}
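Note: the `N & ((N - limit) >> 31)` lines in Find_Atom_In_Grid_Serial are a branchless clamp: the shifted difference is all-ones when N is below the grid dimension and zero otherwise, so an out-of-range cell index collapses to cell 0 instead of indexing past the grid. A hedged standalone check:

// Minimal sketch; assumes 32-bit ints with arithmetic right shift and n >= 0.
#include <cassert>

int clamp_cell(int n, int limit) {
  return n & ((n - limit) >> 31);  // n if n < limit, else 0
}

int main() {
  assert(clamp_cell(5, 8) == 5);   // in range: unchanged
  assert(clamp_cell(8, 8) == 0);   // at the limit: wraps to cell 0
  assert(clamp_cell(9, 8) == 0);   // past the limit: wraps to cell 0
  return 0;
}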
static __global__ void Put_Atom_In_Grid_Bucket(const int atom_numbers, const int *atom_in_grid_serial,
|
||||
GRID_BUCKET *bucket, int *atom_numbers_in_grid_bucket) {
|
||||
int atom_i = blockDim.x * blockIdx.x + threadIdx.x;
|
||||
if (atom_i < atom_numbers) {
|
||||
int grid_serial = atom_in_grid_serial[atom_i];
|
||||
GRID_BUCKET bucket_i = bucket[grid_serial];
|
||||
int a = atom_numbers_in_grid_bucket[grid_serial];
|
||||
atomicCAS(&bucket_i.atom_serial[a], -1, atom_i);
|
||||
if (bucket_i.atom_serial[a] != atom_i) {
|
||||
while (true) {
|
||||
a = a + 1;
|
||||
atomicCAS(&bucket_i.atom_serial[a], -1, atom_i);
|
||||
if (bucket_i.atom_serial[a] == atom_i) {
|
||||
atomicAdd(&atom_numbers_in_grid_bucket[grid_serial], 1);
|
||||
break;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
atomicAdd(&atom_numbers_in_grid_bucket[grid_serial], 1);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static __global__ void Find_atom_neighbors(const int atom_numbers, const UNSIGNED_INT_VECTOR *uint_crd,
|
||||
const float *uint_dr_to_dr_cof, const int *atom_in_grid_serial,
|
||||
const GRID_POINTER *gpointer, const GRID_BUCKET *bucket,
|
||||
const int *atom_numbers_in_grid_bucket, NEIGHBOR_LIST *nl,
|
||||
const float cutoff_skin_square) {
|
||||
int atom_i = blockDim.x * blockIdx.x + threadIdx.x;
|
||||
if (atom_i < atom_numbers) {
|
||||
int grid_serial = atom_in_grid_serial[atom_i];
|
||||
int grid_serial2;
|
||||
int atom_numbers_in_nl_lin = 0;
|
||||
int atom_j;
|
||||
int int_x;
|
||||
int int_y;
|
||||
int int_z;
|
||||
UNSIGNED_INT_VECTOR uint_crd_i = uint_crd[atom_i];
|
||||
NEIGHBOR_LIST nl_i = nl[atom_i];
|
||||
GRID_POINTER gpointer_i = gpointer[grid_serial];
|
||||
VECTOR dr;
|
||||
float dr2;
|
||||
for (int grid_cycle = 0; grid_cycle < 125; grid_cycle = grid_cycle + 1) {
|
||||
grid_serial2 = gpointer_i.grid_serial[grid_cycle];
|
||||
GRID_BUCKET bucket_i = bucket[grid_serial2];
|
||||
for (int i = 0; i < atom_numbers_in_grid_bucket[grid_serial2]; i = i + 1) {
|
||||
atom_j = bucket_i.atom_serial[i];
|
||||
if (atom_j > atom_i) {
|
||||
int_x = uint_crd[atom_j].uint_x - uint_crd_i.uint_x;
|
||||
int_y = uint_crd[atom_j].uint_y - uint_crd_i.uint_y;
|
||||
int_z = uint_crd[atom_j].uint_z - uint_crd_i.uint_z;
|
||||
dr.x = uint_dr_to_dr_cof[0] * int_x;
|
||||
dr.y = uint_dr_to_dr_cof[1] * int_y;
|
||||
dr.z = uint_dr_to_dr_cof[2] * int_z;
|
||||
dr2 = dr.x * dr.x + dr.y * dr.y + dr.z * dr.z;
|
||||
if (dr2 < cutoff_skin_square) {
|
||||
nl_i.atom_serial[atom_numbers_in_nl_lin] = atom_j;
|
||||
atom_numbers_in_nl_lin = atom_numbers_in_nl_lin + 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
nl[atom_i].atom_numbers = atom_numbers_in_nl_lin;
|
||||
}
|
||||
}
|
||||
|
||||
static __global__ void Delete_Excluded_Atoms_Serial_In_Neighbor_List(const int atom_numbers, NEIGHBOR_LIST *nl,
|
||||
const int *excluded_list_start,
|
||||
const int *excluded_list,
|
||||
const int *excluded_atom_numbers) {
|
||||
int atom_i = blockDim.x * blockIdx.x + threadIdx.x;
|
||||
if (atom_i < atom_numbers) {
|
||||
int excluded_number = excluded_atom_numbers[atom_i];
|
||||
if (excluded_number > 0) {
|
||||
int list_start = excluded_list_start[atom_i];
|
||||
int atom_min = excluded_list[list_start];
|
||||
int list_end = list_start + excluded_number;
|
||||
int atom_max = excluded_list[list_end - 1];
|
||||
NEIGHBOR_LIST nl_i = nl[atom_i];
|
||||
int atomnumbers_in_nl_lin = nl_i.atom_numbers;
|
||||
int atom_j;
|
||||
int excluded_atom_numbers_lin = list_end - list_start;
|
||||
int excluded_atom_numbers_count = 0;
|
||||
for (int i = 0; i < atomnumbers_in_nl_lin; i = i + 1) {
|
||||
atom_j = nl_i.atom_serial[i];
|
||||
if (atom_j < atom_min || atom_j > atom_max) {
|
||||
continue;
|
||||
} else {
|
||||
for (int j = list_start; j < list_end; j = j + 1) {
|
||||
if (atom_j == excluded_list[j]) {
|
||||
atomnumbers_in_nl_lin = atomnumbers_in_nl_lin - 1;
|
||||
nl_i.atom_serial[i] = nl_i.atom_serial[atomnumbers_in_nl_lin];
|
||||
excluded_atom_numbers_count = excluded_atom_numbers_count + 1;
|
||||
i = i - 1;
|
||||
}
|
||||
}
|
||||
if (excluded_atom_numbers_count < excluded_atom_numbers_lin) {
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
nl[atom_i].atom_numbers = atomnumbers_in_nl_lin;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static __global__ void construct_neighbor_list_kernel(int atom_numbers, int max_neighbor_numbers, int *nl_atom_numbers,
|
||||
int *nl_atom_serial, NEIGHBOR_LIST *nl) {
|
||||
for (size_t i = blockIdx.x * blockDim.x + threadIdx.x; i < atom_numbers; i += gridDim.x * blockDim.x) {
|
||||
nl[i].atom_numbers = nl_atom_numbers[i];
|
||||
nl[i].atom_serial = nl_atom_serial + i * max_neighbor_numbers;
|
||||
}
|
||||
}
|
||||
|
||||
static __global__ void copy_neighbor_list_atom_number(int atom_numbers, int max_neighbor_numbers, NEIGHBOR_LIST *nl,
|
||||
int *nl_atom_numbers, int *nl_atom_serial) {
|
||||
int i, j;
|
||||
for (i = blockIdx.x * blockDim.x + threadIdx.x; i < atom_numbers; i += gridDim.x * blockDim.x) {
|
||||
nl_atom_numbers[i] = nl[i].atom_numbers;
|
||||
for (j = blockIdx.y * blockDim.y + threadIdx.y; j < max_neighbor_numbers; j += gridDim.y * blockDim.y) {
|
||||
if (j < nl_atom_numbers[i]) {
|
||||
nl_atom_serial[i * max_neighbor_numbers + j] = nl[i].atom_serial[j];
|
||||
} else {
|
||||
nl_atom_serial[i * max_neighbor_numbers + j] = 0;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static __global__ void Mul_half(float *src, float *dst) {
|
||||
int index = threadIdx.x;
|
||||
if (index < 3) {
|
||||
dst[index] = src[index] * 0.5;
|
||||
}
|
||||
}
|
||||
|
||||
static __global__ void Mul_quarter(float *src, float *dst) {
|
||||
int index = threadIdx.x;
|
||||
if (index < 3) {
|
||||
dst[index] = src[index] * 0.25;
|
||||
}
|
||||
}
|
||||
|
||||
// old neighbor list update functions
__global__ void Crd_To_Uint_Crd_Half(const int atom_numbers, float *scale_factor, const VECTOR *crd,
                                     UNSIGNED_INT_VECTOR *uint_crd) {
  int atom_i = blockDim.x * blockIdx.x + threadIdx.x;
  if (atom_i < atom_numbers) {
    uint_crd[atom_i].uint_x = crd[atom_i].x * scale_factor[0];
    uint_crd[atom_i].uint_y = crd[atom_i].y * scale_factor[1];
    uint_crd[atom_i].uint_z = crd[atom_i].z * scale_factor[2];
    uint_crd[atom_i].uint_x = uint_crd[atom_i].uint_x << 1;
    uint_crd[atom_i].uint_y = uint_crd[atom_i].uint_y << 1;
    uint_crd[atom_i].uint_z = uint_crd[atom_i].uint_z << 1;
  }
}

__global__ void Vector_Translation(const int vector_numbers, VECTOR *vec_list, const VECTOR translation_vec) {
  int i = blockDim.x * blockIdx.x + threadIdx.x;
  if (i < vector_numbers) {
    vec_list[i].x = vec_list[i].x + translation_vec.x;
    vec_list[i].y = vec_list[i].y + translation_vec.y;
    vec_list[i].z = vec_list[i].z + translation_vec.z;
  }
}

__global__ void Vector_Translation(const int vector_numbers, VECTOR *vec_list, const VECTOR *translation_vec) {
  int i = blockDim.x * blockIdx.x + threadIdx.x;
  if (i < vector_numbers) {
    vec_list[i].x = vec_list[i].x + translation_vec[0].x;
    vec_list[i].y = vec_list[i].y + translation_vec[0].y;
    vec_list[i].z = vec_list[i].z + translation_vec[0].z;
  }
}

__global__ void Is_need_refresh_neighbor_list_cuda(const int atom_numbers, const VECTOR *crd, const VECTOR *old_crd,
                                                   const float half_skin_square, int *need_refresh_flag) {
  int i = blockDim.x * blockIdx.x + threadIdx.x;
  if (i < atom_numbers) {
    VECTOR r1 = crd[i];
    VECTOR r2 = old_crd[i];
    r1.x = r1.x - r2.x;
    r1.y = r1.y - r2.y;
    r1.z = r1.z - r2.z;
    float r1_2 = r1.x * r1.x + r1.y * r1.y + r1.z * r1.z;
    if (r1_2 > half_skin_square) {
      atomicExch(&need_refresh_flag[0], 1);
    }
  }
}

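// Added note: the refresh test above is the standard skin criterion. The list
// is rebuilt once any single atom has moved more than skin/2 since the last
// rebuild, i.e. when |r - r_old|^2 > (skin/2)^2 = half_skin_square; two atoms
// each moving skin/2 toward one another is the worst case that could pull an
// unlisted pair inside the cutoff, so the bound is conservative and safe.
// An illustrative helper (the name is hypothetical, not from this file):
static inline float Half_Skin_Square(float skin) { return 0.25f * skin * skin; }
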
void Refresh_Neighbor_List_Half(int *refresh_sign, const int thread, const int atom_numbers, VECTOR *crd,
                                VECTOR *old_crd, UNSIGNED_INT_VECTOR *uint_crd, float *crd_to_uint_crd_cof,
                                float *uint_dr_to_dr_cof, int *atom_in_grid_serial, const float skin, float *box_length,
                                const GRID_POINTER *gpointer, GRID_BUCKET *bucket, int *atom_numbers_in_grid_bucket,
                                NEIGHBOR_LIST *d_nl, int *excluded_list_start, int *excluded_list,
                                int *excluded_numbers, float cutoff_skin_square, int grid_numbers,
                                float *grid_length_inverse, int *grid_N, int nxy, cudaStream_t stream) {
  std::vector<int> h_refresh_sign(1);
  cudaMemcpyAsync(h_refresh_sign.data(), refresh_sign, sizeof(int), cudaMemcpyDeviceToHost, stream);
  // The copy is asynchronous; wait for it before reading the flag on the host.
  cudaStreamSynchronize(stream);
  if (h_refresh_sign[0] == 1) {
    VECTOR trans_vec = {-skin, -skin, -skin};
    Clear_Grid_Bucket<<<ceilf(static_cast<float>(grid_numbers) / thread), thread, 0, stream>>>(
      grid_numbers, atom_numbers_in_grid_bucket, bucket);

    Vector_Translation<<<ceilf(static_cast<float>(atom_numbers) / thread), thread, 0, stream>>>(atom_numbers, crd,
                                                                                                trans_vec);

    Crd_Periodic_Map<<<ceilf(static_cast<float>(atom_numbers) / thread), thread, 0, stream>>>(atom_numbers, crd,
                                                                                              box_length);

    Find_Atom_In_Grid_Serial<<<ceilf(static_cast<float>(atom_numbers) / thread), thread, 0, stream>>>(
      atom_numbers, grid_length_inverse, crd, grid_N, nxy, atom_in_grid_serial);

    trans_vec.x = -trans_vec.x;
    trans_vec.y = -trans_vec.y;
    trans_vec.z = -trans_vec.z;

    Vector_Translation<<<ceilf(static_cast<float>(atom_numbers) / thread), thread, 0, stream>>>(atom_numbers, crd,
                                                                                                trans_vec);

    Copy_List<<<ceilf(static_cast<float>(3. * atom_numbers) / thread), thread, 0, stream>>>(
      3 * atom_numbers, reinterpret_cast<float *>(crd), reinterpret_cast<float *>(old_crd));

    Put_Atom_In_Grid_Bucket<<<ceilf(static_cast<float>(atom_numbers) / thread), thread, 0, stream>>>(
      atom_numbers, atom_in_grid_serial, bucket, atom_numbers_in_grid_bucket);

    Crd_To_Uint_Crd_Half<<<ceilf(static_cast<float>(atom_numbers) / thread), thread, 0, stream>>>(
      atom_numbers, crd_to_uint_crd_cof, crd, uint_crd);

    Find_atom_neighbors<<<ceilf(static_cast<float>(atom_numbers) / thread), thread, 0, stream>>>(
      atom_numbers, uint_crd, uint_dr_to_dr_cof, atom_in_grid_serial, gpointer, bucket, atom_numbers_in_grid_bucket,
      d_nl, cutoff_skin_square);

    Delete_Excluded_Atoms_Serial_In_Neighbor_List<<<ceilf(static_cast<float>(atom_numbers) / thread), thread, 0,
                                                    stream>>>(atom_numbers, d_nl, excluded_list_start, excluded_list,
                                                              excluded_numbers);
    h_refresh_sign[0] = 0;
  }
}

void Refresh_Neighbor_List_First_Time(int *refresh_sign, const int thread, const int atom_numbers, VECTOR *crd,
                                      VECTOR *old_crd, UNSIGNED_INT_VECTOR *uint_crd, float *crd_to_uint_crd_cof,
                                      float *uint_dr_to_dr_cof, int *atom_in_grid_serial, const float skin,
                                      float *box_length, const GRID_POINTER *gpointer, GRID_BUCKET *bucket,
                                      int *atom_numbers_in_grid_bucket, NEIGHBOR_LIST *d_nl, int *excluded_list_start,
                                      int *excluded_list, int *excluded_numbers, float cutoff_skin_square,
                                      int grid_numbers, float *grid_length_inverse, int *grid_N, int nxy,
                                      cudaStream_t stream) {
  VECTOR trans_vec = {skin, skin, skin};
  Clear_Grid_Bucket<<<ceilf(static_cast<float>(grid_numbers) / 32), 32, 0, stream>>>(
    grid_numbers, atom_numbers_in_grid_bucket, bucket);
  Crd_Periodic_Map<<<ceilf(static_cast<float>(atom_numbers) / 32), 32, 0, stream>>>(atom_numbers, crd, box_length);
  Find_Atom_In_Grid_Serial<<<ceilf(static_cast<float>(atom_numbers) / 32), 32, 0, stream>>>(
    atom_numbers, grid_length_inverse, crd, grid_N, nxy, atom_in_grid_serial);
  Vector_Translation<<<ceilf(static_cast<float>(atom_numbers) / 32), 32, 0, stream>>>(atom_numbers, crd, trans_vec);
  Copy_List<<<ceilf(static_cast<float>(3. * atom_numbers) / 32), 32, 0, stream>>>(
    3 * atom_numbers, reinterpret_cast<float *>(crd), reinterpret_cast<float *>(old_crd));
  Put_Atom_In_Grid_Bucket<<<ceilf(static_cast<float>(atom_numbers) / 32), 32, 0, stream>>>(
    atom_numbers, atom_in_grid_serial, bucket, atom_numbers_in_grid_bucket);
  Crd_To_Uint_Crd_Half<<<ceilf(static_cast<float>(atom_numbers) / 32), 32, 0, stream>>>(
    atom_numbers, crd_to_uint_crd_cof, crd, uint_crd);

  Find_atom_neighbors<<<ceilf(static_cast<float>(atom_numbers) / thread), thread, 0, stream>>>(
    atom_numbers, uint_crd, uint_dr_to_dr_cof, atom_in_grid_serial, gpointer, bucket, atom_numbers_in_grid_bucket, d_nl,
    cutoff_skin_square);
  Delete_Excluded_Atoms_Serial_In_Neighbor_List<<<ceilf(static_cast<float>(atom_numbers) / thread), thread, 0,
                                                  stream>>>(atom_numbers, d_nl, excluded_list_start, excluded_list,
                                                            excluded_numbers);
}

__global__ void copy_neighbor_list_atom_number(int atom_numbers, NEIGHBOR_LIST *nl, int *nl_atom_numbers) {
  for (size_t i = blockIdx.x * blockDim.x + threadIdx.x; i < atom_numbers; i += gridDim.x * blockDim.x) {
    nl_atom_numbers[i] = nl[i].atom_numbers;
  }
}

void ConstructNeighborListHalf(int atom_numbers, int max_neighbor_numbers, int *nl_atom_numbers, int *nl_atom_serial,
                               NEIGHBOR_LIST *nl, cudaStream_t stream) {
  construct_neighbor_list_kernel<<<ceilf(static_cast<float>(atom_numbers) / 128), 128, 0, stream>>>(
    atom_numbers, max_neighbor_numbers, nl_atom_numbers, nl_atom_serial, nl);
}

void CopyNeighborListHalf(int atom_numbers, NEIGHBOR_LIST *nl, int *nl_atom_numbers, cudaStream_t stream) {
  copy_neighbor_list_atom_number<<<ceilf(static_cast<float>(atom_numbers) / 128), 128, 0, stream>>>(atom_numbers, nl,
                                                                                                    nl_atom_numbers);
}

void Refresh_Neighbor_List_No_Check_Half(int grid_numbers, int atom_numbers, float skin, int nxy,
                                         float cutoff_skin_square, int *grid_N, float *box_length,
                                         int *atom_numbers_in_grid_bucket, float *grid_length_inverse,
                                         int *atom_in_grid_serial, GRID_BUCKET *bucket, VECTOR *crd, VECTOR *old_crd,
                                         float *crd_to_uint_crd_cof, UNSIGNED_INT_VECTOR *uint_crd,
                                         float *uint_dr_to_dr_cof, GRID_POINTER *gpointer, NEIGHBOR_LIST *d_nl,
                                         int *excluded_list_start, int *excluded_list, int *excluded_numbers,
                                         cudaStream_t stream) {
  VECTOR trans_vec = {-skin, -skin, -skin};

  Clear_Grid_Bucket<<<ceilf(static_cast<float>(grid_numbers) / 32), 32, 0, stream>>>(
    grid_numbers, atom_numbers_in_grid_bucket, bucket);

  Vector_Translation<<<ceilf(static_cast<float>(atom_numbers) / 32), 32, 0, stream>>>(atom_numbers, crd, trans_vec);

  Crd_Periodic_Map<<<ceilf(static_cast<float>(atom_numbers) / 32), 32, 0, stream>>>(atom_numbers, crd, box_length);

  Find_Atom_In_Grid_Serial<<<ceilf(static_cast<float>(atom_numbers) / 32), 32, 0, stream>>>(
    atom_numbers, grid_length_inverse, crd, grid_N, nxy, atom_in_grid_serial);
  trans_vec.x = -trans_vec.x;
  trans_vec.y = -trans_vec.y;
  trans_vec.z = -trans_vec.z;
  Vector_Translation<<<ceilf(static_cast<float>(atom_numbers) / 32), 32, 0, stream>>>(atom_numbers, crd, trans_vec);

  cudaMemcpyAsync(old_crd, crd, sizeof(VECTOR) * atom_numbers, cudaMemcpyDeviceToDevice, stream);

  Put_Atom_In_Grid_Bucket<<<ceilf(static_cast<float>(atom_numbers) / 32), 32, 0, stream>>>(
    atom_numbers, atom_in_grid_serial, bucket, atom_numbers_in_grid_bucket);

  Crd_To_Uint_Crd_Half<<<ceilf(static_cast<float>(atom_numbers) / 32), 32, 0, stream>>>(
    atom_numbers, crd_to_uint_crd_cof, crd, uint_crd);

  Find_atom_neighbors<<<ceilf(static_cast<float>(atom_numbers) / 32), 32, 0, stream>>>(
    atom_numbers, uint_crd, uint_dr_to_dr_cof, atom_in_grid_serial, gpointer, bucket, atom_numbers_in_grid_bucket, d_nl,
    cutoff_skin_square);

  Delete_Excluded_Atoms_Serial_In_Neighbor_List<<<ceilf(static_cast<float>(atom_numbers) / 32), 32, 0, stream>>>(
    atom_numbers, d_nl, excluded_list_start, excluded_list, excluded_numbers);
}

void NeighborListUpdate(int grid_numbers, int atom_numbers, int *d_refresh_count, int refresh_interval,
                        int not_first_time, float skin, int nxy, float cutoff_square, float cutoff_with_skin_square,
                        int *grid_N, float *box_length, int *atom_numbers_in_grid_bucket, float *grid_length_inverse,
                        int *atom_in_grid_serial, GRID_BUCKET *bucket, float *crd, float *old_crd,
                        float *crd_to_uint_crd_cof, float *half_crd_to_uint_crd_cof, unsigned int *uint_crd,
                        float *uint_dr_to_dr_cof, GRID_POINTER *gpointer, NEIGHBOR_LIST *d_nl, int *excluded_list_start,
                        int *excluded_list, int *excluded_numbers, float half_skin_square,
                        int *is_need_refresh_neighbor_list, cudaStream_t stream) {
  if (not_first_time) {
    if (refresh_interval > 0) {
      std::vector<int> refresh_count_list(1);
      cudaMemcpyAsync(refresh_count_list.data(), d_refresh_count, sizeof(int), cudaMemcpyDeviceToHost, stream);
      cudaStreamSynchronize(stream);
      int refresh_count = refresh_count_list[0];

      if (refresh_count % refresh_interval == 0) {
        Mul_half<<<1, 3, 0, stream>>>(crd_to_uint_crd_cof, half_crd_to_uint_crd_cof);
        Refresh_Neighbor_List_No_Check_Half(
          grid_numbers, atom_numbers, skin, nxy, cutoff_square, grid_N, box_length, atom_numbers_in_grid_bucket,
          grid_length_inverse, atom_in_grid_serial, bucket, reinterpret_cast<VECTOR *>(crd),
          reinterpret_cast<VECTOR *>(old_crd), half_crd_to_uint_crd_cof,
          reinterpret_cast<UNSIGNED_INT_VECTOR *>(uint_crd), uint_dr_to_dr_cof, gpointer, d_nl, excluded_list_start,
          excluded_list, excluded_numbers, stream);
      }
      refresh_count += 1;
      cudaMemcpyAsync(d_refresh_count, &refresh_count, sizeof(int), cudaMemcpyHostToDevice, stream);
    } else {
      Is_need_refresh_neighbor_list_cuda<<<ceilf(static_cast<float>(atom_numbers) / 128), 128, 0, stream>>>(
        atom_numbers, reinterpret_cast<VECTOR *>(crd), reinterpret_cast<VECTOR *>(old_crd), half_skin_square,
        is_need_refresh_neighbor_list);
      Mul_half<<<1, 3, 0, stream>>>(crd_to_uint_crd_cof, half_crd_to_uint_crd_cof);
      Refresh_Neighbor_List_Half(is_need_refresh_neighbor_list, 32, atom_numbers, reinterpret_cast<VECTOR *>(crd),
                                 reinterpret_cast<VECTOR *>(old_crd), reinterpret_cast<UNSIGNED_INT_VECTOR *>(uint_crd),
                                 half_crd_to_uint_crd_cof, uint_dr_to_dr_cof, atom_in_grid_serial, skin, box_length,
                                 gpointer, bucket, atom_numbers_in_grid_bucket, d_nl, excluded_list_start,
                                 excluded_list, excluded_numbers, cutoff_with_skin_square, grid_numbers,
                                 grid_length_inverse, grid_N, nxy, stream);
    }
  } else {
    Mul_half<<<1, 3, 0, stream>>>(crd_to_uint_crd_cof, half_crd_to_uint_crd_cof);
    Refresh_Neighbor_List_First_Time(
      is_need_refresh_neighbor_list, 32, atom_numbers, reinterpret_cast<VECTOR *>(crd),
      reinterpret_cast<VECTOR *>(old_crd), reinterpret_cast<UNSIGNED_INT_VECTOR *>(uint_crd), half_crd_to_uint_crd_cof,
      uint_dr_to_dr_cof, atom_in_grid_serial, skin, box_length, gpointer, bucket, atom_numbers_in_grid_bucket, d_nl,
      excluded_list_start, excluded_list, excluded_numbers, cutoff_with_skin_square, grid_numbers, grid_length_inverse,
      grid_N, nxy, stream);
  }
}

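// --- Illustrative call sequence (an assumption, not from the removed file) ---
// A host driver would typically wire the flat buffers once and then invoke the
// update every step; the wrapper names are real, the surrounding loop is a sketch:
//   ConstructNeighborListHalf(atom_numbers, max_neighbor_numbers, nl_atom_numbers, nl_atom_serial, d_nl, stream);
//   for (int step = 0; step < total_steps; ++step) {
//     /* integrate one MD step, updating crd */
//     NeighborListUpdate(grid_numbers, atom_numbers, d_refresh_count, refresh_interval, step > 0, skin, nxy,
//                        cutoff_square, cutoff_with_skin_square, grid_N, box_length, atom_numbers_in_grid_bucket,
//                        grid_length_inverse, atom_in_grid_serial, bucket, crd, old_crd, crd_to_uint_crd_cof,
//                        half_crd_to_uint_crd_cof, uint_crd, uint_dr_to_dr_cof, gpointer, d_nl, excluded_list_start,
//                        excluded_list, excluded_numbers, half_skin_square, is_need_refresh_neighbor_list, stream);
//   }
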
// new neighbor list update functions

__device__ __host__ VECTOR Get_Periodic_Displacement_Update(const VECTOR vec_a, const VECTOR vec_b,
                                                            const VECTOR box_length) {
  VECTOR dr;
  dr.x = vec_a.x - vec_b.x;
  dr.y = vec_a.y - vec_b.y;
  dr.z = vec_a.z - vec_b.z;

  dr.x = dr.x - floorf(dr.x / box_length.x + 0.5) * box_length.x;
  dr.y = dr.y - floorf(dr.y / box_length.y + 0.5) * box_length.y;
  dr.z = dr.z - floorf(dr.z / box_length.z + 0.5) * box_length.z;
  return dr;
}

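// --- Illustrative sketch (not part of the removed file) ---
// The displacement above is wrapped component-wise into [-L/2, L/2), i.e. the
// minimum-image convention; the scalar version of the same wrap, for reference:
static __device__ __host__ inline float Minimum_Image(float d, float box) {
  return d - floorf(d / box + 0.5f) * box;
}
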
__global__ void Is_need_refresh_neighbor_list_cuda(const int atom_numbers, const VECTOR *crd, const VECTOR *old_crd,
                                                   const VECTOR *box_length, const float half_skin_square,
                                                   int *need_refresh_flag) {
  int i = blockDim.x * blockIdx.x + threadIdx.x;
  if (i < atom_numbers) {
    VECTOR r1 = crd[i];
    VECTOR r2 = old_crd[i];
    r1 = Get_Periodic_Displacement_Update(r1, r2, box_length[0]);
    float r1_2 = r1.x * r1.x + r1.y * r1.y + r1.z * r1.z;
    if (r1_2 > half_skin_square) {
      atomicExch(&need_refresh_flag[0], 1);
    }
  }
}

void Refresh_Neighbor_List(int *refresh_sign, const int thread, const int atom_numbers, VECTOR *crd, VECTOR *old_crd,
                           UNSIGNED_INT_VECTOR *uint_crd, float *crd_to_uint_crd_cof, float *uint_dr_to_dr_cof,
                           int *atom_in_grid_serial, const float skin, float *box_length, const GRID_POINTER *gpointer,
                           GRID_BUCKET *bucket, int *atom_numbers_in_grid_bucket, NEIGHBOR_LIST *d_nl,
                           int *excluded_list_start, int *excluded_list, int *excluded_numbers,
                           float cutoff_skin_square, int grid_numbers, float *grid_length_inverse, int *grid_N, int nxy,
                           cudaStream_t stream) {
  std::vector<int> h_refresh_sign(1);
  cudaMemcpyAsync(h_refresh_sign.data(), refresh_sign, sizeof(int), cudaMemcpyDeviceToHost, stream);
  // The copy is asynchronous; wait for it before reading the flag on the host.
  cudaStreamSynchronize(stream);
  if (h_refresh_sign[0] == 1) {
    Clear_Grid_Bucket<<<ceilf(static_cast<float>(grid_numbers) / thread), thread, 0, stream>>>(
      grid_numbers, atom_numbers_in_grid_bucket, bucket);

    Crd_Periodic_Map<<<ceilf(static_cast<float>(atom_numbers) / thread), thread, 0, stream>>>(atom_numbers, crd,
                                                                                              box_length);

    Find_Atom_In_Grid_Serial<<<ceilf(static_cast<float>(atom_numbers) / thread), thread, 0, stream>>>(
      atom_numbers, grid_length_inverse, crd, grid_N, nxy, atom_in_grid_serial);

    Copy_List<<<ceilf(static_cast<float>(3. * atom_numbers) / thread), thread, 0, stream>>>(
      3 * atom_numbers, reinterpret_cast<float *>(crd), reinterpret_cast<float *>(old_crd));

    Put_Atom_In_Grid_Bucket<<<ceilf(static_cast<float>(atom_numbers) / thread), thread, 0, stream>>>(
      atom_numbers, atom_in_grid_serial, bucket, atom_numbers_in_grid_bucket);

    Crd_To_Uint_Crd<<<ceilf(static_cast<float>(atom_numbers) / thread), thread, 0, stream>>>(
      atom_numbers, crd_to_uint_crd_cof, crd, uint_crd);

    Find_atom_neighbors<<<ceilf(static_cast<float>(atom_numbers) / thread), thread, 0, stream>>>(
      atom_numbers, uint_crd, uint_dr_to_dr_cof, atom_in_grid_serial, gpointer, bucket, atom_numbers_in_grid_bucket,
      d_nl, cutoff_skin_square);

    Delete_Excluded_Atoms_Serial_In_Neighbor_List<<<ceilf(static_cast<float>(atom_numbers) / thread), thread, 0,
                                                    stream>>>(atom_numbers, d_nl, excluded_list_start, excluded_list,
                                                              excluded_numbers);
    h_refresh_sign[0] = 0;
  }
}

void Refresh_Neighbor_List_No_Check(int grid_numbers, int atom_numbers, float skin, int nxy, float cutoff_skin_square,
                                    int *grid_N, float *box_length, int *atom_numbers_in_grid_bucket,
                                    float *grid_length_inverse, int *atom_in_grid_serial, GRID_BUCKET *bucket,
                                    VECTOR *crd, VECTOR *old_crd, float *crd_to_uint_crd_cof,
                                    UNSIGNED_INT_VECTOR *uint_crd, float *uint_dr_to_dr_cof, GRID_POINTER *gpointer,
                                    NEIGHBOR_LIST *d_nl, int *excluded_list_start, int *excluded_list,
                                    int *excluded_numbers, cudaStream_t stream) {
  Clear_Grid_Bucket<<<ceilf(static_cast<float>(grid_numbers) / 32), 32, 0, stream>>>(
    grid_numbers, atom_numbers_in_grid_bucket, bucket);

  Crd_Periodic_Map<<<ceilf(static_cast<float>(atom_numbers) / 32), 32, 0, stream>>>(atom_numbers, crd, box_length);

  Find_Atom_In_Grid_Serial<<<ceilf(static_cast<float>(atom_numbers) / 32), 32, 0, stream>>>(
    atom_numbers, grid_length_inverse, crd, grid_N, nxy, atom_in_grid_serial);

  cudaMemcpyAsync(old_crd, crd, sizeof(VECTOR) * atom_numbers, cudaMemcpyDeviceToDevice, stream);

  Put_Atom_In_Grid_Bucket<<<ceilf(static_cast<float>(atom_numbers) / 32), 32, 0, stream>>>(
    atom_numbers, atom_in_grid_serial, bucket, atom_numbers_in_grid_bucket);

  Crd_To_Uint_Crd<<<ceilf(static_cast<float>(atom_numbers) / 32), 32, 0, stream>>>(atom_numbers, crd_to_uint_crd_cof,
                                                                                   crd, uint_crd);

  Find_atom_neighbors<<<ceilf(static_cast<float>(atom_numbers) / 32), 32, 0, stream>>>(
    atom_numbers, uint_crd, uint_dr_to_dr_cof, atom_in_grid_serial, gpointer, bucket, atom_numbers_in_grid_bucket, d_nl,
    cutoff_skin_square);

  Delete_Excluded_Atoms_Serial_In_Neighbor_List<<<ceilf(static_cast<float>(atom_numbers) / 32), 32, 0, stream>>>(
    atom_numbers, d_nl, excluded_list_start, excluded_list, excluded_numbers);
}

void CopyNeighborList(int atom_numbers, int max_neighbor_numbers, NEIGHBOR_LIST *nl, int *nl_atom_numbers,
                      int *nl_atom_serial, cudaStream_t stream) {
  copy_neighbor_list_atom_number<<<ceilf(static_cast<float>(atom_numbers) / 128), 128, 0, stream>>>(
    atom_numbers, max_neighbor_numbers, nl, nl_atom_numbers, nl_atom_serial);
}

void ConstructNeighborList(int atom_numbers, int max_neighbor_numbers, int *nl_atom_numbers, int *nl_atom_serial,
                           NEIGHBOR_LIST *nl, cudaStream_t stream) {
  construct_neighbor_list_kernel<<<ceilf(static_cast<float>(atom_numbers) / 128), 128, 0, stream>>>(
    atom_numbers, max_neighbor_numbers, nl_atom_numbers, nl_atom_serial, nl);
}

// Host-side step counter used by NeighborListRefresh below.
int refresh_count = 0;

void NeighborListRefresh(int grid_numbers, int atom_numbers, int *d_refresh_count, int refresh_interval,
                         int not_first_time, float skin, int nxy, float cutoff_square, float cutoff_with_skin_square,
                         int *grid_N, float *box_length, int *atom_numbers_in_grid_bucket, float *grid_length_inverse,
                         int *atom_in_grid_serial, GRID_BUCKET *bucket, float *crd, float *old_crd,
                         float *crd_to_uint_crd_cof, float *half_crd_to_uint_crd_cof, unsigned int *uint_crd,
                         float *uint_dr_to_dr_cof, GRID_POINTER *gpointer, NEIGHBOR_LIST *d_nl,
                         int *excluded_list_start, int *excluded_list, int *excluded_numbers, float half_skin_square,
                         int *is_need_refresh_neighbor_list, int forced_update, int forced_check, cudaStream_t stream) {
  if (forced_update) {
    Mul_quarter<<<1, 3, 0, stream>>>(crd_to_uint_crd_cof, half_crd_to_uint_crd_cof);
    Refresh_Neighbor_List_No_Check(
      grid_numbers, atom_numbers, skin, nxy, cutoff_square, grid_N, box_length, atom_numbers_in_grid_bucket,
      grid_length_inverse, atom_in_grid_serial, bucket, reinterpret_cast<VECTOR *>(crd),
      reinterpret_cast<VECTOR *>(old_crd), half_crd_to_uint_crd_cof, reinterpret_cast<UNSIGNED_INT_VECTOR *>(uint_crd),
      uint_dr_to_dr_cof, gpointer, d_nl, excluded_list_start, excluded_list, excluded_numbers, stream);
  } else if (refresh_interval > 0 && !forced_check) {
    if (refresh_count % refresh_interval == 0) {
      Mul_quarter<<<1, 3, 0, stream>>>(crd_to_uint_crd_cof, half_crd_to_uint_crd_cof);
      Refresh_Neighbor_List_No_Check(grid_numbers, atom_numbers, skin, nxy, cutoff_square, grid_N, box_length,
                                     atom_numbers_in_grid_bucket, grid_length_inverse, atom_in_grid_serial, bucket,
                                     reinterpret_cast<VECTOR *>(crd), reinterpret_cast<VECTOR *>(old_crd),
                                     half_crd_to_uint_crd_cof, reinterpret_cast<UNSIGNED_INT_VECTOR *>(uint_crd),
                                     uint_dr_to_dr_cof, gpointer, d_nl, excluded_list_start, excluded_list,
                                     excluded_numbers, stream);
    }
    refresh_count += 1;
  } else {
    Is_need_refresh_neighbor_list_cuda<<<ceilf(static_cast<float>(atom_numbers) / 128), 128, 0, stream>>>(
      atom_numbers, reinterpret_cast<VECTOR *>(crd), reinterpret_cast<VECTOR *>(old_crd),
      reinterpret_cast<VECTOR *>(box_length), half_skin_square, is_need_refresh_neighbor_list);
    Mul_quarter<<<1, 3, 0, stream>>>(crd_to_uint_crd_cof, half_crd_to_uint_crd_cof);
    Refresh_Neighbor_List(is_need_refresh_neighbor_list, 32, atom_numbers, reinterpret_cast<VECTOR *>(crd),
                          reinterpret_cast<VECTOR *>(old_crd), reinterpret_cast<UNSIGNED_INT_VECTOR *>(uint_crd),
                          half_crd_to_uint_crd_cof, uint_dr_to_dr_cof, atom_in_grid_serial, skin, box_length, gpointer,
                          bucket, atom_numbers_in_grid_bucket, d_nl, excluded_list_start, excluded_list,
                          excluded_numbers, cutoff_with_skin_square, grid_numbers, grid_length_inverse, grid_N, nxy,
                          stream);
  }
}
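// Added note: unlike the older NeighborListUpdate, which round-trips its step
// counter through device memory (d_refresh_count), NeighborListRefresh keeps
// the counter in the file-scope host variable refresh_count, so its
// d_refresh_count argument is accepted for interface parity but not consulted.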

@ -1,82 +0,0 @@
/**
 * Note:
 * NeighborListUpdate. This is an experimental interface that is subject to change and/or deletion.
 */

#ifndef MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_NEIGHBOR_LIST_IMPL_H_
#define MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_NEIGHBOR_LIST_IMPL_H_
#include <curand_kernel.h>
#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/cuda_common.h"
#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/cuda_device_info.h"

struct VECTOR {
  float x;
  float y;
  float z;
};
struct INT_VECTOR {
  int int_x;
  int int_y;
  int int_z;
};
struct UNSIGNED_INT_VECTOR {
  unsigned int uint_x;
  unsigned int uint_y;
  unsigned int uint_z;
};
struct NEIGHBOR_LIST {
  int atom_numbers;
  int *atom_serial;
};
struct GRID_BUCKET {
  int *atom_serial;
};
struct GRID_POINTER {
  int *grid_serial;
};
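// --- Illustrative sketch (an assumption, matching the .cu side) ---
// construct_neighbor_list_kernel points each NEIGHBOR_LIST entry at row i of
// one flat atom_numbers x max_neighbor_numbers buffer, so host-side setup
// would look roughly like:
//   int *nl_atom_serial;  // flat storage for all rows
//   cudaMalloc(&nl_atom_serial, sizeof(int) * atom_numbers * max_neighbor_numbers);
//   NEIGHBOR_LIST *d_nl;
//   cudaMalloc(&d_nl, sizeof(NEIGHBOR_LIST) * atom_numbers);
//   ConstructNeighborList(atom_numbers, max_neighbor_numbers, nl_atom_numbers, nl_atom_serial, d_nl, stream);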

CUDA_LIB_EXPORT void ConstructNeighborList(int atom_numbers, int max_neighbor_numbers, int *nl_atom_numbers,
                                           int *nl_atom_serial, NEIGHBOR_LIST *nl, cudaStream_t stream);

CUDA_LIB_EXPORT void CopyNeighborList(int atom_numbers, int max_neighbor_numbers, NEIGHBOR_LIST *nl,
                                      int *nl_atom_numbers, int *nl_atom_serial, cudaStream_t stream);

CUDA_LIB_EXPORT void NeighborListRefresh(
  int grid_numbers, int atom_numbers, int *d_refresh_count, int refresh_interval, int not_first_time, float skin,
  int nxy, float cutoff_square, float cutoff_with_skin_square, int *grid_N, float *box_length,
  int *atom_numbers_in_grid_bucket, float *grid_length_inverse, int *atom_in_grid_serial, GRID_BUCKET *bucket,
  float *crd, float *old_crd, float *crd_to_uint_crd_cof, float *half_crd_to_uint_crd_cof, unsigned int *uint_crd,
  float *uint_dr_to_dr_cof, GRID_POINTER *gpointer, NEIGHBOR_LIST *d_nl, int *excluded_list_start, int *excluded_list,
  int *excluded_numbers, float half_skin_square, int *is_need_refresh_neighbor_list, int forced_update,
  int forced_check, cudaStream_t stream);

CUDA_LIB_EXPORT void ConstructNeighborListHalf(int atom_numbers, int max_neighbor_numbers, int *nl_atom_numbers,
                                               int *nl_atom_serial, NEIGHBOR_LIST *nl, cudaStream_t stream);

CUDA_LIB_EXPORT void CopyNeighborListHalf(int atom_numbers, NEIGHBOR_LIST *nl, int *nl_atom_numbers,
                                          cudaStream_t stream);

CUDA_LIB_EXPORT void NeighborListUpdate(
  int grid_numbers, int atom_numbers, int *d_refresh_count, int refresh_interval, int not_first_time, float skin,
  int nxy, float cutoff_square, float cutoff_with_skin_square, int *grid_N, float *box_length,
  int *atom_numbers_in_grid_bucket, float *grid_length_inverse, int *atom_in_grid_serial, GRID_BUCKET *bucket,
  float *crd, float *old_crd, float *crd_to_uint_crd_cof, float *half_crd_to_uint_crd_cof, unsigned int *uint_crd,
  float *uint_dr_to_dr_cof, GRID_POINTER *gpointer, NEIGHBOR_LIST *d_nl, int *excluded_list_start, int *excluded_list,
  int *excluded_numbers, float half_skin_square, int *is_need_refresh_neighbor_list, cudaStream_t stream);

#endif

@ -1,41 +0,0 @@
#include "plugin/device/gpu/kernel/cuda_impl/sponge/nvtit/md_iteration_gradient_descent_impl.cuh"
#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/util.cuh"
#include "plugin/device/gpu/kernel/cuda_impl/sponge/common_sponge.cuh"

__global__ void MD_Iteration_Gradient_Descent(const int atom_numbers, VECTOR *crd, VECTOR *frc,
                                              const float learning_rate) {
  int i = blockDim.x * blockIdx.x + threadIdx.x;
  if (i < atom_numbers) {
    crd[i].x = crd[i].x + learning_rate * frc[i].x;
    crd[i].y = crd[i].y + learning_rate * frc[i].y;
    crd[i].z = crd[i].z + learning_rate * frc[i].z;

    frc[i].x = 0.;
    frc[i].y = 0.;
    frc[i].z = 0.;
  }
}

void MDIterationGradientDescent(const int atom_numbers, float *crd, float *frc, const float learning_rate,
                                cudaStream_t stream) {
  VECTOR *d_crd = reinterpret_cast<VECTOR *>(crd);
  VECTOR *d_frc = reinterpret_cast<VECTOR *>(frc);
  MD_Iteration_Gradient_Descent<<<ceilf(static_cast<float>(atom_numbers) / 128), 128, 0, stream>>>(
    atom_numbers, d_crd, d_frc, learning_rate);
}
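// Added note: with frc holding the force (the negative energy gradient),
// crd += learning_rate * frc is a plain steepest-descent minimization step,
// and the force buffer is cleared for the next accumulation pass. A minimal
// host loop (illustrative only; buffer setup omitted):
//   for (int it = 0; it < minimize_steps; ++it) {
//     /* accumulate forces into frc */
//     MDIterationGradientDescent(atom_numbers, crd, frc, learning_rate, stream);
//   }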

@ -1,26 +0,0 @@
#ifndef MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_MD_ITERATION_GRADIENT_DESCENT_IMPL_H_
#define MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_MD_ITERATION_GRADIENT_DESCENT_IMPL_H_

#include <curand_kernel.h>
#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/cuda_common.h"
#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/cuda_device_info.h"
CUDA_LIB_EXPORT void MDIterationGradientDescent(const int atom_numbers, float *crd, float *frc,
                                                const float learning_rate, cudaStream_t stream);

#endif

@ -1,54 +0,0 @@
/**
 * Note:
 * MDIterationLeapFrog. This is an experimental interface that is subject to change and/or deletion.
 */

#include "plugin/device/gpu/kernel/cuda_impl/sponge/nvtit/md_iteration_leap_frog_impl.cuh"
#include "plugin/device/gpu/kernel/cuda_impl/sponge/common_sponge.cuh"

__global__ void MD_Iteration_Leap_Frog(const int atom_numbers, VECTOR *vel, VECTOR *crd, VECTOR *frc, VECTOR *acc,
                                       const float *inverse_mass, const float dt) {
  int i = blockDim.x * blockIdx.x + threadIdx.x;
  if (i < atom_numbers) {
    acc[i].x = inverse_mass[i] * frc[i].x;
    acc[i].y = inverse_mass[i] * frc[i].y;
    acc[i].z = inverse_mass[i] * frc[i].z;

    vel[i].x = vel[i].x + dt * acc[i].x;
    vel[i].y = vel[i].y + dt * acc[i].y;
    vel[i].z = vel[i].z + dt * acc[i].z;

    crd[i].x = crd[i].x + dt * vel[i].x;
    crd[i].y = crd[i].y + dt * vel[i].y;
    crd[i].z = crd[i].z + dt * vel[i].z;

    frc[i].x = 0.;
    frc[i].y = 0.;
    frc[i].z = 0.;
  }
}

void MDIterationLeapFrog(const int atom_numbers, float *vel, float *crd, float *frc, float *acc,
                         const float *inverse_mass, const float dt, cudaStream_t stream) {
  VECTOR *d_vel = reinterpret_cast<VECTOR *>(vel);
  VECTOR *d_crd = reinterpret_cast<VECTOR *>(crd);
  VECTOR *d_frc = reinterpret_cast<VECTOR *>(frc);
  VECTOR *d_acc = reinterpret_cast<VECTOR *>(acc);
  MD_Iteration_Leap_Frog<<<ceilf(static_cast<float>(atom_numbers) / 128), 128, 0, stream>>>(
    atom_numbers, d_vel, d_crd, d_frc, d_acc, inverse_mass, dt);
}
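// Added note: the kernel above is the textbook leap-frog update,
//   a_t = f_t / m,   v_{t+dt} = v_t + dt * a_t,   x_{t+dt} = x_t + dt * v_{t+dt},
// with velocities effectively staggered half a step from positions and the
// force buffer cleared afterwards so the next step can re-accumulate.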

@ -1,31 +0,0 @@
/**
 * Note:
 * MDIterationLeapFrog. This is an experimental interface that is subject to change and/or deletion.
 */

#ifndef MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_NVTIT_MD_ITERATION_LEAP_FROG_IMPL_H
#define MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_NVTIT_MD_ITERATION_LEAP_FROG_IMPL_H

#include <curand_kernel.h>
#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/cuda_common.h"
#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/cuda_device_info.h"

CUDA_LIB_EXPORT void MDIterationLeapFrog(const int atom_numbers, float *vel, float *crd, float *frc, float *acc,
                                         const float *inverse_mass, const float dt, cudaStream_t stream);

#endif  // MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_NVTIT_MD_ITERATION_LEAP_FROG_IMPL_H

@ -1,67 +0,0 @@
#include "plugin/device/gpu/kernel/cuda_impl/sponge/nvtit/md_iteration_leap_frog_liujian_gpu_impl.cuh"
#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/util.cuh"
#include "plugin/device/gpu/kernel/cuda_impl/sponge/common_sponge.cuh"

__global__ void MD_Iteration_Leap_Frog_With_LiuJian_kernel(const int atom_numbers, const float half_dt, const float dt,
                                                           const float exp_gamma, float *inverse_mass,
                                                           float *sqrt_mass_inverse, VECTOR *vel, VECTOR *crd,
                                                           VECTOR *frc, VECTOR *acc, VECTOR *random_frc,
                                                           VECTOR *output) {
  int i = blockDim.x * blockIdx.x + threadIdx.x;

  if (i < atom_numbers) {
    acc[i].x = inverse_mass[i] * frc[i].x;
    acc[i].y = inverse_mass[i] * frc[i].y;
    acc[i].z = inverse_mass[i] * frc[i].z;

    vel[i].x = vel[i].x + dt * acc[i].x;
    vel[i].y = vel[i].y + dt * acc[i].y;
    vel[i].z = vel[i].z + dt * acc[i].z;

    output[i].x = crd[i].x + half_dt * vel[i].x;
    output[i].y = crd[i].y + half_dt * vel[i].y;
    output[i].z = crd[i].z + half_dt * vel[i].z;

    vel[i].x = exp_gamma * vel[i].x + sqrt_mass_inverse[i] * random_frc[i].x;
    vel[i].y = exp_gamma * vel[i].y + sqrt_mass_inverse[i] * random_frc[i].y;
    vel[i].z = exp_gamma * vel[i].z + sqrt_mass_inverse[i] * random_frc[i].z;

    output[i].x = output[i].x + half_dt * vel[i].x;
    output[i].y = output[i].y + half_dt * vel[i].y;
    output[i].z = output[i].z + half_dt * vel[i].z;
  }
}

void MD_Iteration_Leap_Frog_With_LiuJian(const int atom_numbers, const float half_dt, const float dt,
                                         const float exp_gamma, int float4_numbers, float *inverse_mass,
                                         float *sqrt_mass_inverse, float *vel, float *crd, float *frc, float *acc,
                                         curandStatePhilox4_32_10_t *rand_state, float *rand_frc, float *output,
                                         cudaStream_t stream) {
  Rand_Normal<<<ceilf(static_cast<float>(float4_numbers) / 32.), 32, 0, stream>>>(float4_numbers, rand_state,
                                                                                  reinterpret_cast<float4 *>(rand_frc));
  VECTOR *d_vel = reinterpret_cast<VECTOR *>(vel);
  VECTOR *d_crd = reinterpret_cast<VECTOR *>(crd);
  VECTOR *d_frc = reinterpret_cast<VECTOR *>(frc);
  VECTOR *d_acc = reinterpret_cast<VECTOR *>(acc);
  VECTOR *d_rand_frc = reinterpret_cast<VECTOR *>(rand_frc);
  VECTOR *d_out = reinterpret_cast<VECTOR *>(output);
  MD_Iteration_Leap_Frog_With_LiuJian_kernel<<<ceilf(static_cast<float>(atom_numbers) / 32), 32, 0, stream>>>(
    atom_numbers, half_dt, dt, exp_gamma, inverse_mass, sqrt_mass_inverse, d_vel, d_crd, d_frc, d_acc, d_rand_frc,
    d_out);
}
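// Added note: the kernel realizes a Langevin "middle"-type splitting, matching
// the code exactly:
//   v <- v + dt * f/m
//   x' <- x + (dt/2) * v
//   v <- exp_gamma * v + sqrt_mass_inverse * R    (R ~ N(0,1), from Rand_Normal)
//   x_out <- x' + (dt/2) * v
// Presumably exp_gamma = exp(-gamma * dt) and sqrt_mass_inverse carries the
// thermal-noise amplitude per atom; both are supplied by the caller, so that
// reading is an assumption rather than something fixed in this file.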

@ -1,29 +0,0 @@
#ifndef MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_MD_ITERATION_LEAP_FROG_LIUJIAN_GPU_IMPL_H_
#define MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_MD_ITERATION_LEAP_FROG_LIUJIAN_GPU_IMPL_H_

#include <curand_kernel.h>
#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/cuda_common.h"
#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/cuda_device_info.h"
CUDA_LIB_EXPORT void MD_Iteration_Leap_Frog_With_LiuJian(const int atom_numbers, const float half_dt, const float dt,
                                                         const float exp_gamma, int float4_numbers,
                                                         float *inverse_mass, float *sqrt_mass_inverse, float *vel,
                                                         float *crd, float *frc, float *acc,
                                                         curandStatePhilox4_32_10_t *rand_state, float *rand_frc,
                                                         float *output, cudaStream_t stream);

#endif  // MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_MD_ITERATION_LEAP_FROG_LIUJIAN_GPU_IMPL_H_

@ -1,80 +0,0 @@
#include "plugin/device/gpu/kernel/cuda_impl/sponge/nvtit/md_iteration_leap_frog_liujian_with_max_vel_impl.cuh"
#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/util.cuh"
#include "plugin/device/gpu/kernel/cuda_impl/sponge/common_sponge.cuh"

__global__ void MD_Iteration_Leap_Frog_With_LiuJian_With_Max_Velocity(
  const int atom_numbers, const float half_dt, const float dt, const float exp_gamma, const float *inverse_mass,
  const float *sqrt_mass_inverse, VECTOR *vel, VECTOR *crd, VECTOR *frc, VECTOR *acc, VECTOR *random_frc,
  VECTOR *output, const float max_vel) {
  int i = blockDim.x * blockIdx.x + threadIdx.x;
  float abs_vel;
  if (i < atom_numbers) {
    acc[i].x = inverse_mass[i] * frc[i].x;
    acc[i].y = inverse_mass[i] * frc[i].y;
    acc[i].z = inverse_mass[i] * frc[i].z;

    vel[i].x = vel[i].x + dt * acc[i].x;
    vel[i].y = vel[i].y + dt * acc[i].y;
    vel[i].z = vel[i].z + dt * acc[i].z;

    abs_vel = norm3df(vel[i].x, vel[i].y, vel[i].z);
    if (abs_vel >= max_vel) {
      // Rescale so the speed equals max_vel while keeping the direction.
      abs_vel = max_vel / abs_vel;
      vel[i].x = abs_vel * vel[i].x;
      vel[i].y = abs_vel * vel[i].y;
      vel[i].z = abs_vel * vel[i].z;
    }

    output[i].x = crd[i].x + half_dt * vel[i].x;
    output[i].y = crd[i].y + half_dt * vel[i].y;
    output[i].z = crd[i].z + half_dt * vel[i].z;

    vel[i].x = exp_gamma * vel[i].x + sqrt_mass_inverse[i] * random_frc[i].x;
    vel[i].y = exp_gamma * vel[i].y + sqrt_mass_inverse[i] * random_frc[i].y;
    vel[i].z = exp_gamma * vel[i].z + sqrt_mass_inverse[i] * random_frc[i].z;

    output[i].x = output[i].x + half_dt * vel[i].x;
    output[i].y = output[i].y + half_dt * vel[i].y;
    output[i].z = output[i].z + half_dt * vel[i].z;

    frc[i].x = 0.;
    frc[i].y = 0.;
    frc[i].z = 0.;
  }
}
void MD_Iteration_Leap_Frog_With_LiuJian_With_Max_Vel(const int atom_numbers, const float half_dt, const float dt,
                                                      const float exp_gamma, int float4_numbers, float *inverse_mass,
                                                      float *sqrt_mass_inverse, float *vel, float *crd, float *frc,
                                                      float *acc, curandStatePhilox4_32_10_t *rand_state,
                                                      float *rand_frc, float *output, const float max_vel,
                                                      cudaStream_t stream) {
  Rand_Normal<<<ceilf(static_cast<float>(float4_numbers) / 32.), 32, 0, stream>>>(float4_numbers, rand_state,
                                                                                  reinterpret_cast<float4 *>(rand_frc));
  VECTOR *d_vel = reinterpret_cast<VECTOR *>(vel);
  VECTOR *d_crd = reinterpret_cast<VECTOR *>(crd);
  VECTOR *d_frc = reinterpret_cast<VECTOR *>(frc);
  VECTOR *d_acc = reinterpret_cast<VECTOR *>(acc);
  VECTOR *d_rand_frc = reinterpret_cast<VECTOR *>(rand_frc);
  VECTOR *d_out = reinterpret_cast<VECTOR *>(output);
  MD_Iteration_Leap_Frog_With_LiuJian_With_Max_Velocity<<<ceilf(static_cast<float>(atom_numbers) / 32), 32, 0,
                                                          stream>>>(atom_numbers, half_dt, dt, exp_gamma, inverse_mass,
                                                                    sqrt_mass_inverse, d_vel, d_crd, d_frc, d_acc,
                                                                    d_rand_frc, d_out, max_vel);
}
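// --- Illustrative sketch (not part of the removed file) ---
// The clamp in the kernel above, factored into a standalone device helper for
// reference (the name is hypothetical):
static __device__ __forceinline__ VECTOR Clamp_Speed(VECTOR v, float max_speed) {
  float s = norm3df(v.x, v.y, v.z);
  if (s >= max_speed && s > 0.0f) {
    float k = max_speed / s;  // shrink the vector onto the max-speed sphere
    v.x *= k;
    v.y *= k;
    v.z *= k;
  }
  return v;
}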

@ -1,28 +0,0 @@
#ifndef MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_MD_ITERATION_LEAP_FROG_LIUJIAN_WITH_MAX_VEL_IMPL_H_
#define MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_MD_ITERATION_LEAP_FROG_LIUJIAN_WITH_MAX_VEL_IMPL_H_

#include <curand_kernel.h>
#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/cuda_common.h"
#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/cuda_device_info.h"
CUDA_LIB_EXPORT void MD_Iteration_Leap_Frog_With_LiuJian_With_Max_Vel(
  const int atom_numbers, const float half_dt, const float dt, const float exp_gamma, int float4_numbers,
  float *inverse_mass, float *sqrt_mass_inverse, float *vel, float *crd, float *frc, float *acc,
  curandStatePhilox4_32_10_t *rand_state, float *rand_frc, float *output, const float max_vel, cudaStream_t stream);

#endif  // MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_MD_ITERATION_LEAP_FROG_LIUJIAN_WITH_MAX_VEL_IMPL_H_

@ -1,44 +0,0 @@
#include "plugin/device/gpu/kernel/cuda_impl/sponge/nvtit/md_iteration_leap_frog_with_max_vel_impl.cuh"
#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/util.cuh"
#include "plugin/device/gpu/kernel/cuda_impl/sponge/common_sponge.cuh"

__global__ void MD_Iteration_Leap_Frog_With_Max_Velocity(const int atom_numbers, VECTOR *vel, VECTOR *crd, VECTOR *frc,
                                                         VECTOR *acc, const float *inverse_mass, const float dt,
                                                         const float max_velocity) {
  int i = blockDim.x * blockIdx.x + threadIdx.x;
  if (i < atom_numbers) {
    VECTOR acc_i = inverse_mass[i] * frc[i];
    VECTOR vel_i = vel[i] + dt * acc_i;
    vel_i = Make_Vector_Not_Exceed_Value(vel_i, max_velocity);
    vel[i] = vel_i;
    crd[i] = crd[i] + dt * vel_i;
    frc[i] = {0.0f, 0.0f, 0.0f};
  }
}

void MDIterationLeapFrogWithMaxVelocity(const int atom_numbers, float *vel, float *crd, float *frc, float *acc,
                                        const float *inverse_mass, const float dt, const float max_velocity,
                                        cudaStream_t stream) {
  VECTOR *d_vel = reinterpret_cast<VECTOR *>(vel);
  VECTOR *d_crd = reinterpret_cast<VECTOR *>(crd);
  VECTOR *d_frc = reinterpret_cast<VECTOR *>(frc);
  VECTOR *d_acc = reinterpret_cast<VECTOR *>(acc);
  MD_Iteration_Leap_Frog_With_Max_Velocity<<<ceilf(static_cast<float>(atom_numbers) / 128), 128, 0, stream>>>(
    atom_numbers, d_vel, d_crd, d_frc, d_acc, inverse_mass, dt, max_velocity);
}
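// Added note: this variant leans on the VECTOR operator overloads and
// Make_Vector_Not_Exceed_Value from common_sponge.cuh; judging by its use
// here, that helper performs the same magnitude clamp written out explicitly
// in the LiuJian max-velocity kernel above (an inference, since its definition
// lives elsewhere). The acc argument is carried for interface parity but is
// not read by this kernel.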

@ -1,27 +0,0 @@
#ifndef MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_MD_ITERATION_LEAP_FROG_WITH_MAX_VEL_IMPL_H_
#define MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_MD_ITERATION_LEAP_FROG_WITH_MAX_VEL_IMPL_H_

#include <curand_kernel.h>
#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/cuda_common.h"
#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/cuda_device_info.h"
CUDA_LIB_EXPORT void MDIterationLeapFrogWithMaxVelocity(const int atom_numbers, float *vel, float *crd, float *frc,
                                                        float *acc, const float *inverse_mass, const float dt,
                                                        const float max_velocity, cudaStream_t stream);

#endif  // MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_MD_ITERATION_LEAP_FROG_WITH_MAX_VEL_IMPL_H_

@ -1,28 +0,0 @@
#include "plugin/device/gpu/kernel/cuda_impl/sponge/nvtit/md_iteration_setup_random_state_gpu_impl.cuh"
#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/util.cuh"
#include "plugin/device/gpu/kernel/cuda_impl/sponge/common_sponge.cuh"

void MD_Iteration_Setup_Random_State(int float4_numbers, curandStatePhilox4_32_10_t *rand_state, int seed,
                                     cudaStream_t stream) {
  Setup_Rand_Normal_Kernel<<<ceilf(static_cast<float>(float4_numbers) / 32.), 32, 0, stream>>>(float4_numbers,
                                                                                               rand_state, seed);
}

@ -1,25 +0,0 @@
#ifndef MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_MD_ITERATION_SETUP_RANDOM_STATE_GPU_IMPL_H_
#define MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_MD_ITERATION_SETUP_RANDOM_STATE_GPU_IMPL_H_

#include <curand_kernel.h>
#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/cuda_common.h"
#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/cuda_device_info.h"

CUDA_LIB_EXPORT void MD_Iteration_Setup_Random_State(int float4_numbers, curandStatePhilox4_32_10_t *rand_state,
                                                     int seed, cudaStream_t stream);
#endif

@ -1,27 +0,0 @@
#include "plugin/device/gpu/kernel/cuda_impl/sponge/pme/fft_3d_impl.cuh"
#include "plugin/device/gpu/kernel/cuda_impl/sponge/pme/pme_common.cuh"

template <typename T>
void FFT3D(int Nfft, T *input_tensor, Complex<T> *output_tensor, const cufftHandle &FFT_plan_r2c,
           cudaStream_t stream) {
  cufftExecR2C(FFT_plan_r2c, input_tensor, reinterpret_cast<cufftComplex *>(output_tensor));
}

template CUDA_LIB_EXPORT void FFT3D<float>(int Nfft, float *input_tensor, Complex<float> *output_tensor,
                                           const cufftHandle &FFT_plan_r2c, cudaStream_t stream);
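// --- Illustrative usage (an assumption; plan creation lives outside this file) ---
// FFT3D only executes a pre-built real-to-complex plan, so the caller owns the
// plan's lifetime; the dimensions below are placeholders:
//   cufftHandle plan;
//   cufftPlan3d(&plan, nx, ny, nz, CUFFT_R2C);
//   cufftSetStream(plan, stream);
//   FFT3D<float>(nx * ny * nz, d_real, d_complex, plan, stream);
//   cufftDestroy(plan);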

@ -1,28 +0,0 @@
#ifndef MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_PME_FFT_3D_IMPL_H_
#define MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_PME_FFT_3D_IMPL_H_

#include <cufft.h>
#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/complex.h"
#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/cuda_common.h"
#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/cuda_device_info.h"

template <typename T>
CUDA_LIB_EXPORT void FFT3D(int Nfft, T *input_tensor, Complex<T> *output_tensor, const cufftHandle &FFT_plan_r2c,
                           cudaStream_t stream);

#endif

@ -1,28 +0,0 @@
#include "plugin/device/gpu/kernel/cuda_impl/sponge/pme/ifft_3d_impl.cuh"
#include "plugin/device/gpu/kernel/cuda_impl/sponge/pme/pme_common.cuh"

template <typename T>
void IFFT3D(int Nfft, Complex<T> *input_tensor, T *output_tensor, const cufftHandle &FFT_plan_c2r,
            cudaStream_t stream) {
  cufftExecC2R(FFT_plan_c2r, reinterpret_cast<cufftComplex *>(input_tensor), output_tensor);
}

template CUDA_LIB_EXPORT void IFFT3D<float>(int Nfft, Complex<float> *input_tensor, float *output_tensor,
                                            const cufftHandle &FFT_plan_c2r, cudaStream_t stream);

@ -1,28 +0,0 @@
#ifndef MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_PME_IFFT_3D_IMPL_H_
#define MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_PME_IFFT_3D_IMPL_H_

#include <cufft.h>
#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/complex.h"
#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/cuda_common.h"
#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/cuda_device_info.h"

template <typename T>
CUDA_LIB_EXPORT void IFFT3D(int Nfft, Complex<T> *input_tensor, T *output_tensor, const cufftHandle &FFT_plan_c2r,
                            cudaStream_t stream);

#endif

@ -1,29 +0,0 @@
#include "plugin/device/gpu/kernel/cuda_impl/sponge/pme/pme_batched_fft_2d_impl.cuh"
#include "plugin/device/gpu/kernel/cuda_impl/sponge/pme/pme_common.cuh"

template <typename T>
void PMEBatchedFFT2D(Complex<T> *input_tensor, Complex<T> *output_tensor, const cufftHandle &FFT_plan_c2c,
                     int direction, cudaStream_t stream) {
  cufftExecC2C(FFT_plan_c2c, reinterpret_cast<cufftComplex *>(input_tensor),
               reinterpret_cast<cufftComplex *>(output_tensor), direction);
}

template CUDA_LIB_EXPORT void PMEBatchedFFT2D<float>(Complex<float> *input_tensor, Complex<float> *output_tensor,
                                                     const cufftHandle &FFT_plan_c2c, int direction,
                                                     cudaStream_t stream);
plugin/device/gpu/kernel/cuda_impl/sponge/pme/pme_batched_fft_2d_impl.cuh
@@ -1,28 +0,0 @@
/**
 * Copyright 2021 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#ifndef MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_PME_PME_BATCHED_FFT_2D_IMPL_H_
#define MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_PME_PME_BATCHED_FFT_2D_IMPL_H_

#include <cufft.h>
#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/complex.h"
#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/cuda_common.h"
#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/cuda_device_info.h"

template <typename T>
CUDA_LIB_EXPORT void PMEBatchedFFT2D(Complex<T> *input_tensor, Complex<T> *output_tensor,
                                     const cufftHandle &FFT_plan_c2c, int direction, cudaStream_t stream);

#endif
plugin/device/gpu/kernel/cuda_impl/sponge/pme/pme_common.cuh
@@ -1,357 +0,0 @@
/**
 * Copyright 2021 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
/**
 * Note:
 * PME_Common. This is an experimental interface that is subject to change and/or deletion.
 */

#ifndef MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_PME_PME_COMMON_H_
#define MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_PME_PME_COMMON_H_
#include "plugin/device/gpu/kernel/cuda_impl/sponge/common_sponge.cuh"
__constant__ float PME_Ma[4] = {1.0 / 6.0, -0.5, 0.5, -1.0 / 6.0};
__constant__ float PME_Mb[4] = {0, 0.5, -1, 0.5};
__constant__ float PME_Mc[4] = {0, 0.5, 0, -0.5};
__constant__ float PME_Md[4] = {0, 1.0 / 6.0, 4.0 / 6.0, 1.0 / 6.0};
__constant__ float PME_dMa[4] = {0.5, -1.5, 1.5, -0.5};
__constant__ float PME_dMb[4] = {0, 1, -2, 1};
__constant__ float PME_dMc[4] = {0, 0.5, 0, -0.5};
#define PI 3.1415926
const float periodic_factor_inverse = 2.3283064365387e-10;
static dim3 thread_PME;

const float cutoff = 10.0;
const float tolerance = 0.00001;

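// The PME_M* / PME_dM* tables above hold the piecewise coefficients of the 4th-order
// (cubic) B-spline weights and their derivatives, indexed by grid offset;
// periodic_factor_inverse is 2^-32, mapping unsigned fixed-point coordinates into [0, 1).
//
// M_: cardinal B-spline of order n on the host, via the standard recursion
//   M_n(u) = u / (n - 1) * M_{n-1}(u) + (n - u) / (n - 1) * M_{n-1}(u - 1)
// with the triangular M_2 as base case. Used by getb() below.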
static float M_(float u, int n) {
  if (n == 2) {
    if (u > 2 || u < 0) return 0;
    return 1 - fabsf(u - 1);
  } else {
    return u / (n - 1) * M_(u, n - 1) + (n - u) / (n - 1) * M_(u - 1, n - 1);
  }
}

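// Get_Beta: finds the Ewald splitting parameter beta such that
// erfc(beta * cutoff) / cutoff ~= tolerance, by doubling `high` until it brackets the
// root and then bisecting for a fixed number of iterations.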
static float Get_Beta(float cutoff, float tolerance) {
  float beta, low, high, tempf;
  int ilow, ihigh;

  high = 1.0;
  ihigh = 1;

  while (1) {
    tempf = erfc(high * cutoff) / cutoff;
    if (tempf <= tolerance) break;
    high *= 2;
    ihigh++;
  }

  ihigh += 50;
  low = 0.0;
  for (ilow = 1; ilow < ihigh; ilow++) {
    beta = (low + high) / 2;
    tempf = erfc(beta * cutoff) / cutoff;
    if (tempf >= tolerance)
      low = beta;
    else
      high = beta;
  }
  return beta;
}

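// expc: complex exponential for cufftComplex, exp(x + iy) = e^x (cos y + i sin y),
// using a single sincosf.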
static cufftComplex expc(cufftComplex z) {
  cufftComplex res;
  float t = expf(z.x);
  sincosf(z.y, &res.y, &res.x);
  res.x *= t;
  res.y *= t;
  return res;
}

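// getb: squared modulus |b(k)|^2 of the Euler exponential-spline factor
//   b(k) = exp(2*pi*i*(n-1)*k/N) / sum_{j=0}^{n-2} M_n(j + 1) * exp(2*pi*i*k*j/N),
// one ingredient of the precomputed reciprocal-space B*C array.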
static float getb(int k, int NFFT, int B_order) {
  cufftComplex tempc, tempc2, res;
  float tempf;
  tempc2.x = 0;
  tempc2.y = 0;

  tempc.x = 0;
  tempc.y = 2 * (B_order - 1) * PI * k / NFFT;
  res = expc(tempc);

  for (int kk = 0; kk < (B_order - 1); kk++) {
    tempc.x = 0;
    tempc.y = 2 * PI * k / NFFT * kk;
    tempc = expc(tempc);
    tempf = M_(kk + 1, B_order);
    tempc2.x += tempf * tempc.x;
    tempc2.y += tempf * tempc.y;
  }
  res = cuCdivf(res, tempc2);
  return res.x * res.x + res.y * res.y;
}

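// device_add: folds the charged-system correction factor * charge_sum^2 into an
// energy value that already lives on the device (used by PMEEnergyUpdate).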
__global__ static void device_add(float *ene, float *factor, float *charge_sum) {
  ene[0] += factor[0] * charge_sum[0] * charge_sum[0];
}

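// PME_Atom_Near: converts each atom's unsigned fixed-point coordinate into a mesh cell
// (PME_uxyz) plus the fractional position inside that cell (PME_frxyz) and, only when
// the atom has moved to a new cell, refreshes its 64 (4 x 4 x 4) surrounding grid-point
// indices in PME_atom_near with periodic wrap-around.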
__global__ static void PME_Atom_Near(const UNSIGNED_INT_VECTOR *uint_crd, int *PME_atom_near, const int PME_Nin,
                                     const float periodic_factor_inverse_x, const float periodic_factor_inverse_y,
                                     const float periodic_factor_inverse_z, const int atom_numbers, const int fftx,
                                     const int ffty, const int fftz, const UNSIGNED_INT_VECTOR *PME_kxyz,
                                     UNSIGNED_INT_VECTOR *PME_uxyz, VECTOR *PME_frxyz) {
  int atom = blockDim.x * blockIdx.x + threadIdx.x;
  if (atom < atom_numbers) {
    UNSIGNED_INT_VECTOR *temp_uxyz = &PME_uxyz[atom];
    int k, tempux, tempuy, tempuz;
    float tempf;
    tempf = static_cast<float>(uint_crd[atom].uint_x) * periodic_factor_inverse_x;
    tempux = static_cast<int>(tempf);
    PME_frxyz[atom].x = tempf - tempux;

    tempf = static_cast<float>(uint_crd[atom].uint_y) * periodic_factor_inverse_y;
    tempuy = static_cast<int>(tempf);
    PME_frxyz[atom].y = tempf - tempuy;

    tempf = static_cast<float>(uint_crd[atom].uint_z) * periodic_factor_inverse_z;
    tempuz = static_cast<int>(tempf);
    PME_frxyz[atom].z = tempf - tempuz;

    if (tempux != (*temp_uxyz).uint_x || tempuy != (*temp_uxyz).uint_y || tempuz != (*temp_uxyz).uint_z) {
      (*temp_uxyz).uint_x = tempux;
      (*temp_uxyz).uint_y = tempuy;
      (*temp_uxyz).uint_z = tempuz;
      int *temp_near = PME_atom_near + atom * 64;
      int kx, ky, kz;
      for (k = 0; k < 64; k++) {
        UNSIGNED_INT_VECTOR temp_kxyz = PME_kxyz[k];
        kx = tempux - temp_kxyz.uint_x;
        if (kx < 0) kx += fftx;
        if (kx > fftx) kx -= fftx;
        ky = tempuy - temp_kxyz.uint_y;
        if (ky < 0) ky += ffty;
        if (ky > ffty) ky -= ffty;
        kz = tempuz - temp_kxyz.uint_z;
        if (kz < 0) kz += fftz;
        if (kz > fftz) kz -= fftz;
        temp_near[k] = kx * PME_Nin + ky * fftz + kz;
      }
    }
  }
}

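// PME_Q_Spread: scatters each charge onto its 64 cached grid points. The weight at each
// point is the product of three per-axis cubic B-spline values (Horner evaluation of
// the PME_Ma..PME_Md tables), accumulated into the PME_Q mesh with atomicAdd.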
__global__ static void PME_Q_Spread(int *PME_atom_near, const float *charge, const VECTOR *PME_frxyz, float *PME_Q,
                                    const UNSIGNED_INT_VECTOR *PME_kxyz, const int atom_numbers) {
  int atom = blockDim.x * blockIdx.x + threadIdx.x;

  if (atom < atom_numbers) {
    int k;
    float tempf, tempQ, tempf2;

    int *temp_near = PME_atom_near + atom * 64;
    VECTOR temp_frxyz = PME_frxyz[atom];
    float tempcharge = charge[atom];

    UNSIGNED_INT_VECTOR temp_kxyz;
    unsigned int kx;

    for (k = threadIdx.y; k < 64; k = k + blockDim.y) {
      temp_kxyz = PME_kxyz[k];
      kx = temp_kxyz.uint_x;
      tempf = (temp_frxyz.x);
      tempf2 = tempf * tempf;
      tempf = PME_Ma[kx] * tempf * tempf2 + PME_Mb[kx] * tempf2 + PME_Mc[kx] * tempf + PME_Md[kx];

      tempQ = tempcharge * tempf;

      kx = temp_kxyz.uint_y;
      tempf = (temp_frxyz.y);
      tempf2 = tempf * tempf;
      tempf = PME_Ma[kx] * tempf * tempf2 + PME_Mb[kx] * tempf2 + PME_Mc[kx] * tempf + PME_Md[kx];

      tempQ = tempQ * tempf;

      kx = temp_kxyz.uint_z;
      tempf = (temp_frxyz.z);
      tempf2 = tempf * tempf;
      tempf = PME_Ma[kx] * tempf * tempf2 + PME_Mb[kx] * tempf2 + PME_Mc[kx] * tempf + PME_Md[kx];
      tempQ = tempQ * tempf;

      atomicAdd(&PME_Q[temp_near[k]], tempQ);
    }
  }
}

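// PME_Direct_Energy: direct-space part of the Ewald sum. For every neighbor pair within
// the cutoff it accumulates q_i * q_j * erfc(beta * r) / r, and each thread atomically
// adds its partial sum into the single scalar direct_ene.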
__global__ static void PME_Direct_Energy(const int atom_numbers, const NEIGHBOR_LIST *nl,
                                         const UNSIGNED_INT_VECTOR *uint_crd, const VECTOR *boxlength,
                                         const float *charge, const float beta, const float cutoff_square,
                                         float *direct_ene) {
  int atom_i = blockDim.x * blockIdx.x + threadIdx.x;
  if (atom_i < atom_numbers) {
    NEIGHBOR_LIST nl_i = nl[atom_i];
    int N = nl_i.atom_numbers;
    int atom_j;
    int int_x;
    int int_y;
    int int_z;
    UNSIGNED_INT_VECTOR r1 = uint_crd[atom_i], r2;
    VECTOR dr;
    float dr2;
    float dr_abs;
    float ene_temp;
    float charge_i = charge[atom_i];
    float ene_lin = 0.;

    for (int j = threadIdx.y; j < N; j = j + blockDim.y) {
      atom_j = nl_i.atom_serial[j];
      r2 = uint_crd[atom_j];

      int_x = r2.uint_x - r1.uint_x;
      int_y = r2.uint_y - r1.uint_y;
      int_z = r2.uint_z - r1.uint_z;
      dr.x = boxlength[0].x * int_x;
      dr.y = boxlength[0].y * int_y;
      dr.z = boxlength[0].z * int_z;

      dr2 = dr.x * dr.x + dr.y * dr.y + dr.z * dr.z;
      if (dr2 < cutoff_square) {
        dr_abs = norm3df(dr.x, dr.y, dr.z);
        ene_temp = charge_i * charge[atom_j] * erfcf(beta * dr_abs) / dr_abs;
        ene_lin = ene_lin + ene_temp;
      }
    }
    atomicAdd(direct_ene, ene_lin);
  }
}

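// PME_Direct_Atom_Energy: same pairwise term as PME_Direct_Energy, but accumulated per
// atom into direct_ene[atom_i] rather than into one global scalar.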
__global__ static void PME_Direct_Atom_Energy(const int atom_numbers, const NEIGHBOR_LIST *nl,
                                              const UNSIGNED_INT_VECTOR *uint_crd, const VECTOR *boxlength,
                                              const float *charge, const float beta, const float cutoff_square,
                                              float *direct_ene) {
  int atom_i = blockDim.x * blockIdx.x + threadIdx.x;
  if (atom_i < atom_numbers) {
    NEIGHBOR_LIST nl_i = nl[atom_i];
    int N = nl_i.atom_numbers;
    int atom_j;
    int int_x;
    int int_y;
    int int_z;
    UNSIGNED_INT_VECTOR r1 = uint_crd[atom_i], r2;
    VECTOR dr;
    float dr2;
    float dr_abs;
    float ene_temp;
    float charge_i = charge[atom_i];
    float ene_lin = 0.;

    for (int j = threadIdx.y; j < N; j = j + blockDim.y) {
      atom_j = nl_i.atom_serial[j];
      r2 = uint_crd[atom_j];

      int_x = r2.uint_x - r1.uint_x;
      int_y = r2.uint_y - r1.uint_y;
      int_z = r2.uint_z - r1.uint_z;
      dr.x = boxlength[0].x * int_x;
      dr.y = boxlength[0].y * int_y;
      dr.z = boxlength[0].z * int_z;

      dr2 = dr.x * dr.x + dr.y * dr.y + dr.z * dr.z;
      if (dr2 < cutoff_square) {
        dr_abs = norm3df(dr.x, dr.y, dr.z);
        ene_temp = charge_i * charge[atom_j] * erfcf(beta * dr_abs) / dr_abs;
        ene_lin = ene_lin + ene_temp;
      }
    }
    atomicAdd(&direct_ene[atom_i], ene_lin);
  }
}

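// PME_Energy_Product: single-block dot product of two device lists; callers pass
// list1 = list2 = charge to obtain sum(q_i^2) for the self-energy term.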
__global__ static void PME_Energy_Product(const int element_number, const float *list1, const float *list2,
                                          float *sum) {
  if (threadIdx.x == 0) {
    sum[0] = 0.;
  }
  __syncthreads();
  float lin = 0.0;
  for (int i = threadIdx.x; i < element_number; i = i + blockDim.x) {
    lin = lin + list1[i] * list2[i];
  }
  atomicAdd(sum, lin);
}

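// PME_BCFQ: elementwise multiplication of the Fourier-transformed charge grid FQ by the
// precomputed real reciprocal-space prefactor B*C.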
__global__ static void PME_BCFQ(cufftComplex *PME_FQ, float *PME_BC, int PME_Nfft) {
  int index = blockDim.x * blockIdx.x + threadIdx.x;
  if (index < PME_Nfft) {
    float tempf = PME_BC[index];
    cufftComplex tempc = PME_FQ[index];
    PME_FQ[index].x = tempc.x * tempf;
    PME_FQ[index].y = tempc.y * tempf;
  }
}

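// PME_Excluded_Energy_Correction: subtracts q_i * q_j * erf(beta * r) / r for every
// excluded (e.g. bonded) pair, removing interactions that the reciprocal-space sum
// counted but that should not contribute to the total electrostatic energy.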
__global__ static void PME_Excluded_Energy_Correction(const int atom_numbers, const UNSIGNED_INT_VECTOR *uint_crd,
                                                      const VECTOR *scaler, const float *charge, const float pme_beta,
                                                      const float sqrt_pi, const int *excluded_list_start,
                                                      const int *excluded_list, const int *excluded_atom_numbers,
                                                      float *ene) {
  int atom_i = blockDim.x * blockIdx.x + threadIdx.x;
  if (atom_i < atom_numbers) {
    int excluded_number = excluded_atom_numbers[atom_i];
    if (excluded_number > 0) {
      int list_start = excluded_list_start[atom_i];
      int list_end = list_start + excluded_number;
      int atom_j;
      int int_x;
      int int_y;
      int int_z;

      float charge_i = charge[atom_i];
      float charge_j;
      float dr_abs;
      float beta_dr;

      UNSIGNED_INT_VECTOR r1 = uint_crd[atom_i], r2;
      VECTOR dr;
      float dr2;

      float ene_lin = 0.;

      for (int i = list_start; i < list_end; i = i + 1) {
        atom_j = excluded_list[i];
        r2 = uint_crd[atom_j];
        charge_j = charge[atom_j];

        int_x = r2.uint_x - r1.uint_x;
        int_y = r2.uint_y - r1.uint_y;
        int_z = r2.uint_z - r1.uint_z;
        dr.x = scaler[0].x * int_x;
        dr.y = scaler[0].y * int_y;
        dr.z = scaler[0].z * int_z;
        dr2 = dr.x * dr.x + dr.y * dr.y + dr.z * dr.z;

        dr_abs = sqrtf(dr2);
        beta_dr = pme_beta * dr_abs;

        ene_lin -= charge_i * charge_j * erff(beta_dr) / dr_abs;
      }
      atomicAdd(ene, ene_lin);
    }
  }
}
#endif
plugin/device/gpu/kernel/cuda_impl/sponge/pme/pme_energy_impl.cu
@@ -1,85 +0,0 @@
/**
 * Copyright 2021 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
/**
 * Note:
 * PMEEnergy. This is an experimental interface that is subject to change and/or deletion.
 */
#include "plugin/device/gpu/kernel/cuda_impl/sponge/pme/pme_energy_impl.cuh"
#include "plugin/device/gpu/kernel/cuda_impl/sponge/pme/pme_common.cuh"
#include "plugin/device/gpu/kernel/cuda_impl/sponge/common_sponge.cuh"

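// PME_Energy_Reciprocal: reciprocal-space energy as a single-block reduction of
// BC[i] * |FQ[i]|^2 over the half-spectrum produced by the R2C transform.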
__global__ void PME_Energy_Reciprocal(const int element_number, const cufftComplex *FQ, const float *BC, float *sum) {
  if (threadIdx.x == 0) {
    sum[0] = 0.;
  }
  __syncthreads();
  float lin = 0.0;
  cufftComplex FQ_i;
  for (int i = threadIdx.x; i < element_number; i = i + blockDim.x) {
    FQ_i = FQ[i];
    lin = lin + (FQ_i.x * FQ_i.x + FQ_i.y * FQ_i.y) * BC[i];
  }
  atomicAdd(sum, lin);
}

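// PMEEnergy: host-side driver that queues the whole energy evaluation on `stream`:
//   1. rebuild the neighbor list (capacity hard-coded to 800 entries per atom);
//   2. locate atoms on the charge mesh (PME_Atom_Near), spread their charges into
//      PME_Q (PME_Q_Spread), then run the R2C FFT;
//   3. reduce the reciprocal energy, the self energy (-beta/sqrt(pi) * sum(q_i^2)),
//      the direct-space sum, and the excluded-pair correction into the d_* outputs.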
void PMEEnergy(int fftx, int ffty, int fftz, int atom_numbers, float beta, float *PME_BC, int *pme_uxyz,
               float *pme_frxyz, float *PME_Q, float *pme_fq, int *PME_atom_near, int *pme_kxyz, const int *uint_crd_f,
               const float *charge, int *nl_atom_numbers, int *nl_atom_serial, int *nl, const float *scaler_f,
               const int *excluded_list_start, const int *excluded_list, const int *excluded_atom_numbers,
               float *d_reciprocal_ene, float *d_self_ene, float *d_direct_ene, float *d_correction_ene,
               dim3 thread_PME, int PME_Nin, int PME_Nfft, int PME_Nall, const cufftHandle &PME_plan_r2c,
               const cufftHandle &PME_plan_c2r, cudaStream_t stream) {
  UNSIGNED_INT_VECTOR *uint_crd =
    const_cast<UNSIGNED_INT_VECTOR *>(reinterpret_cast<const UNSIGNED_INT_VECTOR *>(uint_crd_f));
  VECTOR *scaler = const_cast<VECTOR *>(reinterpret_cast<const VECTOR *>(scaler_f));
  int max_neighbor_numbers = 800;
  NEIGHBOR_LIST *nl_a = reinterpret_cast<NEIGHBOR_LIST *>(nl);
  construct_neighbor_list_kernel<<<ceilf(static_cast<float>(atom_numbers) / 128), 128, 0, stream>>>(
    atom_numbers, max_neighbor_numbers, nl_atom_numbers, nl_atom_serial, nl_a);

  UNSIGNED_INT_VECTOR *PME_uxyz = reinterpret_cast<UNSIGNED_INT_VECTOR *>(pme_uxyz);
  UNSIGNED_INT_VECTOR *PME_kxyz = reinterpret_cast<UNSIGNED_INT_VECTOR *>(pme_kxyz);
  VECTOR *PME_frxyz = reinterpret_cast<VECTOR *>(pme_frxyz);
  cufftComplex *PME_FQ = reinterpret_cast<cufftComplex *>(pme_fq);

  Reset_List<<<3 * atom_numbers / 32 + 1, 32, 0, stream>>>(3 * atom_numbers, reinterpret_cast<int *>(PME_uxyz),
                                                           1 << 30);
  PME_Atom_Near<<<atom_numbers / 32 + 1, 32, 0, stream>>>(
    uint_crd, PME_atom_near, PME_Nin, periodic_factor_inverse * fftx, periodic_factor_inverse * ffty,
    periodic_factor_inverse * fftz, atom_numbers, fftx, ffty, fftz, PME_kxyz, PME_uxyz, PME_frxyz);

  Reset_List<<<PME_Nall / 1024 + 1, 1024, 0, stream>>>(PME_Nall, PME_Q, 0);

  PME_Q_Spread<<<atom_numbers / thread_PME.x + 1, thread_PME, 0, stream>>>(PME_atom_near, charge, PME_frxyz, PME_Q,
                                                                           PME_kxyz, atom_numbers);

  cufftExecR2C(PME_plan_r2c, reinterpret_cast<float *>(PME_Q), reinterpret_cast<cufftComplex *>(PME_FQ));

  PME_Energy_Reciprocal<<<1, 1024, 0, stream>>>(PME_Nfft, PME_FQ, PME_BC, d_reciprocal_ene);

  PME_Energy_Product<<<1, 1024, 0, stream>>>(atom_numbers, charge, charge, d_self_ene);
  Scale_List<<<1, 1, 0, stream>>>(1, d_self_ene, -beta / sqrtf(PI));

  Reset_List<<<1, 1, 0, stream>>>(1, d_direct_ene, 0.0);
  PME_Direct_Energy<<<atom_numbers / thread_PME.x + 1, thread_PME, 0, stream>>>(
    atom_numbers, nl_a, uint_crd, scaler, charge, beta, cutoff * cutoff, d_direct_ene);

  Reset_List<<<1, 1, 0, stream>>>(1, d_correction_ene, 0.0);
  PME_Excluded_Energy_Correction<<<atom_numbers / 32 + 1, 32, 0, stream>>>(
    atom_numbers, uint_crd, scaler, charge, beta, sqrtf(PI), excluded_list_start, excluded_list, excluded_atom_numbers,
    d_correction_ene);
  return;
}
plugin/device/gpu/kernel/cuda_impl/sponge/pme/pme_energy_impl.cuh
@@ -1,32 +0,0 @@
/**
 * Copyright 2021 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#ifndef MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_PME_PME_ENERGY_IMPL_H_
#define MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_PME_PME_ENERGY_IMPL_H_

#include <cufft.h>
#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/cuda_common.h"
#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/cuda_device_info.h"

CUDA_LIB_EXPORT void PMEEnergy(int fftx, int ffty, int fftz, int atom_numbers, float beta, float *PME_BC, int *pme_uxyz,
                               float *pme_frxyz, float *PME_Q, float *pme_fq, int *PME_atom_near, int *pme_kxyz,
                               const int *uint_crd_f, const float *charge, int *nl_atom_numbers, int *nl_atom_serial,
                               int *nl, const float *scaler_f, const int *excluded_list_start, const int *excluded_list,
                               const int *excluded_atom_numbers, float *d_reciprocal_ene, float *d_self_ene,
                               float *d_direct_ene, float *d_correction_ene, dim3 thread_PME, int PME_Nin, int PME_Nfft,
                               int PME_Nall, const cufftHandle &PME_plan_r2c, const cufftHandle &PME_plan_c2r,
                               cudaStream_t stream);

#endif
plugin/device/gpu/kernel/cuda_impl/sponge/pme/pme_energy_update_impl.cu
@@ -1,90 +0,0 @@
/**
 * Copyright 2021 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
/**
 * Note:
 * PMEEnergyUpdate. This is an experimental interface that is subject to change and/or deletion.
 */
#include "plugin/device/gpu/kernel/cuda_impl/sponge/pme/pme_energy_update_impl.cuh"
#include "plugin/device/gpu/kernel/cuda_impl/sponge/pme/pme_common.cuh"
#include "plugin/device/gpu/kernel/cuda_impl/sponge/common_sponge.cuh"

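// PME_Energy_Reciprocal_update: same BC-weighted |FQ|^2 reduction as
// PME_Energy_Reciprocal in pme_energy_impl.cu, duplicated for this op variant.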
__global__ void PME_Energy_Reciprocal_update(const int element_number, const cufftComplex *FQ, const float *BC,
                                             float *sum) {
  if (threadIdx.x == 0) {
    sum[0] = 0.;
  }
  __syncthreads();
  float lin = 0.0;
  cufftComplex FQ_i;
  for (int i = threadIdx.x; i < element_number; i = i + blockDim.x) {
    FQ_i = FQ[i];
    lin = lin + (FQ_i.x * FQ_i.x + FQ_i.y * FQ_i.y) * BC[i];
  }
  atomicAdd(sum, lin);
}

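// PMEEnergyUpdate: same pipeline as PMEEnergy with two differences: the neighbor-list
// capacity is the max_neighbor_numbers parameter instead of the hard-coded 800, and the
// self energy gains a neutralizing-background term neutralizing_factor * (sum q_i)^2
// (Sum_Of_List followed by device_add) for systems with a net charge.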
void PMEEnergyUpdate(int fftx, int ffty, int fftz, int atom_numbers, float beta, float *PME_BC, int *pme_uxyz,
                     float *pme_frxyz, float *PME_Q, float *pme_fq, int *PME_atom_near, int *pme_kxyz,
                     const int *uint_crd_f, const float *charge, int *nl_atom_numbers, int *nl_atom_serial, int *nl,
                     const float *scaler_f, const int *excluded_list_start, const int *excluded_list,
                     const int *excluded_atom_numbers, float *d_reciprocal_ene, float *d_self_ene, float *d_direct_ene,
                     float *d_correction_ene, dim3 thread_PME, int PME_Nin, int PME_Nfft, int PME_Nall,
                     const cufftHandle &PME_plan_r2c, const cufftHandle &PME_plan_c2r, float *neutralizing_factor,
                     float *charge_sum, int max_neighbor_numbers, cudaStream_t stream) {
  UNSIGNED_INT_VECTOR *uint_crd =
    const_cast<UNSIGNED_INT_VECTOR *>(reinterpret_cast<const UNSIGNED_INT_VECTOR *>(uint_crd_f));
  VECTOR *scaler = const_cast<VECTOR *>(reinterpret_cast<const VECTOR *>(scaler_f));
  NEIGHBOR_LIST *nl_a = reinterpret_cast<NEIGHBOR_LIST *>(nl);
  construct_neighbor_list_kernel<<<ceilf(static_cast<float>(atom_numbers) / 128), 128, 0, stream>>>(
    atom_numbers, max_neighbor_numbers, nl_atom_numbers, nl_atom_serial, nl_a);

  UNSIGNED_INT_VECTOR *PME_uxyz = reinterpret_cast<UNSIGNED_INT_VECTOR *>(pme_uxyz);
  UNSIGNED_INT_VECTOR *PME_kxyz = reinterpret_cast<UNSIGNED_INT_VECTOR *>(pme_kxyz);
  VECTOR *PME_frxyz = reinterpret_cast<VECTOR *>(pme_frxyz);
  cufftComplex *PME_FQ = reinterpret_cast<cufftComplex *>(pme_fq);

  Reset_List<<<3 * atom_numbers / 32 + 1, 32, 0, stream>>>(3 * atom_numbers, reinterpret_cast<int *>(PME_uxyz),
                                                           1 << 30);
  PME_Atom_Near<<<atom_numbers / 32 + 1, 32, 0, stream>>>(
    uint_crd, PME_atom_near, PME_Nin, periodic_factor_inverse * fftx, periodic_factor_inverse * ffty,
    periodic_factor_inverse * fftz, atom_numbers, fftx, ffty, fftz, PME_kxyz, PME_uxyz, PME_frxyz);

  Reset_List<<<PME_Nall / 1024 + 1, 1024, 0, stream>>>(PME_Nall, PME_Q, 0);

  PME_Q_Spread<<<atom_numbers / thread_PME.x + 1, thread_PME, 0, stream>>>(PME_atom_near, charge, PME_frxyz, PME_Q,
                                                                           PME_kxyz, atom_numbers);

  cufftExecR2C(PME_plan_r2c, reinterpret_cast<float *>(PME_Q), reinterpret_cast<cufftComplex *>(PME_FQ));

  PME_Energy_Reciprocal_update<<<1, 1024, 0, stream>>>(PME_Nfft, PME_FQ, PME_BC, d_reciprocal_ene);

  PME_Energy_Product<<<1, 1024, 0, stream>>>(atom_numbers, charge, charge, d_self_ene);
  Scale_List<<<1, 1, 0, stream>>>(1, d_self_ene, -beta / sqrtf(PI));

  // Launch on `stream` so the charge-sum correction stays ordered with the other kernels.
  Sum_Of_List<<<1, 1024, 0, stream>>>(atom_numbers, charge, charge_sum);
  device_add<<<1, 1, 0, stream>>>(d_self_ene, neutralizing_factor, charge_sum);

  Reset_List<<<1, 1, 0, stream>>>(1, d_direct_ene, 0.0);
  PME_Direct_Energy<<<atom_numbers / thread_PME.x + 1, thread_PME, 0, stream>>>(
    atom_numbers, nl_a, uint_crd, scaler, charge, beta, cutoff * cutoff, d_direct_ene);

  Reset_List<<<1, 1, 0, stream>>>(1, d_correction_ene, 0.0);
  PME_Excluded_Energy_Correction<<<atom_numbers / 32 + 1, 32, 0, stream>>>(
    atom_numbers, uint_crd, scaler, charge, beta, sqrtf(PI), excluded_list_start, excluded_list, excluded_atom_numbers,
    d_correction_ene);
  return;
}
plugin/device/gpu/kernel/cuda_impl/sponge/pme/pme_energy_update_impl.cuh
@@ -1,34 +0,0 @@
/**
 * Copyright 2021 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#ifndef MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_PME_PME_ENERGY_UPDATE_IMPL_H_
#define MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_PME_PME_ENERGY_UPDATE_IMPL_H_

#include <cufft.h>
#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/cuda_common.h"
#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/cuda_device_info.h"

CUDA_LIB_EXPORT void PMEEnergyUpdate(int fftx, int ffty, int fftz, int atom_numbers, float beta, float *PME_BC,
                                     int *pme_uxyz, float *pme_frxyz, float *PME_Q, float *pme_fq, int *PME_atom_near,
                                     int *pme_kxyz, const int *uint_crd_f, const float *charge, int *nl_atom_numbers,
                                     int *nl_atom_serial, int *nl, const float *scaler_f,
                                     const int *excluded_list_start, const int *excluded_list,
                                     const int *excluded_atom_numbers, float *d_reciprocal_ene, float *d_self_ene,
                                     float *d_direct_ene, float *d_correction_ene, dim3 thread_PME, int PME_Nin,
                                     int PME_Nfft, int PME_Nall, const cufftHandle &PME_plan_r2c,
                                     const cufftHandle &PME_plan_c2r, float *neutralizing_factor, float *charge_sum,
                                     int max_neighbor_numbers, cudaStream_t stream);

#endif