From 3ae8fdccd83c8eabde2739a0abf6e99dbc14f3f1 Mon Sep 17 00:00:00 2001 From: Trung Nguyen Date: Tue, 19 Jun 2018 16:22:00 -0500 Subject: [PATCH] Updated the kernels for born/coul/long/cs and coul/long/cs gpu styles --- lib/gpu/lal_born_coul_long_cs.cu | 9 ++++----- lib/gpu/lal_coul_long_cs.cu | 8 ++++---- src/GPU/Install.sh | 4 ++++ 3 files changed, 12 insertions(+), 9 deletions(-) diff --git a/lib/gpu/lal_born_coul_long_cs.cu b/lib/gpu/lal_born_coul_long_cs.cu index c5f98567d9..a6821e1cf7 100644 --- a/lib/gpu/lal_born_coul_long_cs.cu +++ b/lib/gpu/lal_born_coul_long_cs.cu @@ -121,7 +121,7 @@ __kernel void k_born_coul_long_cs(const __global numtyp4 *restrict x_, if (factor_coul<(numtyp)1.0) { numtyp grij = g_ewald * (r+EPS_EWALD); numtyp expm2 = ucl_exp(-grij*grij); - numtyp t = ucl_recip((numtyp)1.0 + CS_EWALD_P*grij); + acctyp t = ucl_recip((numtyp)1.0 + CS_EWALD_P*grij); numtyp u = (numtyp)1.0 - t; _erfc = t * ((numtyp)1.0 + u*(B0+u*(B1+u*(B2+u*(B3+u*(B4+u*B5)))))) * expm2; prefactor /= (r+EPS_EWALD); @@ -132,7 +132,7 @@ __kernel void k_born_coul_long_cs(const __global numtyp4 *restrict x_, } else { numtyp grij = g_ewald * r; numtyp expm2 = ucl_exp(-grij*grij); - numtyp t = ucl_recip((numtyp)1.0 + CS_EWALD_P*grij); + acctyp t = ucl_recip((numtyp)1.0 + CS_EWALD_P*grij); numtyp u = (numtyp)1.0 - t; _erfc = t * ((numtyp)1.0 + u*(B0+u*(B1+u*(B2+u*(B3+u*(B4+u*B5)))))) * expm2; prefactor /= r; @@ -262,7 +262,7 @@ __kernel void k_born_coul_long_cs_fast(const __global numtyp4 *restrict x_, if (factor_coul<(numtyp)1.0) { numtyp grij = g_ewald * (r+EPS_EWALD); numtyp expm2 = ucl_exp(-grij*grij); - numtyp t = ucl_recip((numtyp)1.0 + CS_EWALD_P*grij); + acctyp t = ucl_recip((numtyp)1.0 + CS_EWALD_P*grij); numtyp u = (numtyp)1.0 - t; _erfc = t * ((numtyp)1.0 + u*(B0+u*(B1+u*(B2+u*(B3+u*(B4+u*B5)))))) * expm2; prefactor /= (r+EPS_EWALD); @@ -271,10 +271,9 @@ __kernel void k_born_coul_long_cs_fast(const __global numtyp4 *restrict x_, // scaling of the overall force shall be consistent r2inv = ucl_recip(rsq + EPS_EWALD_SQR); } else { - numtyp grij = g_ewald * r; numtyp expm2 = ucl_exp(-grij*grij); - numtyp t = ucl_recip((numtyp)1.0 + CS_EWALD_P*grij); + acctyp t = ucl_recip((numtyp)1.0 + CS_EWALD_P*grij); numtyp u = (numtyp)1.0 - t; _erfc = t * ((numtyp)1.0 + u*(B0+u*(B1+u*(B2+u*(B3+u*(B4+u*B5)))))) * expm2; prefactor /= r; diff --git a/lib/gpu/lal_coul_long_cs.cu b/lib/gpu/lal_coul_long_cs.cu index 1ff9445f4c..c038766129 100644 --- a/lib/gpu/lal_coul_long_cs.cu +++ b/lib/gpu/lal_coul_long_cs.cu @@ -205,7 +205,7 @@ __kernel void k_coul_long_cs(const __global numtyp4 *restrict x_, if (factor_coul<(numtyp)1.0) { numtyp grij = g_ewald * (r+EPS_EWALD); numtyp expm2 = ucl_exp(-grij*grij); - numtyp t = ucl_recip((numtyp)1.0 + CS_EWALD_P*grij); + acctyp t = ucl_recip((numtyp)1.0 + CS_EWALD_P*grij); numtyp u = (numtyp)1.0 - t; _erfc = t * ((numtyp)1.0 + u*(B0+u*(B1+u*(B2+u*(B3+u*(B4+u*B5)))))) * expm2; prefactor /= (r+EPS_EWALD); @@ -217,7 +217,7 @@ __kernel void k_coul_long_cs(const __global numtyp4 *restrict x_, } else { numtyp grij = g_ewald * r; numtyp expm2 = ucl_exp(-grij*grij); - numtyp t = ucl_recip((numtyp)1.0 + CS_EWALD_P*grij); + acctyp t = ucl_recip((numtyp)1.0 + CS_EWALD_P*grij); numtyp u = (numtyp)1.0 - t; _erfc = t * ((numtyp)1.0 + u*(B0+u*(B1+u*(B2+u*(B3+u*(B4+u*B5)))))) * expm2; prefactor /= r; @@ -320,7 +320,7 @@ __kernel void k_coul_long_cs_fast(const __global numtyp4 *restrict x_, if (factor_coul<(numtyp)1.0) { numtyp grij = g_ewald * (r+EPS_EWALD); numtyp expm2 = ucl_exp(-grij*grij); - numtyp t = ucl_recip((numtyp)1.0 + CS_EWALD_P*grij); + acctyp t = ucl_recip((numtyp)1.0 + CS_EWALD_P*grij); numtyp u = (numtyp)1.0 - t; _erfc = t * ((numtyp)1.0 + u*(B0+u*(B1+u*(B2+u*(B3+u*(B4+u*B5)))))) * expm2; prefactor /= (r+EPS_EWALD); @@ -331,7 +331,7 @@ __kernel void k_coul_long_cs_fast(const __global numtyp4 *restrict x_, } else { numtyp grij = g_ewald * r; numtyp expm2 = ucl_exp(-grij*grij); - numtyp t = ucl_recip((numtyp)1.0 + CS_EWALD_P*grij); + acctyp t = ucl_recip((numtyp)1.0 + CS_EWALD_P*grij); numtyp u = (numtyp)1.0 - t; _erfc = t * ((numtyp)1.0 + u*(B0+u*(B1+u*(B2+u*(B3+u*(B4+u*B5)))))) * expm2; prefactor /= r; diff --git a/src/GPU/Install.sh b/src/GPU/Install.sh index c3c2ce168c..9b029d15cb 100755 --- a/src/GPU/Install.sh +++ b/src/GPU/Install.sh @@ -35,6 +35,8 @@ action pair_beck_gpu.cpp action pair_beck_gpu.h action pair_born_coul_long_gpu.cpp pair_born_coul_long.cpp action pair_born_coul_long_gpu.h pair_born_coul_long.cpp +action pair_born_coul_long_cs_gpu.cpp pair_born_coul_long_cs.cpp +action pair_born_coul_long_cs_gpu.h pair_born_coul_long_cs.cpp action pair_born_coul_wolf_gpu.cpp action pair_born_coul_wolf_gpu.h action pair_born_gpu.cpp @@ -55,6 +57,8 @@ action pair_coul_dsf_gpu.cpp action pair_coul_dsf_gpu.h action pair_coul_long_gpu.cpp pair_coul_long.cpp action pair_coul_long_gpu.h pair_coul_long.cpp +action pair_coul_long_cs_gpu.cpp pair_coul_long_cs.cpp +action pair_coul_long_cs_gpu.h pair_coul_long_cs.cpp action pair_dpd_gpu.cpp action pair_dpd_gpu.h action pair_dpd_tstat_gpu.cpp